aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm/tools/clang/lib/Format
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/tools/clang/lib/Format')
-rw-r--r--contrib/llvm/tools/clang/lib/Format/Format.cpp1763
-rw-r--r--contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp1187
-rw-r--r--contrib/llvm/tools/clang/lib/Format/TokenAnnotator.h262
-rw-r--r--contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp858
-rw-r--r--contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.h201
5 files changed, 4271 insertions, 0 deletions
diff --git a/contrib/llvm/tools/clang/lib/Format/Format.cpp b/contrib/llvm/tools/clang/lib/Format/Format.cpp
new file mode 100644
index 000000000000..101b16f1a18c
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/Format.cpp
@@ -0,0 +1,1763 @@
+//===--- Format.cpp - Format C++ code -------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements functions declared in Format.h. This will be
+/// split into separate files as we go.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "format-formatter"
+
+#include "TokenAnnotator.h"
+#include "UnwrappedLineParser.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/OperatorPrecedence.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Format/Format.h"
+#include "clang/Frontend/TextDiagnosticPrinter.h"
+#include "clang/Lex/Lexer.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include <queue>
+#include <string>
+
+namespace clang {
+namespace format {
+
+FormatStyle getLLVMStyle() {
+ FormatStyle LLVMStyle;
+ LLVMStyle.ColumnLimit = 80;
+ LLVMStyle.MaxEmptyLinesToKeep = 1;
+ LLVMStyle.PointerBindsToType = false;
+ LLVMStyle.DerivePointerBinding = false;
+ LLVMStyle.AccessModifierOffset = -2;
+ LLVMStyle.Standard = FormatStyle::LS_Cpp03;
+ LLVMStyle.IndentCaseLabels = false;
+ LLVMStyle.SpacesBeforeTrailingComments = 1;
+ LLVMStyle.BinPackParameters = true;
+ LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true;
+ LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false;
+ LLVMStyle.AllowShortIfStatementsOnASingleLine = false;
+ LLVMStyle.ObjCSpaceBeforeProtocolList = true;
+ LLVMStyle.PenaltyExcessCharacter = 1000000;
+ LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 5;
+ return LLVMStyle;
+}
+
+FormatStyle getGoogleStyle() {
+ FormatStyle GoogleStyle;
+ GoogleStyle.ColumnLimit = 80;
+ GoogleStyle.MaxEmptyLinesToKeep = 1;
+ GoogleStyle.PointerBindsToType = true;
+ GoogleStyle.DerivePointerBinding = true;
+ GoogleStyle.AccessModifierOffset = -1;
+ GoogleStyle.Standard = FormatStyle::LS_Auto;
+ GoogleStyle.IndentCaseLabels = true;
+ GoogleStyle.SpacesBeforeTrailingComments = 2;
+ GoogleStyle.BinPackParameters = true;
+ GoogleStyle.AllowAllParametersOfDeclarationOnNextLine = true;
+ GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true;
+ GoogleStyle.AllowShortIfStatementsOnASingleLine = false;
+ GoogleStyle.ObjCSpaceBeforeProtocolList = false;
+ GoogleStyle.PenaltyExcessCharacter = 1000000;
+ GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 100;
+ return GoogleStyle;
+}
+
+FormatStyle getChromiumStyle() {
+ FormatStyle ChromiumStyle = getGoogleStyle();
+ ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false;
+ ChromiumStyle.BinPackParameters = false;
+ ChromiumStyle.Standard = FormatStyle::LS_Cpp03;
+ ChromiumStyle.DerivePointerBinding = false;
+ return ChromiumStyle;
+}
+
+static bool isTrailingComment(const AnnotatedToken &Tok) {
+ return Tok.is(tok::comment) &&
+ (Tok.Children.empty() || Tok.Children[0].MustBreakBefore);
+}
+
+static bool isComparison(const AnnotatedToken &Tok) {
+ prec::Level Precedence = getPrecedence(Tok);
+ return Tok.Type == TT_BinaryOperator &&
+ (Precedence == prec::Equality || Precedence == prec::Relational);
+}
+
+// Returns the length of everything up to the first possible line break after
+// the ), ], } or > matching \c Tok.
+static unsigned getLengthToMatchingParen(const AnnotatedToken &Tok) {
+ if (Tok.MatchingParen == NULL)
+ return 0;
+ AnnotatedToken *End = Tok.MatchingParen;
+ while (!End->Children.empty() && !End->Children[0].CanBreakBefore) {
+ End = &End->Children[0];
+ }
+ return End->TotalLength - Tok.TotalLength + 1;
+}
+
+static size_t
+calculateColumnLimit(const FormatStyle &Style, bool InPPDirective) {
+ // In preprocessor directives reserve two chars for trailing " \"
+ return Style.ColumnLimit - (InPPDirective ? 2 : 0);
+}
+
+/// \brief Manages the whitespaces around tokens and their replacements.
+///
+/// This includes special handling for certain constructs, e.g. the alignment of
+/// trailing line comments.
+class WhitespaceManager {
+public:
+ WhitespaceManager(SourceManager &SourceMgr, const FormatStyle &Style)
+ : SourceMgr(SourceMgr), Style(Style) {}
+
+ /// \brief Replaces the whitespace in front of \p Tok. Only call once for
+ /// each \c AnnotatedToken.
+ void replaceWhitespace(const AnnotatedToken &Tok, unsigned NewLines,
+ unsigned Spaces, unsigned WhitespaceStartColumn) {
+ // 2+ newlines mean an empty line separating logic scopes.
+ if (NewLines >= 2)
+ alignComments();
+
+ SourceLocation TokenLoc = Tok.FormatTok.Tok.getLocation();
+ bool LineExceedsColumnLimit = Spaces + WhitespaceStartColumn +
+ Tok.FormatTok.TokenLength > Style.ColumnLimit;
+
+ // Align line comments if they are trailing or if they continue other
+ // trailing comments.
+ if (isTrailingComment(Tok)) {
+ // Remove the comment's trailing whitespace.
+ if (Tok.FormatTok.Tok.getLength() != Tok.FormatTok.TokenLength)
+ Replaces.insert(tooling::Replacement(
+ SourceMgr, TokenLoc.getLocWithOffset(Tok.FormatTok.TokenLength),
+ Tok.FormatTok.Tok.getLength() - Tok.FormatTok.TokenLength, ""));
+
+ // Align comment with other comments.
+ if ((Tok.Parent != NULL || !Comments.empty()) &&
+ !LineExceedsColumnLimit) {
+ StoredComment Comment;
+ Comment.Tok = Tok.FormatTok;
+ Comment.Spaces = Spaces;
+ Comment.NewLines = NewLines;
+ Comment.MinColumn =
+ NewLines > 0 ? Spaces : WhitespaceStartColumn + Spaces;
+ Comment.MaxColumn = Style.ColumnLimit - Tok.FormatTok.TokenLength;
+ Comment.Untouchable = false;
+ Comments.push_back(Comment);
+ return;
+ }
+ }
+
+ // If this line does not have a trailing comment, align the stored comments.
+ if (Tok.Children.empty() && !isTrailingComment(Tok))
+ alignComments();
+
+ if (Tok.Type == TT_BlockComment) {
+ indentBlockComment(Tok, Spaces, WhitespaceStartColumn, NewLines, false);
+ } else if (Tok.Type == TT_LineComment && LineExceedsColumnLimit) {
+ StringRef Line(SourceMgr.getCharacterData(TokenLoc),
+ Tok.FormatTok.TokenLength);
+ int StartColumn = Spaces + (NewLines == 0 ? WhitespaceStartColumn : 0);
+ StringRef Prefix = getLineCommentPrefix(Line);
+ std::string NewPrefix = std::string(StartColumn, ' ') + Prefix.str();
+ splitLineInComment(Tok.FormatTok, Line.substr(Prefix.size()),
+ StartColumn + Prefix.size(), NewPrefix,
+ /*InPPDirective=*/ false,
+ /*CommentHasMoreLines=*/ false);
+ }
+
+ storeReplacement(Tok.FormatTok, getNewLineText(NewLines, Spaces));
+ }
+
+ /// \brief Like \c replaceWhitespace, but additionally adds right-aligned
+ /// backslashes to escape newlines inside a preprocessor directive.
+ ///
+ /// This function and \c replaceWhitespace have the same behavior if
+ /// \c Newlines == 0.
+ void replacePPWhitespace(const AnnotatedToken &Tok, unsigned NewLines,
+ unsigned Spaces, unsigned WhitespaceStartColumn) {
+ if (Tok.Type == TT_BlockComment)
+ indentBlockComment(Tok, Spaces, WhitespaceStartColumn, NewLines, true);
+
+ storeReplacement(Tok.FormatTok,
+ getNewLineText(NewLines, Spaces, WhitespaceStartColumn));
+ }
+
+ /// \brief Inserts a line break into the middle of a token.
+ ///
+ /// Will break at \p Offset inside \p Tok, putting \p Prefix before the line
+ /// break and \p Postfix before the rest of the token starts in the next line.
+ ///
+ /// \p InPPDirective, \p Spaces, \p WhitespaceStartColumn and \p Style are
+ /// used to generate the correct line break.
+ void breakToken(const FormatToken &Tok, unsigned Offset,
+ unsigned ReplaceChars, StringRef Prefix, StringRef Postfix,
+ bool InPPDirective, unsigned Spaces,
+ unsigned WhitespaceStartColumn) {
+ std::string NewLineText;
+ if (!InPPDirective)
+ NewLineText = getNewLineText(1, Spaces);
+ else
+ NewLineText = getNewLineText(1, Spaces, WhitespaceStartColumn);
+ std::string ReplacementText = (Prefix + NewLineText + Postfix).str();
+ SourceLocation Location = Tok.Tok.getLocation().getLocWithOffset(Offset);
+ Replaces.insert(tooling::Replacement(SourceMgr, Location, ReplaceChars,
+ ReplacementText));
+ }
+
+ /// \brief Returns all the \c Replacements created during formatting.
+ const tooling::Replacements &generateReplacements() {
+ alignComments();
+ return Replaces;
+ }
+
+ void addUntouchableComment(unsigned Column) {
+ StoredComment Comment;
+ Comment.MinColumn = Column;
+ Comment.MaxColumn = Column;
+ Comment.Untouchable = true;
+ Comments.push_back(Comment);
+ }
+
+private:
+ static StringRef getLineCommentPrefix(StringRef Comment) {
+ const char *KnownPrefixes[] = { "/// ", "///", "// ", "//" };
+ for (size_t i = 0; i < llvm::array_lengthof(KnownPrefixes); ++i)
+ if (Comment.startswith(KnownPrefixes[i]))
+ return KnownPrefixes[i];
+ return "";
+ }
+
+ /// \brief Finds a common prefix of lines of a block comment to properly
+ /// indent (and possibly decorate with '*'s) added lines.
+ ///
+ /// The first line is ignored (it's special and starts with /*). The number of
+ /// lines should be more than one.
+ static StringRef findCommentLinesPrefix(ArrayRef<StringRef> Lines,
+ const char *PrefixChars = " *") {
+ assert(Lines.size() > 1);
+ StringRef Prefix(Lines[1].data(), Lines[1].find_first_not_of(PrefixChars));
+ for (size_t i = 2; i < Lines.size(); ++i) {
+ for (size_t j = 0; j < Prefix.size() && j < Lines[i].size(); ++j) {
+ if (Prefix[j] != Lines[i][j]) {
+ Prefix = Prefix.substr(0, j);
+ break;
+ }
+ }
+ }
+ return Prefix;
+ }
+
+ /// \brief Splits one line in a line or block comment, if it doesn't fit to
+ /// provided column limit. Removes trailing whitespace in each line.
+ ///
+ /// \param Line points to the line contents without leading // or /*.
+ ///
+ /// \param StartColumn is the column where the first character of Line will be
+ /// located after formatting.
+ ///
+ /// \param LinePrefix is inserted after each line break.
+ ///
+ /// When \param InPPDirective is true, each line break will be preceded by a
+ /// backslash in the last column to make line breaks inside the comment
+ /// visually consistent with line breaks outside the comment. This only makes
+ /// sense for block comments.
+ ///
+ /// When \param CommentHasMoreLines is false, no line breaks/trailing
+ /// backslashes will be inserted after it.
+ void splitLineInComment(const FormatToken &Tok, StringRef Line,
+ size_t StartColumn, StringRef LinePrefix,
+ bool InPPDirective, bool CommentHasMoreLines,
+ const char *WhiteSpaceChars = " ") {
+ size_t ColumnLimit = calculateColumnLimit(Style, InPPDirective);
+ const char *TokenStart = SourceMgr.getCharacterData(Tok.Tok.getLocation());
+
+ StringRef TrimmedLine = Line.rtrim();
+ int TrailingSpaceLength = Line.size() - TrimmedLine.size();
+
+ // Don't touch leading whitespace.
+ Line = TrimmedLine.ltrim();
+ StartColumn += TrimmedLine.size() - Line.size();
+
+ while (Line.size() + StartColumn > ColumnLimit) {
+ // Try to break at the last whitespace before the column limit.
+ size_t SpacePos =
+ Line.find_last_of(WhiteSpaceChars, ColumnLimit - StartColumn + 1);
+ if (SpacePos == StringRef::npos) {
+ // Try to find any whitespace in the line.
+ SpacePos = Line.find_first_of(WhiteSpaceChars);
+ if (SpacePos == StringRef::npos) // No whitespace found, give up.
+ break;
+ }
+
+ StringRef NextCut = Line.substr(0, SpacePos).rtrim();
+ StringRef RemainingLine = Line.substr(SpacePos).ltrim();
+ if (RemainingLine.empty())
+ break;
+
+ if (RemainingLine == "*/" && LinePrefix.endswith("* "))
+ LinePrefix = LinePrefix.substr(0, LinePrefix.size() - 2);
+
+ Line = RemainingLine;
+
+ size_t ReplaceChars = Line.begin() - NextCut.end();
+ breakToken(Tok, NextCut.end() - TokenStart, ReplaceChars, "", LinePrefix,
+ InPPDirective, 0, NextCut.size() + StartColumn);
+ StartColumn = LinePrefix.size();
+ }
+
+ if (TrailingSpaceLength > 0 || (InPPDirective && CommentHasMoreLines)) {
+ // Remove trailing whitespace/insert backslash. + 1 is for \n
+ breakToken(Tok, Line.end() - TokenStart, TrailingSpaceLength + 1, "", "",
+ InPPDirective, 0, Line.size() + StartColumn);
+ }
+ }
+
+ /// \brief Changes indentation of all lines in a block comment by Indent,
+ /// removes trailing whitespace from each line, splits lines that end up
+ /// exceeding the column limit.
+ void indentBlockComment(const AnnotatedToken &Tok, int Indent,
+ int WhitespaceStartColumn, int NewLines,
+ bool InPPDirective) {
+ assert(Tok.Type == TT_BlockComment);
+ int StartColumn = Indent + (NewLines == 0 ? WhitespaceStartColumn : 0);
+ const SourceLocation TokenLoc = Tok.FormatTok.Tok.getLocation();
+ const int CurrentIndent = SourceMgr.getSpellingColumnNumber(TokenLoc) - 1;
+ const int IndentDelta = Indent - CurrentIndent;
+ const StringRef Text(SourceMgr.getCharacterData(TokenLoc),
+ Tok.FormatTok.TokenLength);
+ assert(Text.startswith("/*") && Text.endswith("*/"));
+
+ SmallVector<StringRef, 16> Lines;
+ Text.split(Lines, "\n");
+
+ if (IndentDelta > 0) {
+ std::string WhiteSpace(IndentDelta, ' ');
+ for (size_t i = 1; i < Lines.size(); ++i) {
+ Replaces.insert(tooling::Replacement(
+ SourceMgr, TokenLoc.getLocWithOffset(Lines[i].data() - Text.data()),
+ 0, WhiteSpace));
+ }
+ } else if (IndentDelta < 0) {
+ std::string WhiteSpace(-IndentDelta, ' ');
+ // Check that the line is indented enough.
+ for (size_t i = 1; i < Lines.size(); ++i) {
+ if (!Lines[i].startswith(WhiteSpace))
+ return;
+ }
+ for (size_t i = 1; i < Lines.size(); ++i) {
+ Replaces.insert(tooling::Replacement(
+ SourceMgr, TokenLoc.getLocWithOffset(Lines[i].data() - Text.data()),
+ -IndentDelta, ""));
+ }
+ }
+
+ // Split long lines in comments.
+ size_t OldPrefixSize = 0;
+ std::string NewPrefix;
+ if (Lines.size() > 1) {
+ StringRef CurrentPrefix = findCommentLinesPrefix(Lines);
+ OldPrefixSize = CurrentPrefix.size();
+ NewPrefix = (IndentDelta < 0)
+ ? CurrentPrefix.substr(-IndentDelta).str()
+ : std::string(IndentDelta, ' ') + CurrentPrefix.str();
+ if (CurrentPrefix.endswith("*")) {
+ NewPrefix += " ";
+ ++OldPrefixSize;
+ }
+ } else if (Tok.Parent == 0) {
+ NewPrefix = std::string(StartColumn, ' ') + " * ";
+ }
+
+ StartColumn += 2;
+ for (size_t i = 0; i < Lines.size(); ++i) {
+ StringRef Line = Lines[i].substr(i == 0 ? 2 : OldPrefixSize);
+ splitLineInComment(Tok.FormatTok, Line, StartColumn, NewPrefix,
+ InPPDirective, i != Lines.size() - 1);
+ StartColumn = NewPrefix.size();
+ }
+ }
+
+ std::string getNewLineText(unsigned NewLines, unsigned Spaces) {
+ return std::string(NewLines, '\n') + std::string(Spaces, ' ');
+ }
+
+ std::string getNewLineText(unsigned NewLines, unsigned Spaces,
+ unsigned WhitespaceStartColumn) {
+ std::string NewLineText;
+ if (NewLines > 0) {
+ unsigned Offset =
+ std::min<int>(Style.ColumnLimit - 1, WhitespaceStartColumn);
+ for (unsigned i = 0; i < NewLines; ++i) {
+ NewLineText += std::string(Style.ColumnLimit - Offset - 1, ' ');
+ NewLineText += "\\\n";
+ Offset = 0;
+ }
+ }
+ return NewLineText + std::string(Spaces, ' ');
+ }
+
+ /// \brief Structure to store a comment for later layout and alignment.
+ struct StoredComment {
+ FormatToken Tok;
+ unsigned MinColumn;
+ unsigned MaxColumn;
+ unsigned NewLines;
+ unsigned Spaces;
+ bool Untouchable;
+ };
+ SmallVector<StoredComment, 16> Comments;
+ typedef SmallVector<StoredComment, 16>::iterator comment_iterator;
+
+ /// \brief Try to align all stashed comments.
+ void alignComments() {
+ unsigned MinColumn = 0;
+ unsigned MaxColumn = UINT_MAX;
+ comment_iterator Start = Comments.begin();
+ for (comment_iterator I = Start, E = Comments.end(); I != E; ++I) {
+ if (I->MinColumn > MaxColumn || I->MaxColumn < MinColumn) {
+ alignComments(Start, I, MinColumn);
+ MinColumn = I->MinColumn;
+ MaxColumn = I->MaxColumn;
+ Start = I;
+ } else {
+ MinColumn = std::max(MinColumn, I->MinColumn);
+ MaxColumn = std::min(MaxColumn, I->MaxColumn);
+ }
+ }
+ alignComments(Start, Comments.end(), MinColumn);
+ Comments.clear();
+ }
+
+ /// \brief Put all the comments between \p I and \p E into \p Column.
+ void alignComments(comment_iterator I, comment_iterator E, unsigned Column) {
+ while (I != E) {
+ if (!I->Untouchable) {
+ unsigned Spaces = I->Spaces + Column - I->MinColumn;
+ storeReplacement(I->Tok, getNewLineText(I->NewLines, Spaces));
+ }
+ ++I;
+ }
+ }
+
+ /// \brief Stores \p Text as the replacement for the whitespace in front of
+ /// \p Tok.
+ void storeReplacement(const FormatToken &Tok, const std::string Text) {
+ // Don't create a replacement, if it does not change anything.
+ if (StringRef(SourceMgr.getCharacterData(Tok.WhiteSpaceStart),
+ Tok.WhiteSpaceLength) == Text)
+ return;
+
+ Replaces.insert(tooling::Replacement(SourceMgr, Tok.WhiteSpaceStart,
+ Tok.WhiteSpaceLength, Text));
+ }
+
+ SourceManager &SourceMgr;
+ tooling::Replacements Replaces;
+ const FormatStyle &Style;
+};
+
+class UnwrappedLineFormatter {
+public:
+ UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr,
+ const AnnotatedLine &Line, unsigned FirstIndent,
+ const AnnotatedToken &RootToken,
+ WhitespaceManager &Whitespaces, bool StructuralError)
+ : Style(Style), SourceMgr(SourceMgr), Line(Line),
+ FirstIndent(FirstIndent), RootToken(RootToken),
+ Whitespaces(Whitespaces), Count(0) {}
+
+ /// \brief Formats an \c UnwrappedLine.
+ ///
+ /// \returns The column after the last token in the last line of the
+ /// \c UnwrappedLine.
+ unsigned format(const AnnotatedLine *NextLine) {
+ // Initialize state dependent on indent.
+ LineState State;
+ State.Column = FirstIndent;
+ State.NextToken = &RootToken;
+ State.Stack.push_back(
+ ParenState(FirstIndent, FirstIndent, !Style.BinPackParameters,
+ /*HasMultiParameterLine=*/ false));
+ State.LineContainsContinuedForLoopSection = false;
+ State.ParenLevel = 0;
+ State.StartOfStringLiteral = 0;
+ State.StartOfLineLevel = State.ParenLevel;
+
+ DEBUG({
+ DebugTokenState(*State.NextToken);
+ });
+
+ // The first token has already been indented and thus consumed.
+ moveStateToNextToken(State, /*DryRun=*/ false);
+
+ // If everything fits on a single line, just put it there.
+ unsigned ColumnLimit = Style.ColumnLimit;
+ if (NextLine && NextLine->InPPDirective &&
+ !NextLine->First.FormatTok.HasUnescapedNewline)
+ ColumnLimit = getColumnLimit();
+ if (Line.Last->TotalLength <= ColumnLimit - FirstIndent) {
+ while (State.NextToken != NULL) {
+ addTokenToState(false, false, State);
+ }
+ return State.Column;
+ }
+
+ // If the ObjC method declaration does not fit on a line, we should format
+ // it with one arg per line.
+ if (Line.Type == LT_ObjCMethodDecl)
+ State.Stack.back().BreakBeforeParameter = true;
+
+ // Find best solution in solution space.
+ return analyzeSolutionSpace(State);
+ }
+
+private:
+ void DebugTokenState(const AnnotatedToken &AnnotatedTok) {
+ const Token &Tok = AnnotatedTok.FormatTok.Tok;
+ llvm::errs() << StringRef(SourceMgr.getCharacterData(Tok.getLocation()),
+ Tok.getLength());
+ llvm::errs();
+ }
+
+ struct ParenState {
+ ParenState(unsigned Indent, unsigned LastSpace, bool AvoidBinPacking,
+ bool HasMultiParameterLine)
+ : Indent(Indent), LastSpace(LastSpace), FirstLessLess(0),
+ BreakBeforeClosingBrace(false), QuestionColumn(0),
+ AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
+ HasMultiParameterLine(HasMultiParameterLine), ColonPos(0),
+ StartOfFunctionCall(0), NestedNameSpecifierContinuation(0),
+ CallContinuation(0), VariablePos(0) {}
+
+ /// \brief The position to which a specific parenthesis level needs to be
+ /// indented.
+ unsigned Indent;
+
+ /// \brief The position of the last space on each level.
+ ///
+ /// Used e.g. to break like:
+ /// functionCall(Parameter, otherCall(
+ /// OtherParameter));
+ unsigned LastSpace;
+
+ /// \brief The position the first "<<" operator encountered on each level.
+ ///
+ /// Used to align "<<" operators. 0 if no such operator has been encountered
+ /// on a level.
+ unsigned FirstLessLess;
+
+ /// \brief Whether a newline needs to be inserted before the block's closing
+ /// brace.
+ ///
+ /// We only want to insert a newline before the closing brace if there also
+ /// was a newline after the beginning left brace.
+ bool BreakBeforeClosingBrace;
+
+ /// \brief The column of a \c ? in a conditional expression;
+ unsigned QuestionColumn;
+
+ /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple
+ /// lines, in this context.
+ bool AvoidBinPacking;
+
+ /// \brief Break after the next comma (or all the commas in this context if
+ /// \c AvoidBinPacking is \c true).
+ bool BreakBeforeParameter;
+
+ /// \brief This context already has a line with more than one parameter.
+ bool HasMultiParameterLine;
+
+ /// \brief The position of the colon in an ObjC method declaration/call.
+ unsigned ColonPos;
+
+ /// \brief The start of the most recent function in a builder-type call.
+ unsigned StartOfFunctionCall;
+
+ /// \brief If a nested name specifier was broken over multiple lines, this
+ /// contains the start column of the second line. Otherwise 0.
+ unsigned NestedNameSpecifierContinuation;
+
+ /// \brief If a call expression was broken over multiple lines, this
+ /// contains the start column of the second line. Otherwise 0.
+ unsigned CallContinuation;
+
+ /// \brief The column of the first variable name in a variable declaration.
+ ///
+ /// Used to align further variables if necessary.
+ unsigned VariablePos;
+
+ bool operator<(const ParenState &Other) const {
+ if (Indent != Other.Indent)
+ return Indent < Other.Indent;
+ if (LastSpace != Other.LastSpace)
+ return LastSpace < Other.LastSpace;
+ if (FirstLessLess != Other.FirstLessLess)
+ return FirstLessLess < Other.FirstLessLess;
+ if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
+ return BreakBeforeClosingBrace;
+ if (QuestionColumn != Other.QuestionColumn)
+ return QuestionColumn < Other.QuestionColumn;
+ if (AvoidBinPacking != Other.AvoidBinPacking)
+ return AvoidBinPacking;
+ if (BreakBeforeParameter != Other.BreakBeforeParameter)
+ return BreakBeforeParameter;
+ if (HasMultiParameterLine != Other.HasMultiParameterLine)
+ return HasMultiParameterLine;
+ if (ColonPos != Other.ColonPos)
+ return ColonPos < Other.ColonPos;
+ if (StartOfFunctionCall != Other.StartOfFunctionCall)
+ return StartOfFunctionCall < Other.StartOfFunctionCall;
+ if (NestedNameSpecifierContinuation !=
+ Other.NestedNameSpecifierContinuation)
+ return NestedNameSpecifierContinuation <
+ Other.NestedNameSpecifierContinuation;
+ if (CallContinuation != Other.CallContinuation)
+ return CallContinuation < Other.CallContinuation;
+ if (VariablePos != Other.VariablePos)
+ return VariablePos < Other.VariablePos;
+ return false;
+ }
+ };
+
+ /// \brief The current state when indenting a unwrapped line.
+ ///
+ /// As the indenting tries different combinations this is copied by value.
+ struct LineState {
+ /// \brief The number of used columns in the current line.
+ unsigned Column;
+
+ /// \brief The token that needs to be next formatted.
+ const AnnotatedToken *NextToken;
+
+ /// \brief \c true if this line contains a continued for-loop section.
+ bool LineContainsContinuedForLoopSection;
+
+ /// \brief The level of nesting inside (), [], <> and {}.
+ unsigned ParenLevel;
+
+ /// \brief The \c ParenLevel at the start of this line.
+ unsigned StartOfLineLevel;
+
+ /// \brief The start column of the string literal, if we're in a string
+ /// literal sequence, 0 otherwise.
+ unsigned StartOfStringLiteral;
+
+ /// \brief A stack keeping track of properties applying to parenthesis
+ /// levels.
+ std::vector<ParenState> Stack;
+
+ /// \brief Comparison operator to be able to used \c LineState in \c map.
+ bool operator<(const LineState &Other) const {
+ if (NextToken != Other.NextToken)
+ return NextToken < Other.NextToken;
+ if (Column != Other.Column)
+ return Column < Other.Column;
+ if (LineContainsContinuedForLoopSection !=
+ Other.LineContainsContinuedForLoopSection)
+ return LineContainsContinuedForLoopSection;
+ if (ParenLevel != Other.ParenLevel)
+ return ParenLevel < Other.ParenLevel;
+ if (StartOfLineLevel != Other.StartOfLineLevel)
+ return StartOfLineLevel < Other.StartOfLineLevel;
+ if (StartOfStringLiteral != Other.StartOfStringLiteral)
+ return StartOfStringLiteral < Other.StartOfStringLiteral;
+ return Stack < Other.Stack;
+ }
+ };
+
+ /// \brief Appends the next token to \p State and updates information
+ /// necessary for indentation.
+ ///
+ /// Puts the token on the current line if \p Newline is \c true and adds a
+ /// line break and necessary indentation otherwise.
+ ///
+ /// If \p DryRun is \c false, also creates and stores the required
+ /// \c Replacement.
+ unsigned addTokenToState(bool Newline, bool DryRun, LineState &State) {
+ const AnnotatedToken &Current = *State.NextToken;
+ const AnnotatedToken &Previous = *State.NextToken->Parent;
+
+ if (State.Stack.size() == 0 || Current.Type == TT_ImplicitStringLiteral) {
+ State.Column += State.NextToken->FormatTok.WhiteSpaceLength +
+ State.NextToken->FormatTok.TokenLength;
+ if (State.NextToken->Children.empty())
+ State.NextToken = NULL;
+ else
+ State.NextToken = &State.NextToken->Children[0];
+ return 0;
+ }
+
+ // If we are continuing an expression, we want to indent an extra 4 spaces.
+ unsigned ContinuationIndent =
+ std::max(State.Stack.back().LastSpace, State.Stack.back().Indent) + 4;
+ if (Newline) {
+ unsigned WhitespaceStartColumn = State.Column;
+ if (Current.is(tok::r_brace)) {
+ State.Column = Line.Level * 2;
+ } else if (Current.is(tok::string_literal) &&
+ State.StartOfStringLiteral != 0) {
+ State.Column = State.StartOfStringLiteral;
+ State.Stack.back().BreakBeforeParameter = true;
+ } else if (Current.is(tok::lessless) &&
+ State.Stack.back().FirstLessLess != 0) {
+ State.Column = State.Stack.back().FirstLessLess;
+ } else if (Previous.is(tok::coloncolon)) {
+ if (State.Stack.back().NestedNameSpecifierContinuation == 0) {
+ State.Column = ContinuationIndent;
+ State.Stack.back().NestedNameSpecifierContinuation = State.Column;
+ } else {
+ State.Column = State.Stack.back().NestedNameSpecifierContinuation;
+ }
+ } else if (Current.isOneOf(tok::period, tok::arrow)) {
+ if (State.Stack.back().CallContinuation == 0) {
+ State.Column = ContinuationIndent;
+ State.Stack.back().CallContinuation = State.Column;
+ } else {
+ State.Column = State.Stack.back().CallContinuation;
+ }
+ } else if (Current.Type == TT_ConditionalExpr) {
+ State.Column = State.Stack.back().QuestionColumn;
+ } else if (Previous.is(tok::comma) &&
+ State.Stack.back().VariablePos != 0) {
+ State.Column = State.Stack.back().VariablePos;
+ } else if (Previous.ClosesTemplateDeclaration ||
+ (Current.Type == TT_StartOfName && State.ParenLevel == 0)) {
+ State.Column = State.Stack.back().Indent;
+ } else if (Current.Type == TT_ObjCSelectorName) {
+ if (State.Stack.back().ColonPos > Current.FormatTok.TokenLength) {
+ State.Column =
+ State.Stack.back().ColonPos - Current.FormatTok.TokenLength;
+ } else {
+ State.Column = State.Stack.back().Indent;
+ State.Stack.back().ColonPos =
+ State.Column + Current.FormatTok.TokenLength;
+ }
+ } else if (Current.Type == TT_StartOfName || Current.is(tok::question) ||
+ Previous.is(tok::equal) || isComparison(Previous) ||
+ Previous.Type == TT_ObjCMethodExpr) {
+ State.Column = ContinuationIndent;
+ } else {
+ State.Column = State.Stack.back().Indent;
+ // Ensure that we fall back to indenting 4 spaces instead of just
+ // flushing continuations left.
+ if (State.Column == FirstIndent)
+ State.Column += 4;
+ }
+
+ if (Current.is(tok::question))
+ State.Stack.back().BreakBeforeParameter = true;
+ if (Previous.isOneOf(tok::comma, tok::semi) &&
+ !State.Stack.back().AvoidBinPacking)
+ State.Stack.back().BreakBeforeParameter = false;
+
+ if (!DryRun) {
+ unsigned NewLines = 1;
+ if (Current.Type == TT_LineComment)
+ NewLines =
+ std::max(NewLines, std::min(Current.FormatTok.NewlinesBefore,
+ Style.MaxEmptyLinesToKeep + 1));
+ if (!Line.InPPDirective)
+ Whitespaces.replaceWhitespace(Current, NewLines, State.Column,
+ WhitespaceStartColumn);
+ else
+ Whitespaces.replacePPWhitespace(Current, NewLines, State.Column,
+ WhitespaceStartColumn);
+ }
+
+ State.Stack.back().LastSpace = State.Column;
+ State.StartOfLineLevel = State.ParenLevel;
+
+ // Any break on this level means that the parent level has been broken
+ // and we need to avoid bin packing there.
+ for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i) {
+ State.Stack[i].BreakBeforeParameter = true;
+ }
+ if (Current.isOneOf(tok::period, tok::arrow))
+ State.Stack.back().BreakBeforeParameter = true;
+
+ // If we break after {, we should also break before the corresponding }.
+ if (Previous.is(tok::l_brace))
+ State.Stack.back().BreakBeforeClosingBrace = true;
+
+ if (State.Stack.back().AvoidBinPacking) {
+ // If we are breaking after '(', '{', '<', this is not bin packing
+ // unless AllowAllParametersOfDeclarationOnNextLine is false.
+ if ((Previous.isNot(tok::l_paren) && Previous.isNot(tok::l_brace)) ||
+ (!Style.AllowAllParametersOfDeclarationOnNextLine &&
+ Line.MustBeDeclaration))
+ State.Stack.back().BreakBeforeParameter = true;
+ }
+ } else {
+ if (Current.is(tok::equal) &&
+ (RootToken.is(tok::kw_for) || State.ParenLevel == 0) &&
+ State.Stack.back().VariablePos == 0) {
+ State.Stack.back().VariablePos = State.Column;
+ // Move over * and & if they are bound to the variable name.
+ const AnnotatedToken *Tok = &Previous;
+ while (Tok &&
+ State.Stack.back().VariablePos >= Tok->FormatTok.TokenLength) {
+ State.Stack.back().VariablePos -= Tok->FormatTok.TokenLength;
+ if (Tok->SpacesRequiredBefore != 0)
+ break;
+ Tok = Tok->Parent;
+ }
+ if (Previous.PartOfMultiVariableDeclStmt)
+ State.Stack.back().LastSpace = State.Stack.back().VariablePos;
+ }
+
+ unsigned Spaces = State.NextToken->SpacesRequiredBefore;
+
+ if (!DryRun)
+ Whitespaces.replaceWhitespace(Current, 0, Spaces, State.Column);
+
+ if (Current.Type == TT_ObjCSelectorName &&
+ State.Stack.back().ColonPos == 0) {
+ if (State.Stack.back().Indent + Current.LongestObjCSelectorName >
+ State.Column + Spaces + Current.FormatTok.TokenLength)
+ State.Stack.back().ColonPos =
+ State.Stack.back().Indent + Current.LongestObjCSelectorName;
+ else
+ State.Stack.back().ColonPos =
+ State.Column + Spaces + Current.FormatTok.TokenLength;
+ }
+
+ if (Current.Type != TT_LineComment &&
+ (Previous.isOneOf(tok::l_paren, tok::l_brace) ||
+ State.NextToken->Parent->Type == TT_TemplateOpener))
+ State.Stack.back().Indent = State.Column + Spaces;
+ if (Previous.is(tok::comma) && !isTrailingComment(Current))
+ State.Stack.back().HasMultiParameterLine = true;
+
+ State.Column += Spaces;
+ if (Current.is(tok::l_paren) && Previous.isOneOf(tok::kw_if, tok::kw_for))
+ // Treat the condition inside an if as if it was a second function
+ // parameter, i.e. let nested calls have an indent of 4.
+ State.Stack.back().LastSpace = State.Column + 1; // 1 is length of "(".
+ else if (Previous.is(tok::comma))
+ State.Stack.back().LastSpace = State.Column;
+ else if ((Previous.Type == TT_BinaryOperator ||
+ Previous.Type == TT_ConditionalExpr ||
+ Previous.Type == TT_CtorInitializerColon) &&
+ getPrecedence(Previous) != prec::Assignment)
+ State.Stack.back().LastSpace = State.Column;
+ else if (Previous.Type == TT_InheritanceColon)
+ State.Stack.back().Indent = State.Column;
+ else if (Previous.ParameterCount > 1 &&
+ (Previous.isOneOf(tok::l_paren, tok::l_square, tok::l_brace) ||
+ Previous.Type == TT_TemplateOpener))
+ // If this function has multiple parameters, indent nested calls from
+ // the start of the first parameter.
+ State.Stack.back().LastSpace = State.Column;
+ }
+
+ return moveStateToNextToken(State, DryRun);
+ }
+
+ /// \brief Mark the next token as consumed in \p State and modify its stacks
+ /// accordingly.
+ unsigned moveStateToNextToken(LineState &State, bool DryRun) {
+ const AnnotatedToken &Current = *State.NextToken;
+ assert(State.Stack.size());
+
+ if (Current.Type == TT_InheritanceColon)
+ State.Stack.back().AvoidBinPacking = true;
+ if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0)
+ State.Stack.back().FirstLessLess = State.Column;
+ if (Current.is(tok::question))
+ State.Stack.back().QuestionColumn = State.Column;
+ if (Current.isOneOf(tok::period, tok::arrow) &&
+ Line.Type == LT_BuilderTypeCall && State.ParenLevel == 0)
+ State.Stack.back().StartOfFunctionCall =
+ Current.LastInChainOfCalls ? 0 : State.Column;
+ if (Current.Type == TT_CtorInitializerColon) {
+ if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine)
+ State.Stack.back().AvoidBinPacking = true;
+ State.Stack.back().BreakBeforeParameter = false;
+ }
+
+ // In ObjC method declaration we align on the ":" of parameters, but we need
+ // to ensure that we indent parameters on subsequent lines by at least 4.
+ if (Current.Type == TT_ObjCMethodSpecifier)
+ State.Stack.back().Indent += 4;
+
+ // Insert scopes created by fake parenthesis.
+ for (unsigned i = 0, e = Current.FakeLParens; i != e; ++i) {
+ ParenState NewParenState = State.Stack.back();
+ NewParenState.Indent = std::max(State.Column, State.Stack.back().Indent);
+ NewParenState.BreakBeforeParameter = false;
+ State.Stack.push_back(NewParenState);
+ }
+
+ // If we encounter an opening (, [, { or <, we add a level to our stacks to
+ // prepare for the following tokens.
+ if (Current.isOneOf(tok::l_paren, tok::l_square, tok::l_brace) ||
+ State.NextToken->Type == TT_TemplateOpener) {
+ unsigned NewIndent;
+ bool AvoidBinPacking;
+ if (Current.is(tok::l_brace)) {
+ NewIndent = 2 + State.Stack.back().LastSpace;
+ AvoidBinPacking = false;
+ } else {
+ NewIndent = 4 + std::max(State.Stack.back().LastSpace,
+ State.Stack.back().StartOfFunctionCall);
+ AvoidBinPacking =
+ !Style.BinPackParameters || State.Stack.back().AvoidBinPacking;
+ }
+ State.Stack.push_back(
+ ParenState(NewIndent, State.Stack.back().LastSpace, AvoidBinPacking,
+ State.Stack.back().HasMultiParameterLine));
+ ++State.ParenLevel;
+ }
+
+ // If this '[' opens an ObjC call, determine whether all parameters fit into
+ // one line and put one per line if they don't.
+ if (Current.is(tok::l_square) && Current.Type == TT_ObjCMethodExpr &&
+ Current.MatchingParen != NULL) {
+ if (getLengthToMatchingParen(Current) + State.Column > getColumnLimit())
+ State.Stack.back().BreakBeforeParameter = true;
+ }
+
+ // If we encounter a closing ), ], } or >, we can remove a level from our
+ // stacks.
+ if (Current.isOneOf(tok::r_paren, tok::r_square) ||
+ (Current.is(tok::r_brace) && State.NextToken != &RootToken) ||
+ State.NextToken->Type == TT_TemplateCloser) {
+ State.Stack.pop_back();
+ --State.ParenLevel;
+ }
+
+ // Remove scopes created by fake parenthesis.
+ for (unsigned i = 0, e = Current.FakeRParens; i != e; ++i) {
+ unsigned VariablePos = State.Stack.back().VariablePos;
+ State.Stack.pop_back();
+ State.Stack.back().VariablePos = VariablePos;
+ }
+
+ if (Current.is(tok::string_literal)) {
+ State.StartOfStringLiteral = State.Column;
+ } else if (Current.isNot(tok::comment)) {
+ State.StartOfStringLiteral = 0;
+ }
+
+ State.Column += Current.FormatTok.TokenLength;
+
+ if (State.NextToken->Children.empty())
+ State.NextToken = NULL;
+ else
+ State.NextToken = &State.NextToken->Children[0];
+
+ return breakProtrudingToken(Current, State, DryRun);
+ }
+
+ /// \brief If the current token sticks out over the end of the line, break
+ /// it if possible.
+ unsigned breakProtrudingToken(const AnnotatedToken &Current, LineState &State,
+ bool DryRun) {
+ if (Current.isNot(tok::string_literal))
+ return 0;
+ // Only break up default narrow strings.
+ const char *LiteralData = Current.FormatTok.Tok.getLiteralData();
+ if (!LiteralData || *LiteralData != '"')
+ return 0;
+
+ unsigned Penalty = 0;
+ unsigned TailOffset = 0;
+ unsigned TailLength = Current.FormatTok.TokenLength;
+ unsigned StartColumn = State.Column - Current.FormatTok.TokenLength;
+ unsigned OffsetFromStart = 0;
+ while (StartColumn + TailLength > getColumnLimit()) {
+ StringRef Text = StringRef(LiteralData + TailOffset, TailLength);
+ if (StartColumn + OffsetFromStart + 1 > getColumnLimit())
+ break;
+ StringRef::size_type SplitPoint = getSplitPoint(
+ Text, getColumnLimit() - StartColumn - OffsetFromStart - 1);
+ if (SplitPoint == StringRef::npos)
+ break;
+ assert(SplitPoint != 0);
+ // +2, because 'Text' starts after the opening quotes, and does not
+ // include the closing quote we need to insert.
+ unsigned WhitespaceStartColumn =
+ StartColumn + OffsetFromStart + SplitPoint + 2;
+ State.Stack.back().LastSpace = StartColumn;
+ if (!DryRun) {
+ Whitespaces.breakToken(Current.FormatTok, TailOffset + SplitPoint + 1,
+ 0, "\"", "\"", Line.InPPDirective, StartColumn,
+ WhitespaceStartColumn);
+ }
+ TailOffset += SplitPoint + 1;
+ TailLength -= SplitPoint + 1;
+ OffsetFromStart = 1;
+ Penalty += Style.PenaltyExcessCharacter;
+ for (unsigned i = 0, e = State.Stack.size(); i != e; ++i)
+ State.Stack[i].BreakBeforeParameter = true;
+ }
+ State.Column = StartColumn + TailLength;
+ return Penalty;
+ }
+
+ StringRef::size_type
+ getSplitPoint(StringRef Text, StringRef::size_type Offset) {
+ StringRef::size_type SpaceOffset = Text.rfind(' ', Offset);
+ if (SpaceOffset != StringRef::npos && SpaceOffset != 0)
+ return SpaceOffset;
+ StringRef::size_type SlashOffset = Text.rfind('/', Offset);
+ if (SlashOffset != StringRef::npos && SlashOffset != 0)
+ return SlashOffset;
+ StringRef::size_type Split = getStartOfCharacter(Text, Offset);
+ if (Split != StringRef::npos && Split > 1)
+ // Do not split at 0.
+ return Split - 1;
+ return StringRef::npos;
+ }
+
+ StringRef::size_type
+ getStartOfCharacter(StringRef Text, StringRef::size_type Offset) {
+ StringRef::size_type NextEscape = Text.find('\\');
+ while (NextEscape != StringRef::npos && NextEscape < Offset) {
+ StringRef::size_type SequenceLength =
+ getEscapeSequenceLength(Text.substr(NextEscape));
+ if (Offset < NextEscape + SequenceLength)
+ return NextEscape;
+ NextEscape = Text.find('\\', NextEscape + SequenceLength);
+ }
+ return Offset;
+ }
+
+ unsigned getEscapeSequenceLength(StringRef Text) {
+ assert(Text[0] == '\\');
+ if (Text.size() < 2)
+ return 1;
+
+ switch (Text[1]) {
+ case 'u':
+ return 6;
+ case 'U':
+ return 10;
+ case 'x':
+ return getHexLength(Text);
+ default:
+ if (Text[1] >= '0' && Text[1] <= '7')
+ return getOctalLength(Text);
+ return 2;
+ }
+ }
+
+ unsigned getHexLength(StringRef Text) {
+ unsigned I = 2; // Point after '\x'.
+ while (I < Text.size() && ((Text[I] >= '0' && Text[I] <= '9') ||
+ (Text[I] >= 'a' && Text[I] <= 'f') ||
+ (Text[I] >= 'A' && Text[I] <= 'F'))) {
+ ++I;
+ }
+ return I;
+ }
+
+ unsigned getOctalLength(StringRef Text) {
+ unsigned I = 1;
+ while (I < Text.size() && I < 4 && (Text[I] >= '0' && Text[I] <= '7')) {
+ ++I;
+ }
+ return I;
+ }
+
+ unsigned getColumnLimit() {
+ return calculateColumnLimit(Style, Line.InPPDirective);
+ }
+
+ /// \brief An edge in the solution space from \c Previous->State to \c State,
+ /// inserting a newline dependent on the \c NewLine.
+ struct StateNode {
+ StateNode(const LineState &State, bool NewLine, StateNode *Previous)
+ : State(State), NewLine(NewLine), Previous(Previous) {}
+ LineState State;
+ bool NewLine;
+ StateNode *Previous;
+ };
+
+ /// \brief A pair of <penalty, count> that is used to prioritize the BFS on.
+ ///
+ /// In case of equal penalties, we want to prefer states that were inserted
+ /// first. During state generation we make sure that we insert states first
+ /// that break the line as late as possible.
+ typedef std::pair<unsigned, unsigned> OrderedPenalty;
+
+ /// \brief An item in the prioritized BFS search queue. The \c StateNode's
+ /// \c State has the given \c OrderedPenalty.
+ typedef std::pair<OrderedPenalty, StateNode *> QueueItem;
+
+ /// \brief The BFS queue type.
+ typedef std::priority_queue<QueueItem, std::vector<QueueItem>,
+ std::greater<QueueItem> > QueueType;
+
+ /// \brief Analyze the entire solution space starting from \p InitialState.
+ ///
+ /// This implements a variant of Dijkstra's algorithm on the graph that spans
+ /// the solution space (\c LineStates are the nodes). The algorithm tries to
+ /// find the shortest path (the one with lowest penalty) from \p InitialState
+ /// to a state where all tokens are placed.
+ unsigned analyzeSolutionSpace(LineState &InitialState) {
+ std::set<LineState> Seen;
+
+ // Insert start element into queue.
+ StateNode *Node =
+ new (Allocator.Allocate()) StateNode(InitialState, false, NULL);
+ Queue.push(QueueItem(OrderedPenalty(0, Count), Node));
+ ++Count;
+
+ // While not empty, take first element and follow edges.
+ while (!Queue.empty()) {
+ unsigned Penalty = Queue.top().first.first;
+ StateNode *Node = Queue.top().second;
+ if (Node->State.NextToken == NULL) {
+ DEBUG(llvm::errs() << "\n---\nPenalty for line: " << Penalty << "\n");
+ break;
+ }
+ Queue.pop();
+
+ if (!Seen.insert(Node->State).second)
+ // State already examined with lower penalty.
+ continue;
+
+ addNextStateToQueue(Penalty, Node, /*NewLine=*/ false);
+ addNextStateToQueue(Penalty, Node, /*NewLine=*/ true);
+ }
+
+ if (Queue.empty())
+ // We were unable to find a solution, do nothing.
+ // FIXME: Add diagnostic?
+ return 0;
+
+ // Reconstruct the solution.
+ reconstructPath(InitialState, Queue.top().second);
+ DEBUG(llvm::errs() << "---\n");
+
+ // Return the column after the last token of the solution.
+ return Queue.top().second->State.Column;
+ }
+
+ void reconstructPath(LineState &State, StateNode *Current) {
+ // FIXME: This recursive implementation limits the possible number
+ // of tokens per line if compiled into a binary with small stack space.
+ // To become more independent of stack frame limitations we would need
+ // to also change the TokenAnnotator.
+ if (Current->Previous == NULL)
+ return;
+ reconstructPath(State, Current->Previous);
+ DEBUG({
+ if (Current->NewLine) {
+ llvm::errs()
+ << "Penalty for splitting before "
+ << Current->Previous->State.NextToken->FormatTok.Tok.getName()
+ << ": " << Current->Previous->State.NextToken->SplitPenalty << "\n";
+ }
+ });
+ addTokenToState(Current->NewLine, false, State);
+ }
+
+ /// \brief Add the following state to the analysis queue \c Queue.
+ ///
+ /// Assume the current state is \p PreviousNode and has been reached with a
+ /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true.
+ void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode,
+ bool NewLine) {
+ if (NewLine && !canBreak(PreviousNode->State))
+ return;
+ if (!NewLine && mustBreak(PreviousNode->State))
+ return;
+ if (NewLine)
+ Penalty += PreviousNode->State.NextToken->SplitPenalty;
+
+ StateNode *Node = new (Allocator.Allocate())
+ StateNode(PreviousNode->State, NewLine, PreviousNode);
+ Penalty += addTokenToState(NewLine, true, Node->State);
+ if (Node->State.Column > getColumnLimit()) {
+ unsigned ExcessCharacters = Node->State.Column - getColumnLimit();
+ Penalty += Style.PenaltyExcessCharacter * ExcessCharacters;
+ }
+
+ Queue.push(QueueItem(OrderedPenalty(Penalty, Count), Node));
+ ++Count;
+ }
+
+ /// \brief Returns \c true, if a line break after \p State is allowed.
+ bool canBreak(const LineState &State) {
+ if (!State.NextToken->CanBreakBefore &&
+ !(State.NextToken->is(tok::r_brace) &&
+ State.Stack.back().BreakBeforeClosingBrace))
+ return false;
+ // Trying to insert a parameter on a new line if there are already more than
+ // one parameter on the current line is bin packing.
+ if (State.Stack.back().HasMultiParameterLine &&
+ State.Stack.back().AvoidBinPacking)
+ return false;
+ return true;
+ }
+
+ /// \brief Returns \c true, if a line break after \p State is mandatory.
+ bool mustBreak(const LineState &State) {
+ if (State.NextToken->MustBreakBefore)
+ return true;
+ if (State.NextToken->is(tok::r_brace) &&
+ State.Stack.back().BreakBeforeClosingBrace)
+ return true;
+ if (State.NextToken->Parent->is(tok::semi) &&
+ State.LineContainsContinuedForLoopSection)
+ return true;
+ if ((State.NextToken->Parent->isOneOf(tok::comma, tok::semi) ||
+ State.NextToken->is(tok::question) ||
+ State.NextToken->Type == TT_ConditionalExpr) &&
+ State.Stack.back().BreakBeforeParameter &&
+ !isTrailingComment(*State.NextToken) &&
+ State.NextToken->isNot(tok::r_paren) &&
+ State.NextToken->isNot(tok::r_brace))
+ return true;
+ // FIXME: Comparing LongestObjCSelectorName to 0 is a hacky way of finding
+ // out whether it is the first parameter. Clean this up.
+ if (State.NextToken->Type == TT_ObjCSelectorName &&
+ State.NextToken->LongestObjCSelectorName == 0 &&
+ State.Stack.back().BreakBeforeParameter)
+ return true;
+ if ((State.NextToken->Type == TT_CtorInitializerColon ||
+ (State.NextToken->Parent->ClosesTemplateDeclaration &&
+ State.ParenLevel == 0)))
+ return true;
+ if (State.NextToken->Type == TT_InlineASMColon)
+ return true;
+ // This prevents breaks like:
+ // ...
+ // SomeParameter, OtherParameter).DoSomething(
+ // ...
+ // As they hide "DoSomething" and generally bad for readability.
+ if (State.NextToken->isOneOf(tok::period, tok::arrow) &&
+ getRemainingLength(State) + State.Column > getColumnLimit() &&
+ State.ParenLevel < State.StartOfLineLevel)
+ return true;
+ return false;
+ }
+
+ // Returns the total number of columns required for the remaining tokens.
+ unsigned getRemainingLength(const LineState &State) {
+ if (State.NextToken && State.NextToken->Parent)
+ return Line.Last->TotalLength - State.NextToken->Parent->TotalLength;
+ return 0;
+ }
+
+ FormatStyle Style;
+ SourceManager &SourceMgr;
+ const AnnotatedLine &Line;
+ const unsigned FirstIndent;
+ const AnnotatedToken &RootToken;
+ WhitespaceManager &Whitespaces;
+
+ llvm::SpecificBumpPtrAllocator<StateNode> Allocator;
+ QueueType Queue;
+ // Increasing count of \c StateNode items we have created. This is used
+ // to create a deterministic order independent of the container.
+ unsigned Count;
+};
+
+class LexerBasedFormatTokenSource : public FormatTokenSource {
+public:
+ LexerBasedFormatTokenSource(Lexer &Lex, SourceManager &SourceMgr)
+ : GreaterStashed(false), Lex(Lex), SourceMgr(SourceMgr),
+ IdentTable(Lex.getLangOpts()) {
+ Lex.SetKeepWhitespaceMode(true);
+ }
+
+ virtual FormatToken getNextToken() {
+ if (GreaterStashed) {
+ FormatTok.NewlinesBefore = 0;
+ FormatTok.WhiteSpaceStart =
+ FormatTok.Tok.getLocation().getLocWithOffset(1);
+ FormatTok.WhiteSpaceLength = 0;
+ GreaterStashed = false;
+ return FormatTok;
+ }
+
+ FormatTok = FormatToken();
+ Lex.LexFromRawLexer(FormatTok.Tok);
+ StringRef Text = rawTokenText(FormatTok.Tok);
+ FormatTok.WhiteSpaceStart = FormatTok.Tok.getLocation();
+ if (SourceMgr.getFileOffset(FormatTok.WhiteSpaceStart) == 0)
+ FormatTok.IsFirst = true;
+
+ // Consume and record whitespace until we find a significant token.
+ while (FormatTok.Tok.is(tok::unknown)) {
+ unsigned Newlines = Text.count('\n');
+ if (Newlines > 0)
+ FormatTok.LastNewlineOffset =
+ FormatTok.WhiteSpaceLength + Text.rfind('\n') + 1;
+ unsigned EscapedNewlines = Text.count("\\\n");
+ FormatTok.NewlinesBefore += Newlines;
+ FormatTok.HasUnescapedNewline |= EscapedNewlines != Newlines;
+ FormatTok.WhiteSpaceLength += FormatTok.Tok.getLength();
+
+ if (FormatTok.Tok.is(tok::eof))
+ return FormatTok;
+ Lex.LexFromRawLexer(FormatTok.Tok);
+ Text = rawTokenText(FormatTok.Tok);
+ }
+
+ // Now FormatTok is the next non-whitespace token.
+ FormatTok.TokenLength = Text.size();
+
+ // In case the token starts with escaped newlines, we want to
+ // take them into account as whitespace - this pattern is quite frequent
+ // in macro definitions.
+ // FIXME: What do we want to do with other escaped spaces, and escaped
+ // spaces or newlines in the middle of tokens?
+ // FIXME: Add a more explicit test.
+ unsigned i = 0;
+ while (i + 1 < Text.size() && Text[i] == '\\' && Text[i + 1] == '\n') {
+ // FIXME: ++FormatTok.NewlinesBefore is missing...
+ FormatTok.WhiteSpaceLength += 2;
+ FormatTok.TokenLength -= 2;
+ i += 2;
+ }
+
+ if (FormatTok.Tok.is(tok::raw_identifier)) {
+ IdentifierInfo &Info = IdentTable.get(Text);
+ FormatTok.Tok.setIdentifierInfo(&Info);
+ FormatTok.Tok.setKind(Info.getTokenID());
+ }
+
+ if (FormatTok.Tok.is(tok::greatergreater)) {
+ FormatTok.Tok.setKind(tok::greater);
+ FormatTok.TokenLength = 1;
+ GreaterStashed = true;
+ }
+
+ // If we reformat comments, we remove trailing whitespace. Update the length
+ // accordingly.
+ if (FormatTok.Tok.is(tok::comment))
+ FormatTok.TokenLength = Text.rtrim().size();
+
+ return FormatTok;
+ }
+
+ IdentifierTable &getIdentTable() { return IdentTable; }
+
+private:
+ FormatToken FormatTok;
+ bool GreaterStashed;
+ Lexer &Lex;
+ SourceManager &SourceMgr;
+ IdentifierTable IdentTable;
+
+ /// Returns the text of \c FormatTok.
+ StringRef rawTokenText(Token &Tok) {
+ return StringRef(SourceMgr.getCharacterData(Tok.getLocation()),
+ Tok.getLength());
+ }
+};
+
+class Formatter : public UnwrappedLineConsumer {
+public:
+ Formatter(DiagnosticsEngine &Diag, const FormatStyle &Style, Lexer &Lex,
+ SourceManager &SourceMgr,
+ const std::vector<CharSourceRange> &Ranges)
+ : Diag(Diag), Style(Style), Lex(Lex), SourceMgr(SourceMgr),
+ Whitespaces(SourceMgr, Style), Ranges(Ranges) {}
+
+ virtual ~Formatter() {}
+
+ tooling::Replacements format() {
+ LexerBasedFormatTokenSource Tokens(Lex, SourceMgr);
+ UnwrappedLineParser Parser(Diag, Style, Tokens, *this);
+ StructuralError = Parser.parse();
+ unsigned PreviousEndOfLineColumn = 0;
+ TokenAnnotator Annotator(Style, SourceMgr, Lex,
+ Tokens.getIdentTable().get("in"));
+ for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
+ Annotator.annotate(AnnotatedLines[i]);
+ }
+ deriveLocalStyle();
+ for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
+ Annotator.calculateFormattingInformation(AnnotatedLines[i]);
+
+ // Adapt level to the next line if this is a comment.
+ // FIXME: Can/should this be done in the UnwrappedLineParser?
+ if (i + 1 != e && AnnotatedLines[i].First.is(tok::comment) &&
+ AnnotatedLines[i].First.Children.empty() &&
+ AnnotatedLines[i + 1].First.isNot(tok::r_brace))
+ AnnotatedLines[i].Level = AnnotatedLines[i + 1].Level;
+ }
+ std::vector<int> IndentForLevel;
+ bool PreviousLineWasTouched = false;
+ const AnnotatedToken *PreviousLineLastToken = 0;
+ for (std::vector<AnnotatedLine>::iterator I = AnnotatedLines.begin(),
+ E = AnnotatedLines.end();
+ I != E; ++I) {
+ const AnnotatedLine &TheLine = *I;
+ const FormatToken &FirstTok = TheLine.First.FormatTok;
+ int Offset = getIndentOffset(TheLine.First);
+ while (IndentForLevel.size() <= TheLine.Level)
+ IndentForLevel.push_back(-1);
+ IndentForLevel.resize(TheLine.Level + 1);
+ bool WasMoved = PreviousLineWasTouched && FirstTok.NewlinesBefore == 0;
+ if (TheLine.First.is(tok::eof)) {
+ if (PreviousLineWasTouched) {
+ unsigned NewLines = std::min(FirstTok.NewlinesBefore, 1u);
+ Whitespaces.replaceWhitespace(TheLine.First, NewLines, /*Indent*/ 0,
+ /*WhitespaceStartColumn*/ 0);
+ }
+ } else if (TheLine.Type != LT_Invalid &&
+ (WasMoved || touchesLine(TheLine))) {
+ unsigned LevelIndent = getIndent(IndentForLevel, TheLine.Level);
+ unsigned Indent = LevelIndent;
+ if (static_cast<int>(Indent) + Offset >= 0)
+ Indent += Offset;
+ if (!FirstTok.WhiteSpaceStart.isValid() || StructuralError) {
+ Indent = LevelIndent =
+ SourceMgr.getSpellingColumnNumber(FirstTok.Tok.getLocation()) - 1;
+ } else {
+ formatFirstToken(TheLine.First, PreviousLineLastToken, Indent,
+ TheLine.InPPDirective, PreviousEndOfLineColumn);
+ }
+ tryFitMultipleLinesInOne(Indent, I, E);
+ UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine, Indent,
+ TheLine.First, Whitespaces,
+ StructuralError);
+ PreviousEndOfLineColumn =
+ Formatter.format(I + 1 != E ? &*(I + 1) : NULL);
+ IndentForLevel[TheLine.Level] = LevelIndent;
+ PreviousLineWasTouched = true;
+ } else {
+ if (FirstTok.NewlinesBefore > 0 || FirstTok.IsFirst) {
+ unsigned Indent =
+ SourceMgr.getSpellingColumnNumber(FirstTok.Tok.getLocation()) - 1;
+ unsigned LevelIndent = Indent;
+ if (static_cast<int>(LevelIndent) - Offset >= 0)
+ LevelIndent -= Offset;
+ if (TheLine.First.isNot(tok::comment))
+ IndentForLevel[TheLine.Level] = LevelIndent;
+
+ // Remove trailing whitespace of the previous line if it was touched.
+ if (PreviousLineWasTouched || touchesEmptyLineBefore(TheLine))
+ formatFirstToken(TheLine.First, PreviousLineLastToken, Indent,
+ TheLine.InPPDirective, PreviousEndOfLineColumn);
+ }
+ // If we did not reformat this unwrapped line, the column at the end of
+ // the last token is unchanged - thus, we can calculate the end of the
+ // last token.
+ SourceLocation LastLoc = TheLine.Last->FormatTok.Tok.getLocation();
+ PreviousEndOfLineColumn =
+ SourceMgr.getSpellingColumnNumber(LastLoc) +
+ Lex.MeasureTokenLength(LastLoc, SourceMgr, Lex.getLangOpts()) - 1;
+ PreviousLineWasTouched = false;
+ if (TheLine.Last->is(tok::comment))
+ Whitespaces.addUntouchableComment(SourceMgr.getSpellingColumnNumber(
+ TheLine.Last->FormatTok.Tok.getLocation()) - 1);
+ }
+ PreviousLineLastToken = I->Last;
+ }
+ return Whitespaces.generateReplacements();
+ }
+
+private:
+ void deriveLocalStyle() {
+ unsigned CountBoundToVariable = 0;
+ unsigned CountBoundToType = 0;
+ bool HasCpp03IncompatibleFormat = false;
+ for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
+ if (AnnotatedLines[i].First.Children.empty())
+ continue;
+ AnnotatedToken *Tok = &AnnotatedLines[i].First.Children[0];
+ while (!Tok->Children.empty()) {
+ if (Tok->Type == TT_PointerOrReference) {
+ bool SpacesBefore = Tok->FormatTok.WhiteSpaceLength > 0;
+ bool SpacesAfter = Tok->Children[0].FormatTok.WhiteSpaceLength > 0;
+ if (SpacesBefore && !SpacesAfter)
+ ++CountBoundToVariable;
+ else if (!SpacesBefore && SpacesAfter)
+ ++CountBoundToType;
+ }
+
+ if (Tok->Type == TT_TemplateCloser &&
+ Tok->Parent->Type == TT_TemplateCloser &&
+ Tok->FormatTok.WhiteSpaceLength == 0)
+ HasCpp03IncompatibleFormat = true;
+ Tok = &Tok->Children[0];
+ }
+ }
+ if (Style.DerivePointerBinding) {
+ if (CountBoundToType > CountBoundToVariable)
+ Style.PointerBindsToType = true;
+ else if (CountBoundToType < CountBoundToVariable)
+ Style.PointerBindsToType = false;
+ }
+ if (Style.Standard == FormatStyle::LS_Auto) {
+ Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11
+ : FormatStyle::LS_Cpp03;
+ }
+ }
+
+ /// \brief Get the indent of \p Level from \p IndentForLevel.
+ ///
+ /// \p IndentForLevel must contain the indent for the level \c l
+ /// at \p IndentForLevel[l], or a value < 0 if the indent for
+ /// that level is unknown.
+ unsigned getIndent(const std::vector<int> IndentForLevel, unsigned Level) {
+ if (IndentForLevel[Level] != -1)
+ return IndentForLevel[Level];
+ if (Level == 0)
+ return 0;
+ return getIndent(IndentForLevel, Level - 1) + 2;
+ }
+
+ /// \brief Get the offset of the line relatively to the level.
+ ///
+ /// For example, 'public:' labels in classes are offset by 1 or 2
+ /// characters to the left from their level.
+ int getIndentOffset(const AnnotatedToken &RootToken) {
+ if (RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier())
+ return Style.AccessModifierOffset;
+ return 0;
+ }
+
+ /// \brief Tries to merge lines into one.
+ ///
+ /// This will change \c Line and \c AnnotatedLine to contain the merged line,
+ /// if possible; note that \c I will be incremented when lines are merged.
+ ///
+ /// Returns whether the resulting \c Line can fit in a single line.
+ void tryFitMultipleLinesInOne(unsigned Indent,
+ std::vector<AnnotatedLine>::iterator &I,
+ std::vector<AnnotatedLine>::iterator E) {
+ // We can never merge stuff if there are trailing line comments.
+ if (I->Last->Type == TT_LineComment)
+ return;
+
+ unsigned Limit = Style.ColumnLimit - Indent;
+ // If we already exceed the column limit, we set 'Limit' to 0. The different
+ // tryMerge..() functions can then decide whether to still do merging.
+ Limit = I->Last->TotalLength > Limit ? 0 : Limit - I->Last->TotalLength;
+
+ if (I + 1 == E || (I + 1)->Type == LT_Invalid)
+ return;
+
+ if (I->Last->is(tok::l_brace)) {
+ tryMergeSimpleBlock(I, E, Limit);
+ } else if (I->First.is(tok::kw_if)) {
+ tryMergeSimpleIf(I, E, Limit);
+ } else if (I->InPPDirective && (I->First.FormatTok.HasUnescapedNewline ||
+ I->First.FormatTok.IsFirst)) {
+ tryMergeSimplePPDirective(I, E, Limit);
+ }
+ return;
+ }
+
+ void tryMergeSimplePPDirective(std::vector<AnnotatedLine>::iterator &I,
+ std::vector<AnnotatedLine>::iterator E,
+ unsigned Limit) {
+ if (Limit == 0)
+ return;
+ AnnotatedLine &Line = *I;
+ if (!(I + 1)->InPPDirective || (I + 1)->First.FormatTok.HasUnescapedNewline)
+ return;
+ if (I + 2 != E && (I + 2)->InPPDirective &&
+ !(I + 2)->First.FormatTok.HasUnescapedNewline)
+ return;
+ if (1 + (I + 1)->Last->TotalLength > Limit)
+ return;
+ join(Line, *(++I));
+ }
+
+ void tryMergeSimpleIf(std::vector<AnnotatedLine>::iterator &I,
+ std::vector<AnnotatedLine>::iterator E,
+ unsigned Limit) {
+ if (Limit == 0)
+ return;
+ if (!Style.AllowShortIfStatementsOnASingleLine)
+ return;
+ if ((I + 1)->InPPDirective != I->InPPDirective ||
+ ((I + 1)->InPPDirective &&
+ (I + 1)->First.FormatTok.HasUnescapedNewline))
+ return;
+ AnnotatedLine &Line = *I;
+ if (Line.Last->isNot(tok::r_paren))
+ return;
+ if (1 + (I + 1)->Last->TotalLength > Limit)
+ return;
+ if ((I + 1)->First.is(tok::kw_if) || (I + 1)->First.Type == TT_LineComment)
+ return;
+ // Only inline simple if's (no nested if or else).
+ if (I + 2 != E && (I + 2)->First.is(tok::kw_else))
+ return;
+ join(Line, *(++I));
+ }
+
+ void tryMergeSimpleBlock(std::vector<AnnotatedLine>::iterator &I,
+ std::vector<AnnotatedLine>::iterator E,
+ unsigned Limit) {
+ // First, check that the current line allows merging. This is the case if
+ // we're not in a control flow statement and the last token is an opening
+ // brace.
+ AnnotatedLine &Line = *I;
+ if (Line.First.isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::r_brace,
+ tok::kw_else, tok::kw_try, tok::kw_catch,
+ tok::kw_for,
+ // This gets rid of all ObjC @ keywords and methods.
+ tok::at, tok::minus, tok::plus))
+ return;
+
+ AnnotatedToken *Tok = &(I + 1)->First;
+ if (Tok->Children.empty() && Tok->is(tok::r_brace) &&
+ !Tok->MustBreakBefore) {
+ // We merge empty blocks even if the line exceeds the column limit.
+ Tok->SpacesRequiredBefore = 0;
+ Tok->CanBreakBefore = true;
+ join(Line, *(I + 1));
+ I += 1;
+ } else if (Limit != 0) {
+ // Check that we still have three lines and they fit into the limit.
+ if (I + 2 == E || (I + 2)->Type == LT_Invalid ||
+ !nextTwoLinesFitInto(I, Limit))
+ return;
+
+ // Second, check that the next line does not contain any braces - if it
+ // does, readability declines when putting it into a single line.
+ if ((I + 1)->Last->Type == TT_LineComment || Tok->MustBreakBefore)
+ return;
+ do {
+ if (Tok->isOneOf(tok::l_brace, tok::r_brace))
+ return;
+ Tok = Tok->Children.empty() ? NULL : &Tok->Children.back();
+ } while (Tok != NULL);
+
+ // Last, check that the third line contains a single closing brace.
+ Tok = &(I + 2)->First;
+ if (!Tok->Children.empty() || Tok->isNot(tok::r_brace) ||
+ Tok->MustBreakBefore)
+ return;
+
+ join(Line, *(I + 1));
+ join(Line, *(I + 2));
+ I += 2;
+ }
+ }
+
+ bool nextTwoLinesFitInto(std::vector<AnnotatedLine>::iterator I,
+ unsigned Limit) {
+ return 1 + (I + 1)->Last->TotalLength + 1 + (I + 2)->Last->TotalLength <=
+ Limit;
+ }
+
+ void join(AnnotatedLine &A, const AnnotatedLine &B) {
+ unsigned LengthA = A.Last->TotalLength + B.First.SpacesRequiredBefore;
+ A.Last->Children.push_back(B.First);
+ while (!A.Last->Children.empty()) {
+ A.Last->Children[0].Parent = A.Last;
+ A.Last->Children[0].TotalLength += LengthA;
+ A.Last = &A.Last->Children[0];
+ }
+ }
+
+ bool touchesRanges(const CharSourceRange &Range) {
+ for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
+ if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(),
+ Ranges[i].getBegin()) &&
+ !SourceMgr.isBeforeInTranslationUnit(Ranges[i].getEnd(),
+ Range.getBegin()))
+ return true;
+ }
+ return false;
+ }
+
+ bool touchesLine(const AnnotatedLine &TheLine) {
+ const FormatToken *First = &TheLine.First.FormatTok;
+ const FormatToken *Last = &TheLine.Last->FormatTok;
+ CharSourceRange LineRange = CharSourceRange::getTokenRange(
+ First->WhiteSpaceStart.getLocWithOffset(First->LastNewlineOffset),
+ Last->Tok.getLocation());
+ return touchesRanges(LineRange);
+ }
+
+ bool touchesEmptyLineBefore(const AnnotatedLine &TheLine) {
+ const FormatToken *First = &TheLine.First.FormatTok;
+ CharSourceRange LineRange = CharSourceRange::getCharRange(
+ First->WhiteSpaceStart,
+ First->WhiteSpaceStart.getLocWithOffset(First->LastNewlineOffset));
+ return touchesRanges(LineRange);
+ }
+
+ virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) {
+ AnnotatedLines.push_back(AnnotatedLine(TheLine));
+ }
+
+ /// \brief Add a new line and the required indent before the first Token
+ /// of the \c UnwrappedLine if there was no structural parsing error.
+ /// Returns the indent level of the \c UnwrappedLine.
+ void formatFirstToken(const AnnotatedToken &RootToken,
+ const AnnotatedToken *PreviousToken, unsigned Indent,
+ bool InPPDirective, unsigned PreviousEndOfLineColumn) {
+ const FormatToken &Tok = RootToken.FormatTok;
+
+ unsigned Newlines =
+ std::min(Tok.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
+ if (Newlines == 0 && !Tok.IsFirst)
+ Newlines = 1;
+
+ if (!InPPDirective || Tok.HasUnescapedNewline) {
+ // Insert extra new line before access specifiers.
+ if (PreviousToken && PreviousToken->isOneOf(tok::semi, tok::r_brace) &&
+ RootToken.isAccessSpecifier() && Tok.NewlinesBefore == 1)
+ ++Newlines;
+
+ Whitespaces.replaceWhitespace(RootToken, Newlines, Indent, 0);
+ } else {
+ Whitespaces.replacePPWhitespace(RootToken, Newlines, Indent,
+ PreviousEndOfLineColumn);
+ }
+ }
+
+ DiagnosticsEngine &Diag;
+ FormatStyle Style;
+ Lexer &Lex;
+ SourceManager &SourceMgr;
+ WhitespaceManager Whitespaces;
+ std::vector<CharSourceRange> Ranges;
+ std::vector<AnnotatedLine> AnnotatedLines;
+ bool StructuralError;
+};
+
+tooling::Replacements
+reformat(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr,
+ std::vector<CharSourceRange> Ranges, DiagnosticConsumer *DiagClient) {
+ IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions();
+ OwningPtr<DiagnosticConsumer> DiagPrinter;
+ if (DiagClient == 0) {
+ DiagPrinter.reset(new TextDiagnosticPrinter(llvm::errs(), &*DiagOpts));
+ DiagPrinter->BeginSourceFile(Lex.getLangOpts(), Lex.getPP());
+ DiagClient = DiagPrinter.get();
+ }
+ DiagnosticsEngine Diagnostics(
+ IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs()), &*DiagOpts,
+ DiagClient, false);
+ Diagnostics.setSourceManager(&SourceMgr);
+ Formatter formatter(Diagnostics, Style, Lex, SourceMgr, Ranges);
+ return formatter.format();
+}
+
+LangOptions getFormattingLangOpts() {
+ LangOptions LangOpts;
+ LangOpts.CPlusPlus = 1;
+ LangOpts.CPlusPlus11 = 1;
+ LangOpts.LineComment = 1;
+ LangOpts.Bool = 1;
+ LangOpts.ObjC1 = 1;
+ LangOpts.ObjC2 = 1;
+ return LangOpts;
+}
+
+} // namespace format
+} // namespace clang
diff --git a/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp b/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp
new file mode 100644
index 000000000000..427157e3322d
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.cpp
@@ -0,0 +1,1187 @@
+//===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements a token annotator, i.e. creates
+/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
+///
+//===----------------------------------------------------------------------===//
+
+#include "TokenAnnotator.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Lex/Lexer.h"
+
+namespace clang {
+namespace format {
+
+static bool isUnaryOperator(const AnnotatedToken &Tok) {
+ switch (Tok.FormatTok.Tok.getKind()) {
+ case tok::plus:
+ case tok::plusplus:
+ case tok::minus:
+ case tok::minusminus:
+ case tok::exclaim:
+ case tok::tilde:
+ case tok::kw_sizeof:
+ case tok::kw_alignof:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool isBinaryOperator(const AnnotatedToken &Tok) {
+ // Comma is a binary operator, but does not behave as such wrt. formatting.
+ return getPrecedence(Tok) > prec::Comma;
+}
+
+// Returns the previous token ignoring comments.
+static AnnotatedToken *getPreviousToken(AnnotatedToken &Tok) {
+ AnnotatedToken *PrevToken = Tok.Parent;
+ while (PrevToken != NULL && PrevToken->is(tok::comment))
+ PrevToken = PrevToken->Parent;
+ return PrevToken;
+}
+static const AnnotatedToken *getPreviousToken(const AnnotatedToken &Tok) {
+ return getPreviousToken(const_cast<AnnotatedToken &>(Tok));
+}
+
+static bool isTrailingComment(AnnotatedToken *Tok) {
+ return Tok != NULL && Tok->is(tok::comment) &&
+ (Tok->Children.empty() ||
+ Tok->Children[0].FormatTok.NewlinesBefore > 0);
+}
+
+// Returns the next token ignoring comments.
+static const AnnotatedToken *getNextToken(const AnnotatedToken &Tok) {
+ if (Tok.Children.empty())
+ return NULL;
+ const AnnotatedToken *NextToken = &Tok.Children[0];
+ while (NextToken->is(tok::comment)) {
+ if (NextToken->Children.empty())
+ return NULL;
+ NextToken = &NextToken->Children[0];
+ }
+ return NextToken;
+}
+
+static bool closesScope(const AnnotatedToken &Tok) {
+ return Tok.isOneOf(tok::r_paren, tok::r_brace, tok::r_square) ||
+ Tok.Type == TT_TemplateCloser;
+}
+
+static bool opensScope(const AnnotatedToken &Tok) {
+ return Tok.isOneOf(tok::l_paren, tok::l_brace, tok::l_square) ||
+ Tok.Type == TT_TemplateOpener;
+}
+
+/// \brief A parser that gathers additional information about tokens.
+///
+/// The \c TokenAnnotator tries to match parenthesis and square brakets and
+/// store a parenthesis levels. It also tries to resolve matching "<" and ">"
+/// into template parameter lists.
+class AnnotatingParser {
+public:
+ AnnotatingParser(SourceManager &SourceMgr, Lexer &Lex, AnnotatedLine &Line,
+ IdentifierInfo &Ident_in)
+ : SourceMgr(SourceMgr), Lex(Lex), Line(Line), CurrentToken(&Line.First),
+ KeywordVirtualFound(false), Ident_in(Ident_in) {
+ Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/ false));
+ }
+
+private:
+ bool parseAngle() {
+ if (CurrentToken == NULL)
+ return false;
+ ScopedContextCreator ContextCreator(*this, tok::less, 10);
+ AnnotatedToken *Left = CurrentToken->Parent;
+ Contexts.back().IsExpression = false;
+ while (CurrentToken != NULL) {
+ if (CurrentToken->is(tok::greater)) {
+ Left->MatchingParen = CurrentToken;
+ CurrentToken->MatchingParen = Left;
+ CurrentToken->Type = TT_TemplateCloser;
+ next();
+ return true;
+ }
+ if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace,
+ tok::pipepipe, tok::ampamp, tok::question,
+ tok::colon))
+ return false;
+ updateParameterCount(Left, CurrentToken);
+ if (!consumeToken())
+ return false;
+ }
+ return false;
+ }
+
+ bool parseParens(bool LookForDecls = false) {
+ if (CurrentToken == NULL)
+ return false;
+ ScopedContextCreator ContextCreator(*this, tok::l_paren, 1);
+
+ // FIXME: This is a bit of a hack. Do better.
+ Contexts.back().ColonIsForRangeExpr =
+ Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;
+
+ bool StartsObjCMethodExpr = false;
+ AnnotatedToken *Left = CurrentToken->Parent;
+ if (CurrentToken->is(tok::caret)) {
+ // ^( starts a block.
+ Left->Type = TT_ObjCBlockLParen;
+ } else if (AnnotatedToken *MaybeSel = Left->Parent) {
+ // @selector( starts a selector.
+ if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Parent &&
+ MaybeSel->Parent->is(tok::at)) {
+ StartsObjCMethodExpr = true;
+ }
+ }
+
+ if (StartsObjCMethodExpr) {
+ Contexts.back().ColonIsObjCMethodExpr = true;
+ Left->Type = TT_ObjCMethodExpr;
+ }
+
+ while (CurrentToken != NULL) {
+ // LookForDecls is set when "if (" has been seen. Check for
+ // 'identifier' '*' 'identifier' followed by not '=' -- this
+ // '*' has to be a binary operator but determineStarAmpUsage() will
+ // categorize it as an unary operator, so set the right type here.
+ if (LookForDecls && !CurrentToken->Children.empty()) {
+ AnnotatedToken &Prev = *CurrentToken->Parent;
+ AnnotatedToken &Next = CurrentToken->Children[0];
+ if (Prev.Parent->is(tok::identifier) &&
+ Prev.isOneOf(tok::star, tok::amp, tok::ampamp) &&
+ CurrentToken->is(tok::identifier) && Next.isNot(tok::equal)) {
+ Prev.Type = TT_BinaryOperator;
+ LookForDecls = false;
+ }
+ }
+
+ if (CurrentToken->is(tok::r_paren)) {
+ Left->MatchingParen = CurrentToken;
+ CurrentToken->MatchingParen = Left;
+
+ if (StartsObjCMethodExpr) {
+ CurrentToken->Type = TT_ObjCMethodExpr;
+ if (Contexts.back().FirstObjCSelectorName != NULL) {
+ Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
+ Contexts.back().LongestObjCSelectorName;
+ }
+ }
+
+ next();
+ return true;
+ }
+ if (CurrentToken->isOneOf(tok::r_square, tok::r_brace))
+ return false;
+ updateParameterCount(Left, CurrentToken);
+ if (!consumeToken())
+ return false;
+ }
+ return false;
+ }
+
+ bool parseSquare() {
+ if (!CurrentToken)
+ return false;
+
+ // A '[' could be an index subscript (after an indentifier or after
+ // ')' or ']'), it could be the start of an Objective-C method
+ // expression, or it could the the start of an Objective-C array literal.
+ AnnotatedToken *Left = CurrentToken->Parent;
+ AnnotatedToken *Parent = getPreviousToken(*Left);
+ bool StartsObjCMethodExpr =
+ Contexts.back().CanBeExpression &&
+ (!Parent || Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren,
+ tok::kw_return, tok::kw_throw) ||
+ isUnaryOperator(*Parent) || Parent->Type == TT_ObjCForIn ||
+ Parent->Type == TT_CastRParen ||
+ getBinOpPrecedence(Parent->FormatTok.Tok.getKind(), true, true) >
+ prec::Unknown);
+ ScopedContextCreator ContextCreator(*this, tok::l_square, 10);
+ Contexts.back().IsExpression = true;
+ bool StartsObjCArrayLiteral = Parent && Parent->is(tok::at);
+
+ if (StartsObjCMethodExpr) {
+ Contexts.back().ColonIsObjCMethodExpr = true;
+ Left->Type = TT_ObjCMethodExpr;
+ } else if (StartsObjCArrayLiteral) {
+ Left->Type = TT_ObjCArrayLiteral;
+ }
+
+ while (CurrentToken != NULL) {
+ if (CurrentToken->is(tok::r_square)) {
+ if (!CurrentToken->Children.empty() &&
+ CurrentToken->Children[0].is(tok::l_paren)) {
+ // An ObjC method call is rarely followed by an open parenthesis.
+ // FIXME: Do we incorrectly label ":" with this?
+ StartsObjCMethodExpr = false;
+ Left->Type = TT_Unknown;
+ }
+ if (StartsObjCMethodExpr) {
+ CurrentToken->Type = TT_ObjCMethodExpr;
+ // determineStarAmpUsage() thinks that '*' '[' is allocating an
+ // array of pointers, but if '[' starts a selector then '*' is a
+ // binary operator.
+ if (Parent != NULL && Parent->Type == TT_PointerOrReference)
+ Parent->Type = TT_BinaryOperator;
+ } else if (StartsObjCArrayLiteral) {
+ CurrentToken->Type = TT_ObjCArrayLiteral;
+ }
+ Left->MatchingParen = CurrentToken;
+ CurrentToken->MatchingParen = Left;
+ if (Contexts.back().FirstObjCSelectorName != NULL)
+ Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
+ Contexts.back().LongestObjCSelectorName;
+ next();
+ return true;
+ }
+ if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace))
+ return false;
+ updateParameterCount(Left, CurrentToken);
+ if (!consumeToken())
+ return false;
+ }
+ return false;
+ }
+
+ bool parseBrace() {
+ // Lines are fine to end with '{'.
+ if (CurrentToken == NULL)
+ return true;
+ ScopedContextCreator ContextCreator(*this, tok::l_brace, 1);
+ AnnotatedToken *Left = CurrentToken->Parent;
+ while (CurrentToken != NULL) {
+ if (CurrentToken->is(tok::r_brace)) {
+ Left->MatchingParen = CurrentToken;
+ CurrentToken->MatchingParen = Left;
+ next();
+ return true;
+ }
+ if (CurrentToken->isOneOf(tok::r_paren, tok::r_square))
+ return false;
+ updateParameterCount(Left, CurrentToken);
+ if (!consumeToken())
+ return false;
+ }
+ return true;
+ }
+
+ void updateParameterCount(AnnotatedToken *Left, AnnotatedToken *Current) {
+ if (Current->is(tok::comma))
+ ++Left->ParameterCount;
+ else if (Left->ParameterCount == 0 && Current->isNot(tok::comment))
+ Left->ParameterCount = 1;
+ }
+
+ bool parseConditional() {
+ while (CurrentToken != NULL) {
+ if (CurrentToken->is(tok::colon)) {
+ CurrentToken->Type = TT_ConditionalExpr;
+ next();
+ return true;
+ }
+ if (!consumeToken())
+ return false;
+ }
+ return false;
+ }
+
+ bool parseTemplateDeclaration() {
+ if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
+ CurrentToken->Type = TT_TemplateOpener;
+ next();
+ if (!parseAngle())
+ return false;
+ if (CurrentToken != NULL)
+ CurrentToken->Parent->ClosesTemplateDeclaration = true;
+ return true;
+ }
+ return false;
+ }
+
+ bool consumeToken() {
+ AnnotatedToken *Tok = CurrentToken;
+ next();
+ switch (Tok->FormatTok.Tok.getKind()) {
+ case tok::plus:
+ case tok::minus:
+ if (Tok->Parent == NULL && Line.MustBeDeclaration)
+ Tok->Type = TT_ObjCMethodSpecifier;
+ break;
+ case tok::colon:
+ if (Tok->Parent == NULL)
+ return false;
+ // Colons from ?: are handled in parseConditional().
+ if (Tok->Parent->is(tok::r_paren) && Contexts.size() == 1) {
+ Tok->Type = TT_CtorInitializerColon;
+ } else if (Contexts.back().ColonIsObjCMethodExpr ||
+ Line.First.Type == TT_ObjCMethodSpecifier) {
+ Tok->Type = TT_ObjCMethodExpr;
+ Tok->Parent->Type = TT_ObjCSelectorName;
+ if (Tok->Parent->FormatTok.TokenLength >
+ Contexts.back().LongestObjCSelectorName)
+ Contexts.back().LongestObjCSelectorName =
+ Tok->Parent->FormatTok.TokenLength;
+ if (Contexts.back().FirstObjCSelectorName == NULL)
+ Contexts.back().FirstObjCSelectorName = Tok->Parent;
+ } else if (Contexts.back().ColonIsForRangeExpr) {
+ Tok->Type = TT_RangeBasedForLoopColon;
+ } else if (Contexts.size() == 1) {
+ Tok->Type = TT_InheritanceColon;
+ } else if (Contexts.back().ContextKind == tok::l_paren) {
+ Tok->Type = TT_InlineASMColon;
+ }
+ break;
+ case tok::kw_if:
+ case tok::kw_while:
+ if (CurrentToken != NULL && CurrentToken->is(tok::l_paren)) {
+ next();
+ if (!parseParens(/*LookForDecls=*/ true))
+ return false;
+ }
+ break;
+ case tok::kw_for:
+ Contexts.back().ColonIsForRangeExpr = true;
+ next();
+ if (!parseParens())
+ return false;
+ break;
+ case tok::l_paren:
+ if (!parseParens())
+ return false;
+ if (Line.MustBeDeclaration)
+ Line.MightBeFunctionDecl = true;
+ break;
+ case tok::l_square:
+ if (!parseSquare())
+ return false;
+ break;
+ case tok::l_brace:
+ if (!parseBrace())
+ return false;
+ break;
+ case tok::less:
+ if (parseAngle())
+ Tok->Type = TT_TemplateOpener;
+ else {
+ Tok->Type = TT_BinaryOperator;
+ CurrentToken = Tok;
+ next();
+ }
+ break;
+ case tok::r_paren:
+ case tok::r_square:
+ return false;
+ case tok::r_brace:
+ // Lines can start with '}'.
+ if (Tok->Parent != NULL)
+ return false;
+ break;
+ case tok::greater:
+ Tok->Type = TT_BinaryOperator;
+ break;
+ case tok::kw_operator:
+ while (CurrentToken && CurrentToken->isNot(tok::l_paren)) {
+ if (CurrentToken->isOneOf(tok::star, tok::amp))
+ CurrentToken->Type = TT_PointerOrReference;
+ consumeToken();
+ }
+ if (CurrentToken)
+ CurrentToken->Type = TT_OverloadedOperatorLParen;
+ break;
+ case tok::question:
+ parseConditional();
+ break;
+ case tok::kw_template:
+ parseTemplateDeclaration();
+ break;
+ case tok::identifier:
+ if (Line.First.is(tok::kw_for) &&
+ Tok->FormatTok.Tok.getIdentifierInfo() == &Ident_in)
+ Tok->Type = TT_ObjCForIn;
+ break;
+ case tok::comma:
+ if (Contexts.back().FirstStartOfName)
+ Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true;
+ break;
+ default:
+ break;
+ }
+ return true;
+ }
+
+ void parseIncludeDirective() {
+ next();
+ if (CurrentToken != NULL && CurrentToken->is(tok::less)) {
+ next();
+ while (CurrentToken != NULL) {
+ if (CurrentToken->isNot(tok::comment) ||
+ !CurrentToken->Children.empty())
+ CurrentToken->Type = TT_ImplicitStringLiteral;
+ next();
+ }
+ } else {
+ while (CurrentToken != NULL) {
+ if (CurrentToken->is(tok::string_literal))
+ // Mark these string literals as "implicit" literals, too, so that
+ // they are not split or line-wrapped.
+ CurrentToken->Type = TT_ImplicitStringLiteral;
+ next();
+ }
+ }
+ }
+
+ void parseWarningOrError() {
+ next();
+ // We still want to format the whitespace left of the first token of the
+ // warning or error.
+ next();
+ while (CurrentToken != NULL) {
+ CurrentToken->Type = TT_ImplicitStringLiteral;
+ next();
+ }
+ }
+
+ void parsePreprocessorDirective() {
+ next();
+ if (CurrentToken == NULL)
+ return;
+ // Hashes in the middle of a line can lead to any strange token
+ // sequence.
+ if (CurrentToken->FormatTok.Tok.getIdentifierInfo() == NULL)
+ return;
+ switch (CurrentToken->FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) {
+ case tok::pp_include:
+ case tok::pp_import:
+ parseIncludeDirective();
+ break;
+ case tok::pp_error:
+ case tok::pp_warning:
+ parseWarningOrError();
+ break;
+ default:
+ break;
+ }
+ while (CurrentToken != NULL)
+ next();
+ }
+
+public:
+ LineType parseLine() {
+ int PeriodsAndArrows = 0;
+ AnnotatedToken *LastPeriodOrArrow = NULL;
+ bool CanBeBuilderTypeStmt = true;
+ if (CurrentToken->is(tok::hash)) {
+ parsePreprocessorDirective();
+ return LT_PreprocessorDirective;
+ }
+ while (CurrentToken != NULL) {
+ if (CurrentToken->is(tok::kw_virtual))
+ KeywordVirtualFound = true;
+ if (CurrentToken->isOneOf(tok::period, tok::arrow)) {
+ ++PeriodsAndArrows;
+ LastPeriodOrArrow = CurrentToken;
+ }
+ AnnotatedToken *TheToken = CurrentToken;
+ if (!consumeToken())
+ return LT_Invalid;
+ if (getPrecedence(*TheToken) > prec::Assignment &&
+ TheToken->Type == TT_BinaryOperator)
+ CanBeBuilderTypeStmt = false;
+ }
+ if (KeywordVirtualFound)
+ return LT_VirtualFunctionDecl;
+
+ // Assume a builder-type call if there are 2 or more "." and "->".
+ if (PeriodsAndArrows >= 2 && CanBeBuilderTypeStmt) {
+ LastPeriodOrArrow->LastInChainOfCalls = true;
+ return LT_BuilderTypeCall;
+ }
+
+ if (Line.First.Type == TT_ObjCMethodSpecifier) {
+ if (Contexts.back().FirstObjCSelectorName != NULL)
+ Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
+ Contexts.back().LongestObjCSelectorName;
+ return LT_ObjCMethodDecl;
+ }
+
+ return LT_Other;
+ }
+
+private:
+ void next() {
+ if (CurrentToken != NULL) {
+ determineTokenType(*CurrentToken);
+ CurrentToken->BindingStrength = Contexts.back().BindingStrength;
+ }
+
+ if (CurrentToken != NULL && !CurrentToken->Children.empty())
+ CurrentToken = &CurrentToken->Children[0];
+ else
+ CurrentToken = NULL;
+
+ // Reset token type in case we have already looked at it and then recovered
+ // from an error (e.g. failure to find the matching >).
+ if (CurrentToken != NULL)
+ CurrentToken->Type = TT_Unknown;
+ }
+
+ /// \brief A struct to hold information valid in a specific context, e.g.
+ /// a pair of parenthesis.
+ struct Context {
+ Context(tok::TokenKind ContextKind, unsigned BindingStrength,
+ bool IsExpression)
+ : ContextKind(ContextKind), BindingStrength(BindingStrength),
+ LongestObjCSelectorName(0), ColonIsForRangeExpr(false),
+ ColonIsObjCMethodExpr(false), FirstObjCSelectorName(NULL),
+ FirstStartOfName(NULL), IsExpression(IsExpression),
+ CanBeExpression(true) {}
+
+ tok::TokenKind ContextKind;
+ unsigned BindingStrength;
+ unsigned LongestObjCSelectorName;
+ bool ColonIsForRangeExpr;
+ bool ColonIsObjCMethodExpr;
+ AnnotatedToken *FirstObjCSelectorName;
+ AnnotatedToken *FirstStartOfName;
+ bool IsExpression;
+ bool CanBeExpression;
+ };
+
+ /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime
+ /// of each instance.
+ struct ScopedContextCreator {
+ AnnotatingParser &P;
+
+ ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind,
+ unsigned Increase)
+ : P(P) {
+ P.Contexts.push_back(
+ Context(ContextKind, P.Contexts.back().BindingStrength + Increase,
+ P.Contexts.back().IsExpression));
+ }
+
+ ~ScopedContextCreator() { P.Contexts.pop_back(); }
+ };
+
+ void determineTokenType(AnnotatedToken &Current) {
+ if (getPrecedence(Current) == prec::Assignment) {
+ Contexts.back().IsExpression = true;
+ for (AnnotatedToken *Previous = Current.Parent;
+ Previous && Previous->isNot(tok::comma);
+ Previous = Previous->Parent) {
+ if (Previous->is(tok::r_square))
+ Previous = Previous->MatchingParen;
+ if (Previous->Type == TT_BinaryOperator &&
+ Previous->isOneOf(tok::star, tok::amp)) {
+ Previous->Type = TT_PointerOrReference;
+ }
+ }
+ } else if (Current.isOneOf(tok::kw_return, tok::kw_throw) ||
+ (Current.is(tok::l_paren) && !Line.MustBeDeclaration &&
+ (!Current.Parent || Current.Parent->isNot(tok::kw_for)))) {
+ Contexts.back().IsExpression = true;
+ } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) {
+ for (AnnotatedToken *Previous = Current.Parent;
+ Previous && Previous->isOneOf(tok::star, tok::amp);
+ Previous = Previous->Parent)
+ Previous->Type = TT_PointerOrReference;
+ } else if (Current.Parent &&
+ Current.Parent->Type == TT_CtorInitializerColon) {
+ Contexts.back().IsExpression = true;
+ } else if (Current.is(tok::kw_new)) {
+ Contexts.back().CanBeExpression = false;
+ }
+
+ if (Current.Type == TT_Unknown) {
+ if (Current.Parent && Current.is(tok::identifier) &&
+ ((Current.Parent->is(tok::identifier) &&
+ Current.Parent->FormatTok.Tok.getIdentifierInfo()
+ ->getPPKeywordID() == tok::pp_not_keyword) ||
+ isSimpleTypeSpecifier(*Current.Parent) ||
+ Current.Parent->Type == TT_PointerOrReference ||
+ Current.Parent->Type == TT_TemplateCloser)) {
+ Contexts.back().FirstStartOfName = &Current;
+ Current.Type = TT_StartOfName;
+ } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) {
+ Current.Type =
+ determineStarAmpUsage(Current, Contexts.back().IsExpression);
+ } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) {
+ Current.Type = determinePlusMinusCaretUsage(Current);
+ } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) {
+ Current.Type = determineIncrementUsage(Current);
+ } else if (Current.is(tok::exclaim)) {
+ Current.Type = TT_UnaryOperator;
+ } else if (isBinaryOperator(Current)) {
+ Current.Type = TT_BinaryOperator;
+ } else if (Current.is(tok::comment)) {
+ std::string Data(Lexer::getSpelling(Current.FormatTok.Tok, SourceMgr,
+ Lex.getLangOpts()));
+ if (StringRef(Data).startswith("//"))
+ Current.Type = TT_LineComment;
+ else
+ Current.Type = TT_BlockComment;
+ } else if (Current.is(tok::r_paren)) {
+ bool ParensNotExpr = !Current.Parent ||
+ Current.Parent->Type == TT_PointerOrReference ||
+ Current.Parent->Type == TT_TemplateCloser;
+ bool ParensCouldEndDecl =
+ !Current.Children.empty() &&
+ Current.Children[0].isOneOf(tok::equal, tok::semi, tok::l_brace);
+ bool IsSizeOfOrAlignOf =
+ Current.MatchingParen && Current.MatchingParen->Parent &&
+ Current.MatchingParen->Parent->isOneOf(tok::kw_sizeof,
+ tok::kw_alignof);
+ if (ParensNotExpr && !ParensCouldEndDecl && !IsSizeOfOrAlignOf &&
+ Contexts.back().IsExpression)
+ // FIXME: We need to get smarter and understand more cases of casts.
+ Current.Type = TT_CastRParen;
+ } else if (Current.is(tok::at) && Current.Children.size()) {
+ switch (Current.Children[0].FormatTok.Tok.getObjCKeywordID()) {
+ case tok::objc_interface:
+ case tok::objc_implementation:
+ case tok::objc_protocol:
+ Current.Type = TT_ObjCDecl;
+ break;
+ case tok::objc_property:
+ Current.Type = TT_ObjCProperty;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ }
+
+ /// \brief Return the type of the given token assuming it is * or &.
+ TokenType
+ determineStarAmpUsage(const AnnotatedToken &Tok, bool IsExpression) {
+ const AnnotatedToken *PrevToken = getPreviousToken(Tok);
+ if (PrevToken == NULL)
+ return TT_UnaryOperator;
+
+ const AnnotatedToken *NextToken = getNextToken(Tok);
+ if (NextToken == NULL)
+ return TT_Unknown;
+
+ if (PrevToken->is(tok::l_paren) && !IsExpression)
+ return TT_PointerOrReference;
+
+ if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace,
+ tok::comma, tok::semi, tok::kw_return, tok::colon,
+ tok::equal) ||
+ PrevToken->Type == TT_BinaryOperator ||
+ PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen)
+ return TT_UnaryOperator;
+
+ if (NextToken->is(tok::l_square))
+ return TT_PointerOrReference;
+
+ if (PrevToken->FormatTok.Tok.isLiteral() ||
+ PrevToken->isOneOf(tok::r_paren, tok::r_square) ||
+ NextToken->FormatTok.Tok.isLiteral() || isUnaryOperator(*NextToken))
+ return TT_BinaryOperator;
+
+ // It is very unlikely that we are going to find a pointer or reference type
+ // definition on the RHS of an assignment.
+ if (IsExpression)
+ return TT_BinaryOperator;
+
+ return TT_PointerOrReference;
+ }
+
+ TokenType determinePlusMinusCaretUsage(const AnnotatedToken &Tok) {
+ const AnnotatedToken *PrevToken = getPreviousToken(Tok);
+ if (PrevToken == NULL)
+ return TT_UnaryOperator;
+
+ // Use heuristics to recognize unary operators.
+ if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square,
+ tok::question, tok::colon, tok::kw_return,
+ tok::kw_case, tok::at, tok::l_brace))
+ return TT_UnaryOperator;
+
+ // There can't be two consecutive binary operators.
+ if (PrevToken->Type == TT_BinaryOperator)
+ return TT_UnaryOperator;
+
+ // Fall back to marking the token as binary operator.
+ return TT_BinaryOperator;
+ }
+
+ /// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
+ TokenType determineIncrementUsage(const AnnotatedToken &Tok) {
+ const AnnotatedToken *PrevToken = getPreviousToken(Tok);
+ if (PrevToken == NULL)
+ return TT_UnaryOperator;
+ if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier))
+ return TT_TrailingUnaryOperator;
+
+ return TT_UnaryOperator;
+ }
+
+ // FIXME: This is copy&pasted from Sema. Put it in a common place and remove
+ // duplication.
+ /// \brief Determine whether the token kind starts a simple-type-specifier.
+ bool isSimpleTypeSpecifier(const AnnotatedToken &Tok) const {
+ switch (Tok.FormatTok.Tok.getKind()) {
+ case tok::kw_short:
+ case tok::kw_long:
+ case tok::kw___int64:
+ case tok::kw___int128:
+ case tok::kw_signed:
+ case tok::kw_unsigned:
+ case tok::kw_void:
+ case tok::kw_char:
+ case tok::kw_int:
+ case tok::kw_half:
+ case tok::kw_float:
+ case tok::kw_double:
+ case tok::kw_wchar_t:
+ case tok::kw_bool:
+ case tok::kw___underlying_type:
+ return true;
+ case tok::annot_typename:
+ case tok::kw_char16_t:
+ case tok::kw_char32_t:
+ case tok::kw_typeof:
+ case tok::kw_decltype:
+ return Lex.getLangOpts().CPlusPlus;
+ default:
+ break;
+ }
+ return false;
+ }
+
+ SmallVector<Context, 8> Contexts;
+
+ SourceManager &SourceMgr;
+ Lexer &Lex;
+ AnnotatedLine &Line;
+ AnnotatedToken *CurrentToken;
+ bool KeywordVirtualFound;
+ IdentifierInfo &Ident_in;
+};
+
+/// \brief Parses binary expressions by inserting fake parenthesis based on
+/// operator precedence.
+class ExpressionParser {
+public:
+ ExpressionParser(AnnotatedLine &Line) : Current(&Line.First) {}
+
+ /// \brief Parse expressions with the given operatore precedence.
+ void parse(int Precedence = 0) {
+ if (Precedence > prec::PointerToMember || Current == NULL)
+ return;
+
+ // Skip over "return" until we can properly parse it.
+ if (Current->is(tok::kw_return))
+ next();
+
+ // Eagerly consume trailing comments.
+ while (isTrailingComment(Current)) {
+ next();
+ }
+
+ AnnotatedToken *Start = Current;
+ bool OperatorFound = false;
+
+ while (Current) {
+ // Consume operators with higher precedence.
+ parse(prec::Level(Precedence + 1));
+
+ int CurrentPrecedence = 0;
+ if (Current) {
+ if (Current->Type == TT_ConditionalExpr)
+ CurrentPrecedence = 1 + (int) prec::Conditional;
+ else if (Current->is(tok::semi) || Current->Type == TT_InlineASMColon ||
+ Current->Type == TT_CtorInitializerColon)
+ CurrentPrecedence = 1;
+ else if (Current->Type == TT_BinaryOperator || Current->is(tok::comma))
+ CurrentPrecedence = 1 + (int) getPrecedence(*Current);
+ }
+
+ // At the end of the line or when an operator with higher precedence is
+ // found, insert fake parenthesis and return.
+ if (Current == NULL || closesScope(*Current) ||
+ (CurrentPrecedence != 0 && CurrentPrecedence < Precedence)) {
+ if (OperatorFound) {
+ ++Start->FakeLParens;
+ if (Current)
+ ++Current->Parent->FakeRParens;
+ }
+ return;
+ }
+
+ // Consume scopes: (), [], <> and {}
+ if (opensScope(*Current)) {
+ AnnotatedToken *Left = Current;
+ while (Current && !closesScope(*Current)) {
+ next();
+ parse();
+ }
+ // Remove fake parens that just duplicate the real parens.
+ if (Current && Left->Children[0].FakeLParens > 0 &&
+ Current->Parent->FakeRParens > 0) {
+ --Left->Children[0].FakeLParens;
+ --Current->Parent->FakeRParens;
+ }
+ next();
+ } else {
+ // Operator found.
+ if (CurrentPrecedence == Precedence)
+ OperatorFound = true;
+
+ next();
+ }
+ }
+ }
+
+private:
+ void next() {
+ if (Current != NULL)
+ Current = Current->Children.empty() ? NULL : &Current->Children[0];
+ }
+
+ AnnotatedToken *Current;
+};
+
+void TokenAnnotator::annotate(AnnotatedLine &Line) {
+ AnnotatingParser Parser(SourceMgr, Lex, Line, Ident_in);
+ Line.Type = Parser.parseLine();
+ if (Line.Type == LT_Invalid)
+ return;
+
+ ExpressionParser ExprParser(Line);
+ ExprParser.parse();
+
+ if (Line.First.Type == TT_ObjCMethodSpecifier)
+ Line.Type = LT_ObjCMethodDecl;
+ else if (Line.First.Type == TT_ObjCDecl)
+ Line.Type = LT_ObjCDecl;
+ else if (Line.First.Type == TT_ObjCProperty)
+ Line.Type = LT_ObjCProperty;
+
+ Line.First.SpacesRequiredBefore = 1;
+ Line.First.MustBreakBefore = Line.First.FormatTok.MustBreakBefore;
+ Line.First.CanBreakBefore = Line.First.MustBreakBefore;
+
+ Line.First.TotalLength = Line.First.FormatTok.TokenLength;
+}
+
+void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
+ if (Line.First.Children.empty())
+ return;
+ AnnotatedToken *Current = &Line.First.Children[0];
+ while (Current != NULL) {
+ if (Current->Type == TT_LineComment)
+ Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments;
+ else
+ Current->SpacesRequiredBefore =
+ spaceRequiredBefore(Line, *Current) ? 1 : 0;
+
+ if (Current->FormatTok.MustBreakBefore) {
+ Current->MustBreakBefore = true;
+ } else if (Current->Type == TT_LineComment) {
+ Current->MustBreakBefore = Current->FormatTok.NewlinesBefore > 0;
+ } else if (isTrailingComment(Current->Parent) ||
+ (Current->is(tok::string_literal) &&
+ Current->Parent->is(tok::string_literal))) {
+ Current->MustBreakBefore = true;
+ } else if (Current->is(tok::lessless) && !Current->Children.empty() &&
+ Current->Parent->is(tok::string_literal) &&
+ Current->Children[0].is(tok::string_literal)) {
+ Current->MustBreakBefore = true;
+ } else {
+ Current->MustBreakBefore = false;
+ }
+ Current->CanBreakBefore =
+ Current->MustBreakBefore || canBreakBefore(Line, *Current);
+ if (Current->MustBreakBefore)
+ Current->TotalLength = Current->Parent->TotalLength + Style.ColumnLimit;
+ else
+ Current->TotalLength =
+ Current->Parent->TotalLength + Current->FormatTok.TokenLength +
+ Current->SpacesRequiredBefore;
+ // FIXME: Only calculate this if CanBreakBefore is true once static
+ // initializers etc. are sorted out.
+ // FIXME: Move magic numbers to a better place.
+ Current->SplitPenalty =
+ 20 * Current->BindingStrength + splitPenalty(Line, *Current);
+
+ Current = Current->Children.empty() ? NULL : &Current->Children[0];
+ }
+}
+
+unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
+ const AnnotatedToken &Tok) {
+ const AnnotatedToken &Left = *Tok.Parent;
+ const AnnotatedToken &Right = Tok;
+
+ if (Right.Type == TT_StartOfName) {
+ if (Line.First.is(tok::kw_for) && Right.PartOfMultiVariableDeclStmt)
+ return 3;
+ else if (Line.MightBeFunctionDecl && Right.BindingStrength == 1)
+ // FIXME: Clean up hack of using BindingStrength to find top-level names.
+ return Style.PenaltyReturnTypeOnItsOwnLine;
+ else
+ return 100;
+ }
+ if (Left.is(tok::equal) && Right.is(tok::l_brace))
+ return 150;
+ if (Left.is(tok::coloncolon))
+ return 500;
+
+ if (Left.Type == TT_RangeBasedForLoopColon ||
+ Left.Type == TT_InheritanceColon)
+ return 2;
+
+ if (Right.isOneOf(tok::arrow, tok::period)) {
+ if (Line.Type == LT_BuilderTypeCall)
+ return prec::PointerToMember;
+ if (Left.isOneOf(tok::r_paren, tok::r_square) && Left.MatchingParen &&
+ Left.MatchingParen->ParameterCount > 0)
+ return 20; // Should be smaller than breaking at a nested comma.
+ return 150;
+ }
+
+ // In for-loops, prefer breaking at ',' and ';'.
+ if (Line.First.is(tok::kw_for) && Left.is(tok::equal))
+ return 4;
+
+ if (Left.is(tok::semi))
+ return 0;
+ if (Left.is(tok::comma))
+ return 1;
+
+ // In Objective-C method expressions, prefer breaking before "param:" over
+ // breaking after it.
+ if (Right.Type == TT_ObjCSelectorName)
+ return 0;
+ if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
+ return 20;
+
+ if (opensScope(Left))
+ return Left.ParameterCount > 1 ? prec::Comma : 20;
+
+ if (Right.is(tok::lessless)) {
+ if (Left.is(tok::string_literal)) {
+ StringRef Content = StringRef(Left.FormatTok.Tok.getLiteralData(),
+ Left.FormatTok.TokenLength);
+ Content = Content.drop_back(1).drop_front(1).trim();
+ if (Content.size() > 1 &&
+ (Content.back() == ':' || Content.back() == '='))
+ return 100;
+ }
+ return prec::Shift;
+ }
+ if (Left.Type == TT_ConditionalExpr)
+ return prec::Conditional;
+ prec::Level Level = getPrecedence(Left);
+
+ if (Level != prec::Unknown)
+ return Level;
+
+ return 3;
+}
+
+bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
+ const AnnotatedToken &Left,
+ const AnnotatedToken &Right) {
+ if (Right.is(tok::hashhash))
+ return Left.is(tok::hash);
+ if (Left.isOneOf(tok::hashhash, tok::hash))
+ return Right.is(tok::hash);
+ if (Right.isOneOf(tok::r_paren, tok::semi, tok::comma))
+ return false;
+ if (Right.is(tok::less) &&
+ (Left.is(tok::kw_template) ||
+ (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)))
+ return true;
+ if (Left.is(tok::arrow) || Right.is(tok::arrow))
+ return false;
+ if (Left.isOneOf(tok::exclaim, tok::tilde))
+ return false;
+ if (Left.is(tok::at) &&
+ Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant,
+ tok::numeric_constant, tok::l_paren, tok::l_brace,
+ tok::kw_true, tok::kw_false))
+ return false;
+ if (Left.is(tok::coloncolon))
+ return false;
+ if (Right.is(tok::coloncolon))
+ return !Left.isOneOf(tok::identifier, tok::greater, tok::l_paren);
+ if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less))
+ return false;
+ if (Right.Type == TT_PointerOrReference)
+ return Left.FormatTok.Tok.isLiteral() ||
+ ((Left.Type != TT_PointerOrReference) && Left.isNot(tok::l_paren) &&
+ !Style.PointerBindsToType);
+ if (Left.Type == TT_PointerOrReference)
+ return Right.FormatTok.Tok.isLiteral() ||
+ ((Right.Type != TT_PointerOrReference) &&
+ Right.isNot(tok::l_paren) && Style.PointerBindsToType &&
+ Left.Parent && Left.Parent->isNot(tok::l_paren));
+ if (Right.is(tok::star) && Left.is(tok::l_paren))
+ return false;
+ if (Left.is(tok::l_square))
+ return Left.Type == TT_ObjCArrayLiteral && Right.isNot(tok::r_square);
+ if (Right.is(tok::r_square))
+ return Right.Type == TT_ObjCArrayLiteral;
+ if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr)
+ return false;
+ if (Left.is(tok::period) || Right.is(tok::period))
+ return false;
+ if (Left.is(tok::colon))
+ return Left.Type != TT_ObjCMethodExpr;
+ if (Right.is(tok::colon))
+ return Right.Type != TT_ObjCMethodExpr;
+ if (Left.is(tok::l_paren))
+ return false;
+ if (Right.is(tok::l_paren)) {
+ return Line.Type == LT_ObjCDecl ||
+ Left.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, tok::kw_switch,
+ tok::kw_return, tok::kw_catch, tok::kw_new,
+ tok::kw_delete);
+ }
+ if (Left.is(tok::at) &&
+ Right.FormatTok.Tok.getObjCKeywordID() != tok::objc_not_keyword)
+ return false;
+ if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
+ return false;
+ return true;
+}
+
+bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
+ const AnnotatedToken &Tok) {
+ if (Tok.FormatTok.Tok.getIdentifierInfo() &&
+ Tok.Parent->FormatTok.Tok.getIdentifierInfo())
+ return true; // Never ever merge two identifiers.
+ if (Line.Type == LT_ObjCMethodDecl) {
+ if (Tok.Parent->Type == TT_ObjCMethodSpecifier)
+ return true;
+ if (Tok.Parent->is(tok::r_paren) && Tok.is(tok::identifier))
+ // Don't space between ')' and <id>
+ return false;
+ }
+ if (Line.Type == LT_ObjCProperty &&
+ (Tok.is(tok::equal) || Tok.Parent->is(tok::equal)))
+ return false;
+
+ if (Tok.Parent->is(tok::comma))
+ return true;
+ if (Tok.is(tok::comma))
+ return false;
+ if (Tok.Type == TT_CtorInitializerColon || Tok.Type == TT_ObjCBlockLParen)
+ return true;
+ if (Tok.Parent->FormatTok.Tok.is(tok::kw_operator))
+ return false;
+ if (Tok.Type == TT_OverloadedOperatorLParen)
+ return false;
+ if (Tok.is(tok::colon))
+ return !Line.First.isOneOf(tok::kw_case, tok::kw_default) &&
+ !Tok.Children.empty() && Tok.Type != TT_ObjCMethodExpr;
+ if (Tok.is(tok::l_paren) && !Tok.Children.empty() &&
+ Tok.Children[0].Type == TT_PointerOrReference &&
+ !Tok.Children[0].Children.empty() &&
+ Tok.Children[0].Children[0].isNot(tok::r_paren) &&
+ Tok.Parent->isNot(tok::l_paren) &&
+ (Tok.Parent->Type != TT_PointerOrReference || Style.PointerBindsToType))
+ return true;
+ if (Tok.Parent->Type == TT_UnaryOperator || Tok.Parent->Type == TT_CastRParen)
+ return false;
+ if (Tok.Type == TT_UnaryOperator)
+ return !Tok.Parent->isOneOf(tok::l_paren, tok::l_square, tok::at) &&
+ (Tok.Parent->isNot(tok::colon) ||
+ Tok.Parent->Type != TT_ObjCMethodExpr);
+ if (Tok.Parent->is(tok::greater) && Tok.is(tok::greater)) {
+ return Tok.Type == TT_TemplateCloser &&
+ Tok.Parent->Type == TT_TemplateCloser &&
+ Style.Standard != FormatStyle::LS_Cpp11;
+ }
+ if (Tok.isOneOf(tok::arrowstar, tok::periodstar) ||
+ Tok.Parent->isOneOf(tok::arrowstar, tok::periodstar))
+ return false;
+ if (Tok.Type == TT_BinaryOperator || Tok.Parent->Type == TT_BinaryOperator)
+ return true;
+ if (Tok.Parent->Type == TT_TemplateCloser && Tok.is(tok::l_paren))
+ return false;
+ if (Tok.is(tok::less) && Line.First.is(tok::hash))
+ return true;
+ if (Tok.Type == TT_TrailingUnaryOperator)
+ return false;
+ return spaceRequiredBetween(Line, *Tok.Parent, Tok);
+}
+
+bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
+ const AnnotatedToken &Right) {
+ const AnnotatedToken &Left = *Right.Parent;
+ if (Right.Type == TT_StartOfName)
+ return true;
+ if (Right.is(tok::colon) && Right.Type == TT_ObjCMethodExpr)
+ return false;
+ if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
+ return true;
+ if (Right.Type == TT_ObjCSelectorName)
+ return true;
+ if (Left.ClosesTemplateDeclaration)
+ return true;
+ if (Right.Type == TT_ConditionalExpr || Right.is(tok::question))
+ return true;
+ if (Right.Type == TT_RangeBasedForLoopColon ||
+ Right.Type == TT_InheritanceColon)
+ return false;
+ if (Left.Type == TT_RangeBasedForLoopColon ||
+ Left.Type == TT_InheritanceColon)
+ return true;
+ if (Right.Type == TT_RangeBasedForLoopColon)
+ return false;
+ if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser ||
+ Left.Type == TT_UnaryOperator || Left.Type == TT_ConditionalExpr ||
+ Left.isOneOf(tok::question, tok::kw_operator))
+ return false;
+ if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl)
+ return false;
+ if (Left.is(tok::l_paren) && Right.is(tok::l_paren) && Left.Parent &&
+ Left.Parent->is(tok::kw___attribute))
+ return false;
+
+ if (Right.Type == TT_LineComment)
+ // We rely on MustBreakBefore being set correctly here as we should not
+ // change the "binding" behavior of a comment.
+ return false;
+
+ // Allow breaking after a trailing 'const', e.g. after a method declaration,
+ // unless it is follow by ';', '{' or '='.
+ if (Left.is(tok::kw_const) && Left.Parent != NULL &&
+ Left.Parent->is(tok::r_paren))
+ return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal);
+
+ if (Right.is(tok::kw___attribute))
+ return true;
+
+ // We only break before r_brace if there was a corresponding break before
+ // the l_brace, which is tracked by BreakBeforeClosingBrace.
+ if (Right.isOneOf(tok::r_brace, tok::r_paren, tok::greater))
+ return false;
+ if (Left.is(tok::identifier) && Right.is(tok::string_literal))
+ return true;
+ return (isBinaryOperator(Left) && Left.isNot(tok::lessless)) ||
+ Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace) ||
+ Right.isOneOf(tok::lessless, tok::arrow, tok::period, tok::colon) ||
+ (Left.is(tok::r_paren) && Left.Type != TT_CastRParen &&
+ Right.isOneOf(tok::identifier, tok::kw___attribute)) ||
+ (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) ||
+ (Left.is(tok::l_square) && !Right.is(tok::r_square));
+}
+
+} // namespace format
+} // namespace clang
diff --git a/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.h b/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.h
new file mode 100644
index 000000000000..c41ee33c439b
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/TokenAnnotator.h
@@ -0,0 +1,262 @@
+//===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements a token annotator, i.e. creates
+/// \c AnnotatedTokens out of \c FormatTokens with required extra information.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H
+#define LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H
+
+#include "UnwrappedLineParser.h"
+#include "clang/Basic/OperatorPrecedence.h"
+#include "clang/Format/Format.h"
+#include <string>
+
+namespace clang {
+class Lexer;
+class SourceManager;
+
+namespace format {
+
+enum TokenType {
+ TT_BinaryOperator,
+ TT_BlockComment,
+ TT_CastRParen,
+ TT_ConditionalExpr,
+ TT_CtorInitializerColon,
+ TT_ImplicitStringLiteral,
+ TT_InlineASMColon,
+ TT_InheritanceColon,
+ TT_LineComment,
+ TT_ObjCArrayLiteral,
+ TT_ObjCBlockLParen,
+ TT_ObjCDecl,
+ TT_ObjCForIn,
+ TT_ObjCMethodExpr,
+ TT_ObjCMethodSpecifier,
+ TT_ObjCProperty,
+ TT_ObjCSelectorName,
+ TT_OverloadedOperatorLParen,
+ TT_PointerOrReference,
+ TT_PureVirtualSpecifier,
+ TT_RangeBasedForLoopColon,
+ TT_StartOfName,
+ TT_TemplateCloser,
+ TT_TemplateOpener,
+ TT_TrailingUnaryOperator,
+ TT_UnaryOperator,
+ TT_Unknown
+};
+
+enum LineType {
+ LT_Invalid,
+ LT_Other,
+ LT_BuilderTypeCall,
+ LT_PreprocessorDirective,
+ LT_VirtualFunctionDecl,
+ LT_ObjCDecl, // An @interface, @implementation, or @protocol line.
+ LT_ObjCMethodDecl,
+ LT_ObjCProperty // An @property line.
+};
+
+class AnnotatedToken {
+public:
+ explicit AnnotatedToken(const FormatToken &FormatTok)
+ : FormatTok(FormatTok), Type(TT_Unknown), SpacesRequiredBefore(0),
+ CanBreakBefore(false), MustBreakBefore(false),
+ ClosesTemplateDeclaration(false), MatchingParen(NULL),
+ ParameterCount(0), BindingStrength(0), SplitPenalty(0),
+ LongestObjCSelectorName(0), Parent(NULL), FakeLParens(0),
+ FakeRParens(0), LastInChainOfCalls(false),
+ PartOfMultiVariableDeclStmt(false) {}
+
+ bool is(tok::TokenKind Kind) const { return FormatTok.Tok.is(Kind); }
+
+ bool isOneOf(tok::TokenKind K1, tok::TokenKind K2) const {
+ return is(K1) || is(K2);
+ }
+
+ bool isOneOf(tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3) const {
+ return is(K1) || is(K2) || is(K3);
+ }
+
+ bool isOneOf(
+ tok::TokenKind K1, tok::TokenKind K2, tok::TokenKind K3,
+ tok::TokenKind K4, tok::TokenKind K5 = tok::NUM_TOKENS,
+ tok::TokenKind K6 = tok::NUM_TOKENS, tok::TokenKind K7 = tok::NUM_TOKENS,
+ tok::TokenKind K8 = tok::NUM_TOKENS, tok::TokenKind K9 = tok::NUM_TOKENS,
+ tok::TokenKind K10 = tok::NUM_TOKENS,
+ tok::TokenKind K11 = tok::NUM_TOKENS,
+ tok::TokenKind K12 = tok::NUM_TOKENS) const {
+ return is(K1) || is(K2) || is(K3) || is(K4) || is(K5) || is(K6) || is(K7) ||
+ is(K8) || is(K9) || is(K10) || is(K11) || is(K12);
+ }
+
+ bool isNot(tok::TokenKind Kind) const { return FormatTok.Tok.isNot(Kind); }
+
+ bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const {
+ return FormatTok.Tok.isObjCAtKeyword(Kind);
+ }
+
+ bool isAccessSpecifier(bool ColonRequired = true) const {
+ return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private) &&
+ (!ColonRequired ||
+ (!Children.empty() && Children[0].is(tok::colon)));
+ }
+
+ bool isObjCAccessSpecifier() const {
+ return is(tok::at) && !Children.empty() &&
+ (Children[0].isObjCAtKeyword(tok::objc_public) ||
+ Children[0].isObjCAtKeyword(tok::objc_protected) ||
+ Children[0].isObjCAtKeyword(tok::objc_package) ||
+ Children[0].isObjCAtKeyword(tok::objc_private));
+ }
+
+ FormatToken FormatTok;
+
+ TokenType Type;
+
+ unsigned SpacesRequiredBefore;
+ bool CanBreakBefore;
+ bool MustBreakBefore;
+
+ bool ClosesTemplateDeclaration;
+
+ AnnotatedToken *MatchingParen;
+
+ /// \brief Number of parameters, if this is "(", "[" or "<".
+ ///
+ /// This is initialized to 1 as we don't need to distinguish functions with
+ /// 0 parameters from functions with 1 parameter. Thus, we can simply count
+ /// the number of commas.
+ unsigned ParameterCount;
+
+ /// \brief The total length of the line up to and including this token.
+ unsigned TotalLength;
+
+ // FIXME: Come up with a 'cleaner' concept.
+ /// \brief The binding strength of a token. This is a combined value of
+ /// operator precedence, parenthesis nesting, etc.
+ unsigned BindingStrength;
+
+ /// \brief Penalty for inserting a line break before this token.
+ unsigned SplitPenalty;
+
+ /// \brief If this is the first ObjC selector name in an ObjC method
+ /// definition or call, this contains the length of the longest name.
+ unsigned LongestObjCSelectorName;
+
+ std::vector<AnnotatedToken> Children;
+ AnnotatedToken *Parent;
+
+ /// \brief Insert this many fake ( before this token for correct indentation.
+ unsigned FakeLParens;
+ /// \brief Insert this many fake ) after this token for correct indentation.
+ unsigned FakeRParens;
+
+ /// \brief Is this the last "." or "->" in a builder-type call?
+ bool LastInChainOfCalls;
+
+ /// \brief Is this token part of a \c DeclStmt defining multiple variables?
+ ///
+ /// Only set if \c Type == \c TT_StartOfName.
+ bool PartOfMultiVariableDeclStmt;
+
+ const AnnotatedToken *getPreviousNoneComment() const {
+ AnnotatedToken *Tok = Parent;
+ while (Tok != NULL && Tok->is(tok::comment))
+ Tok = Tok->Parent;
+ return Tok;
+ }
+};
+
+class AnnotatedLine {
+public:
+ AnnotatedLine(const UnwrappedLine &Line)
+ : First(Line.Tokens.front()), Level(Line.Level),
+ InPPDirective(Line.InPPDirective),
+ MustBeDeclaration(Line.MustBeDeclaration),
+ MightBeFunctionDecl(false) {
+ assert(!Line.Tokens.empty());
+ AnnotatedToken *Current = &First;
+ for (std::list<FormatToken>::const_iterator I = ++Line.Tokens.begin(),
+ E = Line.Tokens.end();
+ I != E; ++I) {
+ Current->Children.push_back(AnnotatedToken(*I));
+ Current->Children[0].Parent = Current;
+ Current = &Current->Children[0];
+ }
+ Last = Current;
+ }
+ AnnotatedLine(const AnnotatedLine &Other)
+ : First(Other.First), Type(Other.Type), Level(Other.Level),
+ InPPDirective(Other.InPPDirective),
+ MustBeDeclaration(Other.MustBeDeclaration),
+ MightBeFunctionDecl(Other.MightBeFunctionDecl) {
+ Last = &First;
+ while (!Last->Children.empty()) {
+ Last->Children[0].Parent = Last;
+ Last = &Last->Children[0];
+ }
+ }
+
+ AnnotatedToken First;
+ AnnotatedToken *Last;
+
+ LineType Type;
+ unsigned Level;
+ bool InPPDirective;
+ bool MustBeDeclaration;
+ bool MightBeFunctionDecl;
+};
+
+inline prec::Level getPrecedence(const AnnotatedToken &Tok) {
+ return getBinOpPrecedence(Tok.FormatTok.Tok.getKind(), true, true);
+}
+
+/// \brief Determines extra information about the tokens comprising an
+/// \c UnwrappedLine.
+class TokenAnnotator {
+public:
+ TokenAnnotator(const FormatStyle &Style, SourceManager &SourceMgr, Lexer &Lex,
+ IdentifierInfo &Ident_in)
+ : Style(Style), SourceMgr(SourceMgr), Lex(Lex), Ident_in(Ident_in) {
+ }
+
+ void annotate(AnnotatedLine &Line);
+ void calculateFormattingInformation(AnnotatedLine &Line);
+
+private:
+ /// \brief Calculate the penalty for splitting before \c Tok.
+ unsigned splitPenalty(const AnnotatedLine &Line, const AnnotatedToken &Tok);
+
+ bool spaceRequiredBetween(const AnnotatedLine &Line,
+ const AnnotatedToken &Left,
+ const AnnotatedToken &Right);
+
+ bool spaceRequiredBefore(const AnnotatedLine &Line,
+ const AnnotatedToken &Tok);
+
+ bool canBreakBefore(const AnnotatedLine &Line, const AnnotatedToken &Right);
+
+ const FormatStyle &Style;
+ SourceManager &SourceMgr;
+ Lexer &Lex;
+
+ // Contextual keywords:
+ IdentifierInfo &Ident_in;
+};
+
+} // end namespace format
+} // end namespace clang
+
+#endif // LLVM_CLANG_FORMAT_TOKEN_ANNOTATOR_H
diff --git a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp
new file mode 100644
index 000000000000..89a391bd1928
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp
@@ -0,0 +1,858 @@
+//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains the implementation of the UnwrappedLineParser,
+/// which turns a stream of tokens into UnwrappedLines.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "format-parser"
+
+#include "UnwrappedLineParser.h"
+#include "clang/Basic/Diagnostic.h"
+#include "llvm/Support/Debug.h"
+
+namespace clang {
+namespace format {
+
+class ScopedDeclarationState {
+public:
+ ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
+ bool MustBeDeclaration)
+ : Line(Line), Stack(Stack) {
+ Line.MustBeDeclaration = MustBeDeclaration;
+ Stack.push_back(MustBeDeclaration);
+ }
+ ~ScopedDeclarationState() {
+ Stack.pop_back();
+ if (!Stack.empty())
+ Line.MustBeDeclaration = Stack.back();
+ else
+ Line.MustBeDeclaration = true;
+ }
+private:
+ UnwrappedLine &Line;
+ std::vector<bool> &Stack;
+};
+
+class ScopedMacroState : public FormatTokenSource {
+public:
+ ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
+ FormatToken &ResetToken)
+ : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
+ PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource) {
+ TokenSource = this;
+ Line.Level = 0;
+ Line.InPPDirective = true;
+ }
+
+ ~ScopedMacroState() {
+ TokenSource = PreviousTokenSource;
+ ResetToken = Token;
+ Line.InPPDirective = false;
+ Line.Level = PreviousLineLevel;
+ }
+
+ virtual FormatToken getNextToken() {
+ // The \c UnwrappedLineParser guards against this by never calling
+ // \c getNextToken() after it has encountered the first eof token.
+ assert(!eof());
+ Token = PreviousTokenSource->getNextToken();
+ if (eof())
+ return createEOF();
+ return Token;
+ }
+
+private:
+ bool eof() { return Token.NewlinesBefore > 0 && Token.HasUnescapedNewline; }
+
+ FormatToken createEOF() {
+ FormatToken FormatTok;
+ FormatTok.Tok.startToken();
+ FormatTok.Tok.setKind(tok::eof);
+ return FormatTok;
+ }
+
+ UnwrappedLine &Line;
+ FormatTokenSource *&TokenSource;
+ FormatToken &ResetToken;
+ unsigned PreviousLineLevel;
+ FormatTokenSource *PreviousTokenSource;
+
+ FormatToken Token;
+};
+
+class ScopedLineState {
+public:
+ ScopedLineState(UnwrappedLineParser &Parser,
+ bool SwitchToPreprocessorLines = false)
+ : Parser(Parser), SwitchToPreprocessorLines(SwitchToPreprocessorLines) {
+ if (SwitchToPreprocessorLines)
+ Parser.CurrentLines = &Parser.PreprocessorDirectives;
+ PreBlockLine = Parser.Line.take();
+ Parser.Line.reset(new UnwrappedLine());
+ Parser.Line->Level = PreBlockLine->Level;
+ Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
+ }
+
+ ~ScopedLineState() {
+ if (!Parser.Line->Tokens.empty()) {
+ Parser.addUnwrappedLine();
+ }
+ assert(Parser.Line->Tokens.empty());
+ Parser.Line.reset(PreBlockLine);
+ Parser.MustBreakBeforeNextToken = true;
+ if (SwitchToPreprocessorLines)
+ Parser.CurrentLines = &Parser.Lines;
+ }
+
+private:
+ UnwrappedLineParser &Parser;
+ const bool SwitchToPreprocessorLines;
+
+ UnwrappedLine *PreBlockLine;
+};
+
+UnwrappedLineParser::UnwrappedLineParser(
+ clang::DiagnosticsEngine &Diag, const FormatStyle &Style,
+ FormatTokenSource &Tokens, UnwrappedLineConsumer &Callback)
+ : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
+ CurrentLines(&Lines), Diag(Diag), Style(Style), Tokens(&Tokens),
+ Callback(Callback) {}
+
+bool UnwrappedLineParser::parse() {
+ DEBUG(llvm::dbgs() << "----\n");
+ readToken();
+ bool Error = parseFile();
+ for (std::vector<UnwrappedLine>::iterator I = Lines.begin(), E = Lines.end();
+ I != E; ++I) {
+ Callback.consumeUnwrappedLine(*I);
+ }
+
+ // Create line with eof token.
+ pushToken(FormatTok);
+ Callback.consumeUnwrappedLine(*Line);
+
+ return Error;
+}
+
+bool UnwrappedLineParser::parseFile() {
+ ScopedDeclarationState DeclarationState(
+ *Line, DeclarationScopeStack,
+ /*MustBeDeclaration=*/ !Line->InPPDirective);
+ bool Error = parseLevel(/*HasOpeningBrace=*/ false);
+ // Make sure to format the remaining tokens.
+ flushComments(true);
+ addUnwrappedLine();
+ return Error;
+}
+
+bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
+ bool Error = false;
+ do {
+ switch (FormatTok.Tok.getKind()) {
+ case tok::comment:
+ nextToken();
+ addUnwrappedLine();
+ break;
+ case tok::l_brace:
+ // FIXME: Add parameter whether this can happen - if this happens, we must
+ // be in a non-declaration context.
+ Error |= parseBlock(/*MustBeDeclaration=*/ false);
+ addUnwrappedLine();
+ break;
+ case tok::r_brace:
+ if (HasOpeningBrace) {
+ return false;
+ } else {
+ Diag.Report(FormatTok.Tok.getLocation(),
+ Diag.getCustomDiagID(clang::DiagnosticsEngine::Error,
+ "unexpected '}'"));
+ Error = true;
+ nextToken();
+ addUnwrappedLine();
+ }
+ break;
+ default:
+ parseStructuralElement();
+ break;
+ }
+ } while (!eof());
+ return Error;
+}
+
+bool UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
+ unsigned AddLevels) {
+ assert(FormatTok.Tok.is(tok::l_brace) && "'{' expected");
+ nextToken();
+
+ addUnwrappedLine();
+
+ ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
+ MustBeDeclaration);
+ Line->Level += AddLevels;
+ parseLevel(/*HasOpeningBrace=*/ true);
+
+ if (!FormatTok.Tok.is(tok::r_brace)) {
+ Line->Level -= AddLevels;
+ return true;
+ }
+
+ nextToken(); // Munch the closing brace.
+ Line->Level -= AddLevels;
+ return false;
+}
+
+void UnwrappedLineParser::parsePPDirective() {
+ assert(FormatTok.Tok.is(tok::hash) && "'#' expected");
+ ScopedMacroState MacroState(*Line, Tokens, FormatTok);
+ nextToken();
+
+ if (FormatTok.Tok.getIdentifierInfo() == NULL) {
+ parsePPUnknown();
+ return;
+ }
+
+ switch (FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) {
+ case tok::pp_define:
+ parsePPDefine();
+ break;
+ default:
+ parsePPUnknown();
+ break;
+ }
+}
+
+void UnwrappedLineParser::parsePPDefine() {
+ nextToken();
+
+ if (FormatTok.Tok.getKind() != tok::identifier) {
+ parsePPUnknown();
+ return;
+ }
+ nextToken();
+ if (FormatTok.Tok.getKind() == tok::l_paren &&
+ FormatTok.WhiteSpaceLength == 0) {
+ parseParens();
+ }
+ addUnwrappedLine();
+ Line->Level = 1;
+
+ // Errors during a preprocessor directive can only affect the layout of the
+ // preprocessor directive, and thus we ignore them. An alternative approach
+ // would be to use the same approach we use on the file level (no
+ // re-indentation if there was a structural error) within the macro
+ // definition.
+ parseFile();
+}
+
+void UnwrappedLineParser::parsePPUnknown() {
+ do {
+ nextToken();
+ } while (!eof());
+ addUnwrappedLine();
+}
+
+void UnwrappedLineParser::parseStructuralElement() {
+ assert(!FormatTok.Tok.is(tok::l_brace));
+ int TokenNumber = 0;
+ switch (FormatTok.Tok.getKind()) {
+ case tok::at:
+ nextToken();
+ if (FormatTok.Tok.is(tok::l_brace)) {
+ parseBracedList();
+ break;
+ }
+ switch (FormatTok.Tok.getObjCKeywordID()) {
+ case tok::objc_public:
+ case tok::objc_protected:
+ case tok::objc_package:
+ case tok::objc_private:
+ return parseAccessSpecifier();
+ case tok::objc_interface:
+ case tok::objc_implementation:
+ return parseObjCInterfaceOrImplementation();
+ case tok::objc_protocol:
+ return parseObjCProtocol();
+ case tok::objc_end:
+ return; // Handled by the caller.
+ case tok::objc_optional:
+ case tok::objc_required:
+ nextToken();
+ addUnwrappedLine();
+ return;
+ default:
+ break;
+ }
+ break;
+ case tok::kw_namespace:
+ parseNamespace();
+ return;
+ case tok::kw_inline:
+ nextToken();
+ TokenNumber++;
+ if (FormatTok.Tok.is(tok::kw_namespace)) {
+ parseNamespace();
+ return;
+ }
+ break;
+ case tok::kw_public:
+ case tok::kw_protected:
+ case tok::kw_private:
+ parseAccessSpecifier();
+ return;
+ case tok::kw_if:
+ parseIfThenElse();
+ return;
+ case tok::kw_for:
+ case tok::kw_while:
+ parseForOrWhileLoop();
+ return;
+ case tok::kw_do:
+ parseDoWhile();
+ return;
+ case tok::kw_switch:
+ parseSwitch();
+ return;
+ case tok::kw_default:
+ nextToken();
+ parseLabel();
+ return;
+ case tok::kw_case:
+ parseCaseLabel();
+ return;
+ case tok::kw_return:
+ parseReturn();
+ return;
+ case tok::kw_extern:
+ nextToken();
+ if (FormatTok.Tok.is(tok::string_literal)) {
+ nextToken();
+ if (FormatTok.Tok.is(tok::l_brace)) {
+ parseBlock(/*MustBeDeclaration=*/ true, 0);
+ addUnwrappedLine();
+ return;
+ }
+ }
+ // In all other cases, parse the declaration.
+ break;
+ default:
+ break;
+ }
+ do {
+ ++TokenNumber;
+ switch (FormatTok.Tok.getKind()) {
+ case tok::at:
+ nextToken();
+ if (FormatTok.Tok.is(tok::l_brace))
+ parseBracedList();
+ break;
+ case tok::kw_enum:
+ parseEnum();
+ break;
+ case tok::kw_struct:
+ case tok::kw_union:
+ case tok::kw_class:
+ parseRecord();
+ // A record declaration or definition is always the start of a structural
+ // element.
+ break;
+ case tok::semi:
+ nextToken();
+ addUnwrappedLine();
+ return;
+ case tok::r_brace:
+ addUnwrappedLine();
+ return;
+ case tok::l_paren:
+ parseParens();
+ break;
+ case tok::l_brace:
+ // A block outside of parentheses must be the last part of a
+ // structural element.
+ // FIXME: Figure out cases where this is not true, and add projections for
+ // them (the one we know is missing are lambdas).
+ parseBlock(/*MustBeDeclaration=*/ false);
+ addUnwrappedLine();
+ return;
+ case tok::identifier:
+ nextToken();
+ if (TokenNumber == 1 && FormatTok.Tok.is(tok::colon)) {
+ parseLabel();
+ return;
+ }
+ break;
+ case tok::equal:
+ nextToken();
+ if (FormatTok.Tok.is(tok::l_brace)) {
+ parseBracedList();
+ }
+ break;
+ default:
+ nextToken();
+ break;
+ }
+ } while (!eof());
+}
+
+void UnwrappedLineParser::parseBracedList() {
+ nextToken();
+
+ do {
+ switch (FormatTok.Tok.getKind()) {
+ case tok::l_brace:
+ parseBracedList();
+ break;
+ case tok::r_brace:
+ nextToken();
+ return;
+ default:
+ nextToken();
+ break;
+ }
+ } while (!eof());
+}
+
+void UnwrappedLineParser::parseReturn() {
+ nextToken();
+
+ do {
+ switch (FormatTok.Tok.getKind()) {
+ case tok::l_brace:
+ parseBracedList();
+ break;
+ case tok::l_paren:
+ parseParens();
+ break;
+ case tok::r_brace:
+ // Assume missing ';'.
+ addUnwrappedLine();
+ return;
+ case tok::semi:
+ nextToken();
+ addUnwrappedLine();
+ return;
+ default:
+ nextToken();
+ break;
+ }
+ } while (!eof());
+}
+
+void UnwrappedLineParser::parseParens() {
+ assert(FormatTok.Tok.is(tok::l_paren) && "'(' expected.");
+ nextToken();
+ do {
+ switch (FormatTok.Tok.getKind()) {
+ case tok::l_paren:
+ parseParens();
+ break;
+ case tok::r_paren:
+ nextToken();
+ return;
+ case tok::l_brace: {
+ nextToken();
+ ScopedLineState LineState(*this);
+ ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
+ /*MustBeDeclaration=*/ false);
+ Line->Level += 1;
+ parseLevel(/*HasOpeningBrace=*/ true);
+ Line->Level -= 1;
+ break;
+ }
+ case tok::at:
+ nextToken();
+ if (FormatTok.Tok.is(tok::l_brace))
+ parseBracedList();
+ break;
+ default:
+ nextToken();
+ break;
+ }
+ } while (!eof());
+}
+
+void UnwrappedLineParser::parseIfThenElse() {
+ assert(FormatTok.Tok.is(tok::kw_if) && "'if' expected");
+ nextToken();
+ if (FormatTok.Tok.is(tok::l_paren))
+ parseParens();
+ bool NeedsUnwrappedLine = false;
+ if (FormatTok.Tok.is(tok::l_brace)) {
+ parseBlock(/*MustBeDeclaration=*/ false);
+ NeedsUnwrappedLine = true;
+ } else {
+ addUnwrappedLine();
+ ++Line->Level;
+ parseStructuralElement();
+ --Line->Level;
+ }
+ if (FormatTok.Tok.is(tok::kw_else)) {
+ nextToken();
+ if (FormatTok.Tok.is(tok::l_brace)) {
+ parseBlock(/*MustBeDeclaration=*/ false);
+ addUnwrappedLine();
+ } else if (FormatTok.Tok.is(tok::kw_if)) {
+ parseIfThenElse();
+ } else {
+ addUnwrappedLine();
+ ++Line->Level;
+ parseStructuralElement();
+ --Line->Level;
+ }
+ } else if (NeedsUnwrappedLine) {
+ addUnwrappedLine();
+ }
+}
+
+void UnwrappedLineParser::parseNamespace() {
+ assert(FormatTok.Tok.is(tok::kw_namespace) && "'namespace' expected");
+ nextToken();
+ if (FormatTok.Tok.is(tok::identifier))
+ nextToken();
+ if (FormatTok.Tok.is(tok::l_brace)) {
+ parseBlock(/*MustBeDeclaration=*/ true, 0);
+ // Munch the semicolon after a namespace. This is more common than one would
+ // think. Puttin the semicolon into its own line is very ugly.
+ if (FormatTok.Tok.is(tok::semi))
+ nextToken();
+ addUnwrappedLine();
+ }
+ // FIXME: Add error handling.
+}
+
+void UnwrappedLineParser::parseForOrWhileLoop() {
+ assert((FormatTok.Tok.is(tok::kw_for) || FormatTok.Tok.is(tok::kw_while)) &&
+ "'for' or 'while' expected");
+ nextToken();
+ if (FormatTok.Tok.is(tok::l_paren))
+ parseParens();
+ if (FormatTok.Tok.is(tok::l_brace)) {
+ parseBlock(/*MustBeDeclaration=*/ false);
+ addUnwrappedLine();
+ } else {
+ addUnwrappedLine();
+ ++Line->Level;
+ parseStructuralElement();
+ --Line->Level;
+ }
+}
+
+void UnwrappedLineParser::parseDoWhile() {
+ assert(FormatTok.Tok.is(tok::kw_do) && "'do' expected");
+ nextToken();
+ if (FormatTok.Tok.is(tok::l_brace)) {
+ parseBlock(/*MustBeDeclaration=*/ false);
+ } else {
+ addUnwrappedLine();
+ ++Line->Level;
+ parseStructuralElement();
+ --Line->Level;
+ }
+
+ // FIXME: Add error handling.
+ if (!FormatTok.Tok.is(tok::kw_while)) {
+ addUnwrappedLine();
+ return;
+ }
+
+ nextToken();
+ parseStructuralElement();
+}
+
+void UnwrappedLineParser::parseLabel() {
+ if (FormatTok.Tok.isNot(tok::colon))
+ return;
+ nextToken();
+ unsigned OldLineLevel = Line->Level;
+ if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
+ --Line->Level;
+ if (CommentsBeforeNextToken.empty() && FormatTok.Tok.is(tok::l_brace)) {
+ parseBlock(/*MustBeDeclaration=*/ false);
+ if (FormatTok.Tok.is(tok::kw_break))
+ parseStructuralElement(); // "break;" after "}" goes on the same line.
+ }
+ addUnwrappedLine();
+ Line->Level = OldLineLevel;
+}
+
+void UnwrappedLineParser::parseCaseLabel() {
+ assert(FormatTok.Tok.is(tok::kw_case) && "'case' expected");
+ // FIXME: fix handling of complex expressions here.
+ do {
+ nextToken();
+ } while (!eof() && !FormatTok.Tok.is(tok::colon));
+ parseLabel();
+}
+
+void UnwrappedLineParser::parseSwitch() {
+ assert(FormatTok.Tok.is(tok::kw_switch) && "'switch' expected");
+ nextToken();
+ if (FormatTok.Tok.is(tok::l_paren))
+ parseParens();
+ if (FormatTok.Tok.is(tok::l_brace)) {
+ parseBlock(/*MustBeDeclaration=*/ false, Style.IndentCaseLabels ? 2 : 1);
+ addUnwrappedLine();
+ } else {
+ addUnwrappedLine();
+ Line->Level += (Style.IndentCaseLabels ? 2 : 1);
+ parseStructuralElement();
+ Line->Level -= (Style.IndentCaseLabels ? 2 : 1);
+ }
+}
+
+void UnwrappedLineParser::parseAccessSpecifier() {
+ nextToken();
+ // Otherwise, we don't know what it is, and we'd better keep the next token.
+ if (FormatTok.Tok.is(tok::colon))
+ nextToken();
+ addUnwrappedLine();
+}
+
+void UnwrappedLineParser::parseEnum() {
+ nextToken();
+ if (FormatTok.Tok.is(tok::identifier) ||
+ FormatTok.Tok.is(tok::kw___attribute) ||
+ FormatTok.Tok.is(tok::kw___declspec)) {
+ nextToken();
+ // We can have macros or attributes in between 'enum' and the enum name.
+ if (FormatTok.Tok.is(tok::l_paren)) {
+ parseParens();
+ }
+ if (FormatTok.Tok.is(tok::identifier))
+ nextToken();
+ }
+ if (FormatTok.Tok.is(tok::l_brace)) {
+ nextToken();
+ addUnwrappedLine();
+ ++Line->Level;
+ do {
+ switch (FormatTok.Tok.getKind()) {
+ case tok::l_paren:
+ parseParens();
+ break;
+ case tok::r_brace:
+ addUnwrappedLine();
+ nextToken();
+ --Line->Level;
+ return;
+ case tok::comma:
+ nextToken();
+ addUnwrappedLine();
+ break;
+ default:
+ nextToken();
+ break;
+ }
+ } while (!eof());
+ }
+ // We fall through to parsing a structural element afterwards, so that in
+ // enum A {} n, m;
+ // "} n, m;" will end up in one unwrapped line.
+}
+
+void UnwrappedLineParser::parseRecord() {
+ nextToken();
+ if (FormatTok.Tok.is(tok::identifier) ||
+ FormatTok.Tok.is(tok::kw___attribute) ||
+ FormatTok.Tok.is(tok::kw___declspec)) {
+ nextToken();
+ // We can have macros or attributes in between 'class' and the class name.
+ if (FormatTok.Tok.is(tok::l_paren)) {
+ parseParens();
+ }
+ // The actual identifier can be a nested name specifier, and in macros
+ // it is often token-pasted.
+ while (FormatTok.Tok.is(tok::identifier) ||
+ FormatTok.Tok.is(tok::coloncolon) || FormatTok.Tok.is(tok::hashhash))
+ nextToken();
+
+ // Note that parsing away template declarations here leads to incorrectly
+ // accepting function declarations as record declarations.
+ // In general, we cannot solve this problem. Consider:
+ // class A<int> B() {}
+ // which can be a function definition or a class definition when B() is a
+ // macro. If we find enough real-world cases where this is a problem, we
+ // can parse for the 'template' keyword in the beginning of the statement,
+ // and thus rule out the record production in case there is no template
+ // (this would still leave us with an ambiguity between template function
+ // and class declarations).
+ if (FormatTok.Tok.is(tok::colon) || FormatTok.Tok.is(tok::less)) {
+ while (!eof() && FormatTok.Tok.isNot(tok::l_brace)) {
+ if (FormatTok.Tok.is(tok::semi))
+ return;
+ nextToken();
+ }
+ }
+ }
+ if (FormatTok.Tok.is(tok::l_brace))
+ parseBlock(/*MustBeDeclaration=*/ true);
+ // We fall through to parsing a structural element afterwards, so
+ // class A {} n, m;
+ // will end up in one unwrapped line.
+}
+
+void UnwrappedLineParser::parseObjCProtocolList() {
+ assert(FormatTok.Tok.is(tok::less) && "'<' expected.");
+ do
+ nextToken();
+ while (!eof() && FormatTok.Tok.isNot(tok::greater));
+ nextToken(); // Skip '>'.
+}
+
+void UnwrappedLineParser::parseObjCUntilAtEnd() {
+ do {
+ if (FormatTok.Tok.isObjCAtKeyword(tok::objc_end)) {
+ nextToken();
+ addUnwrappedLine();
+ break;
+ }
+ parseStructuralElement();
+ } while (!eof());
+}
+
+void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
+ nextToken();
+ nextToken(); // interface name
+
+ // @interface can be followed by either a base class, or a category.
+ if (FormatTok.Tok.is(tok::colon)) {
+ nextToken();
+ nextToken(); // base class name
+ } else if (FormatTok.Tok.is(tok::l_paren))
+ // Skip category, if present.
+ parseParens();
+
+ if (FormatTok.Tok.is(tok::less))
+ parseObjCProtocolList();
+
+ // If instance variables are present, keep the '{' on the first line too.
+ if (FormatTok.Tok.is(tok::l_brace))
+ parseBlock(/*MustBeDeclaration=*/ true);
+
+ // With instance variables, this puts '}' on its own line. Without instance
+ // variables, this ends the @interface line.
+ addUnwrappedLine();
+
+ parseObjCUntilAtEnd();
+}
+
+void UnwrappedLineParser::parseObjCProtocol() {
+ nextToken();
+ nextToken(); // protocol name
+
+ if (FormatTok.Tok.is(tok::less))
+ parseObjCProtocolList();
+
+ // Check for protocol declaration.
+ if (FormatTok.Tok.is(tok::semi)) {
+ nextToken();
+ return addUnwrappedLine();
+ }
+
+ addUnwrappedLine();
+ parseObjCUntilAtEnd();
+}
+
+void UnwrappedLineParser::addUnwrappedLine() {
+ if (Line->Tokens.empty())
+ return;
+ DEBUG({
+ llvm::dbgs() << "Line(" << Line->Level << ")"
+ << (Line->InPPDirective ? " MACRO" : "") << ": ";
+ for (std::list<FormatToken>::iterator I = Line->Tokens.begin(),
+ E = Line->Tokens.end();
+ I != E; ++I) {
+ llvm::dbgs() << I->Tok.getName() << " ";
+
+ }
+ llvm::dbgs() << "\n";
+ });
+ CurrentLines->push_back(*Line);
+ Line->Tokens.clear();
+ if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
+ for (std::vector<UnwrappedLine>::iterator
+ I = PreprocessorDirectives.begin(),
+ E = PreprocessorDirectives.end();
+ I != E; ++I) {
+ CurrentLines->push_back(*I);
+ }
+ PreprocessorDirectives.clear();
+ }
+}
+
+bool UnwrappedLineParser::eof() const { return FormatTok.Tok.is(tok::eof); }
+
+void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
+ bool JustComments = Line->Tokens.empty();
+ for (SmallVectorImpl<FormatToken>::const_iterator
+ I = CommentsBeforeNextToken.begin(),
+ E = CommentsBeforeNextToken.end();
+ I != E; ++I) {
+ if (I->NewlinesBefore && JustComments) {
+ addUnwrappedLine();
+ }
+ pushToken(*I);
+ }
+ if (NewlineBeforeNext && JustComments) {
+ addUnwrappedLine();
+ }
+ CommentsBeforeNextToken.clear();
+}
+
+void UnwrappedLineParser::nextToken() {
+ if (eof())
+ return;
+ flushComments(FormatTok.NewlinesBefore > 0);
+ pushToken(FormatTok);
+ readToken();
+}
+
+void UnwrappedLineParser::readToken() {
+ bool CommentsInCurrentLine = true;
+ do {
+ FormatTok = Tokens->getNextToken();
+ while (!Line->InPPDirective && FormatTok.Tok.is(tok::hash) &&
+ ((FormatTok.NewlinesBefore > 0 && FormatTok.HasUnescapedNewline) ||
+ FormatTok.IsFirst)) {
+ // If there is an unfinished unwrapped line, we flush the preprocessor
+ // directives only after that unwrapped line was finished later.
+ bool SwitchToPreprocessorLines =
+ !Line->Tokens.empty() && CurrentLines == &Lines;
+ ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
+ // Comments stored before the preprocessor directive need to be output
+ // before the preprocessor directive, at the same level as the
+ // preprocessor directive, as we consider them to apply to the directive.
+ flushComments(FormatTok.NewlinesBefore > 0);
+ parsePPDirective();
+ }
+ if (!FormatTok.Tok.is(tok::comment))
+ return;
+ if (FormatTok.NewlinesBefore > 0 || FormatTok.IsFirst) {
+ CommentsInCurrentLine = false;
+ }
+ if (CommentsInCurrentLine) {
+ pushToken(FormatTok);
+ } else {
+ CommentsBeforeNextToken.push_back(FormatTok);
+ }
+ } while (!eof());
+}
+
+void UnwrappedLineParser::pushToken(const FormatToken &Tok) {
+ Line->Tokens.push_back(Tok);
+ if (MustBreakBeforeNextToken) {
+ Line->Tokens.back().MustBreakBefore = true;
+ MustBreakBeforeNextToken = false;
+ }
+}
+
+} // end namespace format
+} // end namespace clang
diff --git a/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.h b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.h
new file mode 100644
index 000000000000..f4fecc5ef0ac
--- /dev/null
+++ b/contrib/llvm/tools/clang/lib/Format/UnwrappedLineParser.h
@@ -0,0 +1,201 @@
+//===--- UnwrappedLineParser.h - Format C++ code ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains the declaration of the UnwrappedLineParser,
+/// which turns a stream of tokens into UnwrappedLines.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H
+#define LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H
+
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Format/Format.h"
+#include "clang/Lex/Lexer.h"
+#include <list>
+
+namespace clang {
+
+class DiagnosticsEngine;
+
+namespace format {
+
+/// \brief A wrapper around a \c Token storing information about the
+/// whitespace characters preceeding it.
+struct FormatToken {
+ FormatToken()
+ : NewlinesBefore(0), HasUnescapedNewline(false), WhiteSpaceLength(0),
+ LastNewlineOffset(0), TokenLength(0), IsFirst(false),
+ MustBreakBefore(false) {}
+
+ /// \brief The \c Token.
+ Token Tok;
+
+ /// \brief The number of newlines immediately before the \c Token.
+ ///
+ /// This can be used to determine what the user wrote in the original code
+ /// and thereby e.g. leave an empty line between two function definitions.
+ unsigned NewlinesBefore;
+
+ /// \brief Whether there is at least one unescaped newline before the \c
+ /// Token.
+ bool HasUnescapedNewline;
+
+ /// \brief The location of the start of the whitespace immediately preceeding
+ /// the \c Token.
+ ///
+ /// Used together with \c WhiteSpaceLength to create a \c Replacement.
+ SourceLocation WhiteSpaceStart;
+
+ /// \brief The length in characters of the whitespace immediately preceeding
+ /// the \c Token.
+ unsigned WhiteSpaceLength;
+
+ /// \brief The offset just past the last '\n' in this token's leading
+ /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'.
+ unsigned LastNewlineOffset;
+
+ /// \brief The length of the non-whitespace parts of the token. This is
+ /// necessary because we need to handle escaped newlines that are stored
+ /// with the token.
+ unsigned TokenLength;
+
+ /// \brief Indicates that this is the first token.
+ bool IsFirst;
+
+ /// \brief Whether there must be a line break before this token.
+ ///
+ /// This happens for example when a preprocessor directive ended directly
+ /// before the token.
+ bool MustBreakBefore;
+};
+
+/// \brief An unwrapped line is a sequence of \c Token, that we would like to
+/// put on a single line if there was no column limit.
+///
+/// This is used as a main interface between the \c UnwrappedLineParser and the
+/// \c UnwrappedLineFormatter. The key property is that changing the formatting
+/// within an unwrapped line does not affect any other unwrapped lines.
+struct UnwrappedLine {
+ UnwrappedLine() : Level(0), InPPDirective(false), MustBeDeclaration(false) {
+ }
+
+ // FIXME: Don't use std::list here.
+ /// \brief The \c Tokens comprising this \c UnwrappedLine.
+ std::list<FormatToken> Tokens;
+
+ /// \brief The indent level of the \c UnwrappedLine.
+ unsigned Level;
+
+ /// \brief Whether this \c UnwrappedLine is part of a preprocessor directive.
+ bool InPPDirective;
+
+ bool MustBeDeclaration;
+};
+
+class UnwrappedLineConsumer {
+public:
+ virtual ~UnwrappedLineConsumer() {
+ }
+ virtual void consumeUnwrappedLine(const UnwrappedLine &Line) = 0;
+};
+
+class FormatTokenSource {
+public:
+ virtual ~FormatTokenSource() {
+ }
+ virtual FormatToken getNextToken() = 0;
+};
+
+class UnwrappedLineParser {
+public:
+ UnwrappedLineParser(clang::DiagnosticsEngine &Diag, const FormatStyle &Style,
+ FormatTokenSource &Tokens,
+ UnwrappedLineConsumer &Callback);
+
+ /// Returns true in case of a structural error.
+ bool parse();
+
+private:
+ bool parseFile();
+ bool parseLevel(bool HasOpeningBrace);
+ bool parseBlock(bool MustBeDeclaration, unsigned AddLevels = 1);
+ void parsePPDirective();
+ void parsePPDefine();
+ void parsePPUnknown();
+ void parseStructuralElement();
+ void parseBracedList();
+ void parseReturn();
+ void parseParens();
+ void parseIfThenElse();
+ void parseForOrWhileLoop();
+ void parseDoWhile();
+ void parseLabel();
+ void parseCaseLabel();
+ void parseSwitch();
+ void parseNamespace();
+ void parseAccessSpecifier();
+ void parseEnum();
+ void parseRecord();
+ void parseObjCProtocolList();
+ void parseObjCUntilAtEnd();
+ void parseObjCInterfaceOrImplementation();
+ void parseObjCProtocol();
+ void addUnwrappedLine();
+ bool eof() const;
+ void nextToken();
+ void readToken();
+ void flushComments(bool NewlineBeforeNext);
+ void pushToken(const FormatToken &Tok);
+
+ // FIXME: We are constantly running into bugs where Line.Level is incorrectly
+ // subtracted from beyond 0. Introduce a method to subtract from Line.Level
+ // and use that everywhere in the Parser.
+ OwningPtr<UnwrappedLine> Line;
+
+ // Comments are sorted into unwrapped lines by whether they are in the same
+ // line as the previous token, or not. If not, they belong to the next token.
+ // Since the next token might already be in a new unwrapped line, we need to
+ // store the comments belonging to that token.
+ SmallVector<FormatToken, 1> CommentsBeforeNextToken;
+ FormatToken FormatTok;
+ bool MustBreakBeforeNextToken;
+
+ // The parsed lines. Only added to through \c CurrentLines.
+ std::vector<UnwrappedLine> Lines;
+
+ // Preprocessor directives are parsed out-of-order from other unwrapped lines.
+ // Thus, we need to keep a list of preprocessor directives to be reported
+ // after an unwarpped line that has been started was finished.
+ std::vector<UnwrappedLine> PreprocessorDirectives;
+
+ // New unwrapped lines are added via CurrentLines.
+ // Usually points to \c &Lines. While parsing a preprocessor directive when
+ // there is an unfinished previous unwrapped line, will point to
+ // \c &PreprocessorDirectives.
+ std::vector<UnwrappedLine> *CurrentLines;
+
+ // We store for each line whether it must be a declaration depending on
+ // whether we are in a compound statement or not.
+ std::vector<bool> DeclarationScopeStack;
+
+ clang::DiagnosticsEngine &Diag;
+ const FormatStyle &Style;
+ FormatTokenSource *Tokens;
+ UnwrappedLineConsumer &Callback;
+
+ friend class ScopedLineState;
+};
+
+} // end namespace format
+} // end namespace clang
+
+#endif // LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H