| author    | Dimitry Andric <dim@FreeBSD.org> | 2017-12-18 20:11:37 +0000 |
|-----------|----------------------------------|---------------------------|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2017-12-18 20:11:37 +0000 |
| commit    | 461a67fa15370a9ec88f8f8a240bf7c123bb2029 (patch) | |
| tree      | 6942083d7d56bba40ec790a453ca58ad3baf6832 /lib/Lex/TokenLexer.cpp | |
| parent    | 75c3240472ba6ac2669ee72ca67eb72d4e2851fc (diff) | |
Vendor import of clang trunk r321017 (tag: vendor/clang/clang-trunk-r321017)
Notes:
svn path=/vendor/clang/dist/; revision=326941
svn path=/vendor/clang/clang-trunk-r321017/; revision=326942; tag=vendor/clang/clang-trunk-r321017
Diffstat (limited to 'lib/Lex/TokenLexer.cpp')
-rw-r--r-- | lib/Lex/TokenLexer.cpp | 296 |
1 file changed, 237 insertions(+), 59 deletions(-)
diff --git a/lib/Lex/TokenLexer.cpp b/lib/Lex/TokenLexer.cpp
index c2e49ba919a9..d7f1c7a93fda 100644
--- a/lib/Lex/TokenLexer.cpp
+++ b/lib/Lex/TokenLexer.cpp
@@ -1,4 +1,4 @@
-//===--- TokenLexer.cpp - Lex from a token stream -------------------------===//
+//===- TokenLexer.cpp - Lex from a token stream ---------------------------===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -12,12 +12,25 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Lex/TokenLexer.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/IdentifierTable.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TokenKinds.h"
 #include "clang/Lex/LexDiagnostic.h"
+#include "clang/Lex/Lexer.h"
 #include "clang/Lex/MacroArgs.h"
 #include "clang/Lex/MacroInfo.h"
 #include "clang/Lex/Preprocessor.h"
+#include "clang/Lex/Token.h"
+#include "clang/Lex/VariadicMacroSupport.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
+#include <cassert>
+#include <cstring>
 
 using namespace clang;
 
@@ -31,7 +44,7 @@ void TokenLexer::Init(Token &Tok, SourceLocation ELEnd, MacroInfo *MI,
 
   Macro = MI;
   ActualArgs = Actuals;
-  CurToken = 0;
+  CurTokenIdx = 0;
 
   ExpandLocStart = Tok.getLocation();
   ExpandLocEnd = ELEnd;
@@ -90,7 +103,7 @@ void TokenLexer::Init(const Token *TokArray, unsigned NumToks,
   OwnsTokens = ownsTokens;
   DisableMacroExpansion = disableMacroExpansion;
   NumTokens = NumToks;
-  CurToken = 0;
+  CurTokenIdx = 0;
   ExpandLocStart = ExpandLocEnd = SourceLocation();
   AtStartOfLine = false;
   HasLeadingSpace = false;
@@ -168,6 +181,59 @@ bool TokenLexer::MaybeRemoveCommaBeforeVaArgs(
   return true;
 }
 
+void TokenLexer::stringifyVAOPTContents(
+    SmallVectorImpl<Token> &ResultToks, const VAOptExpansionContext &VCtx,
+    const SourceLocation VAOPTClosingParenLoc) {
+  const int NumToksPriorToVAOpt = VCtx.getNumberOfTokensPriorToVAOpt();
+  const unsigned int NumVAOptTokens = ResultToks.size() - NumToksPriorToVAOpt;
+  Token *const VAOPTTokens =
+      NumVAOptTokens ? &ResultToks[NumToksPriorToVAOpt] : nullptr;
+
+  SmallVector<Token, 64> ConcatenatedVAOPTResultToks;
+  // FIXME: Should we keep track within VCtx that we did or didnot
+  // encounter pasting - and only then perform this loop.
+
+  // Perform token pasting (concatenation) prior to stringization.
+  for (unsigned int CurTokenIdx = 0; CurTokenIdx != NumVAOptTokens;
+       ++CurTokenIdx) {
+    if (VAOPTTokens[CurTokenIdx].is(tok::hashhash)) {
+      assert(CurTokenIdx != 0 &&
+             "Can not have __VAOPT__ contents begin with a ##");
+      Token &LHS = VAOPTTokens[CurTokenIdx - 1];
+      pasteTokens(LHS, llvm::makeArrayRef(VAOPTTokens, NumVAOptTokens),
+                  CurTokenIdx);
+      // Replace the token prior to the first ## in this iteration.
+      ConcatenatedVAOPTResultToks.back() = LHS;
+      if (CurTokenIdx == NumVAOptTokens)
+        break;
+    }
+    ConcatenatedVAOPTResultToks.push_back(VAOPTTokens[CurTokenIdx]);
+  }
+
+  ConcatenatedVAOPTResultToks.push_back(VCtx.getEOFTok());
+  // Get the SourceLocation that represents the start location within
+  // the macro definition that marks where this string is substituted
+  // into: i.e. the __VA_OPT__ and the ')' within the spelling of the
+  // macro definition, and use it to indicate that the stringified token
+  // was generated from that location.
+  const SourceLocation ExpansionLocStartWithinMacro =
+      getExpansionLocForMacroDefLoc(VCtx.getVAOptLoc());
+  const SourceLocation ExpansionLocEndWithinMacro =
+      getExpansionLocForMacroDefLoc(VAOPTClosingParenLoc);
+
+  Token StringifiedVAOPT = MacroArgs::StringifyArgument(
+      &ConcatenatedVAOPTResultToks[0], PP, VCtx.hasCharifyBefore() /*Charify*/,
+      ExpansionLocStartWithinMacro, ExpansionLocEndWithinMacro);
+
+  if (VCtx.getLeadingSpaceForStringifiedToken())
+    StringifiedVAOPT.setFlag(Token::LeadingSpace);
+
+  StringifiedVAOPT.setFlag(Token::StringifiedInMacro);
+  // Resize (shrink) the token stream to just capture this stringified token.
+  ResultToks.resize(NumToksPriorToVAOpt + 1);
+  ResultToks.back() = StringifiedVAOPT;
+}
+
 /// Expand the arguments of a function-like macro so that we can quickly
 /// return preexpanded tokens from Tokens.
 void TokenLexer::ExpandFunctionArguments() {
@@ -178,28 +244,117 @@ void TokenLexer::ExpandFunctionArguments() {
   // we install the newly expanded sequence as the new 'Tokens' list.
   bool MadeChange = false;
 
-  for (unsigned i = 0, e = NumTokens; i != e; ++i) {
-    // If we found the stringify operator, get the argument stringified.  The
-    // preprocessor already verified that the following token is a macro name
-    // when the #define was parsed.
-    const Token &CurTok = Tokens[i];
+  const bool CalledWithVariadicArguments =
+      ActualArgs->invokedWithVariadicArgument(Macro);
+
+  VAOptExpansionContext VCtx(PP);
+
+  for (unsigned I = 0, E = NumTokens; I != E; ++I) {
+    const Token &CurTok = Tokens[I];
 
     // We don't want a space for the next token after a paste
     // operator.  In valid code, the token will get smooshed onto the
     // preceding one anyway. In assembler-with-cpp mode, invalid
     // pastes are allowed through: in this case, we do not want the
     // extra whitespace to be added.  For example, we want ". ## foo"
     // -> ".foo" not ". foo".
-    if (i != 0 && !Tokens[i-1].is(tok::hashhash) && CurTok.hasLeadingSpace())
+    if (I != 0 && !Tokens[I-1].is(tok::hashhash) && CurTok.hasLeadingSpace())
       NextTokGetsSpace = true;
 
-    if (CurTok.isOneOf(tok::hash, tok::hashat)) {
-      int ArgNo = Macro->getParameterNum(Tokens[i+1].getIdentifierInfo());
-      assert(ArgNo != -1 && "Token following # is not an argument?");
+    if (VCtx.isVAOptToken(CurTok)) {
+      MadeChange = true;
+      assert(Tokens[I + 1].is(tok::l_paren) &&
+             "__VA_OPT__ must be followed by '('");
+
+      ++I; // Skip the l_paren
+      VCtx.sawVAOptFollowedByOpeningParens(CurTok.getLocation(),
+                                           ResultToks.size());
+
+      continue;
+    }
+    // We have entered into the __VA_OPT__ context, so handle tokens
+    // appropriately.
+    if (VCtx.isInVAOpt()) {
+      // If we are about to process a token that is either an argument to
+      // __VA_OPT__ or its closing rparen, then:
+      //  1) If the token is the closing rparen that exits us out of __VA_OPT__,
+      //  perform any necessary stringification or placemarker processing,
+      //  and/or skip to the next token.
+      //  2) else if macro was invoked without variadic arguments skip this
+      //  token.
+      //  3) else (macro was invoked with variadic arguments) process the token
+      //  normally.
+
+      if (Tokens[I].is(tok::l_paren))
+        VCtx.sawOpeningParen(Tokens[I].getLocation());
+      // Continue skipping tokens within __VA_OPT__ if the macro was not
+      // called with variadic arguments, else let the rest of the loop handle
+      // this token. Note sawClosingParen() returns true only if the r_paren
+      // matches the closing r_paren of the __VA_OPT__.
+      if (!Tokens[I].is(tok::r_paren) || !VCtx.sawClosingParen()) {
+        if (!CalledWithVariadicArguments) {
+          // Skip this token.
+          continue;
+        }
+        // ... else the macro was called with variadic arguments, and we do not
+        // have a closing rparen - so process this token normally.
+      } else {
+        // Current token is the closing r_paren which marks the end of the
+        // __VA_OPT__ invocation, so handle any place-marker pasting (if
+        // empty) by removing hashhash either before (if exists) or after. And
+        // also stringify the entire contents if VAOPT was preceded by a hash,
+        // but do so only after any token concatenation that needs to occur
+        // within the contents of VAOPT.
+
+        if (VCtx.hasStringifyOrCharifyBefore()) {
+          // Replace all the tokens just added from within VAOPT into a single
+          // stringified token. This requires token-pasting to eagerly occur
+          // within these tokens. If either the contents of VAOPT were empty
+          // or the macro wasn't called with any variadic arguments, the result
+          // is a token that represents an empty string.
+          stringifyVAOPTContents(ResultToks, VCtx,
+                                 /*ClosingParenLoc*/ Tokens[I].getLocation());
+
+        } else if (/*No tokens within VAOPT*/ !(
+                       ResultToks.size() - VCtx.getNumberOfTokensPriorToVAOpt())) {
+          // Treat VAOPT as a placemarker token. Eat either the '##' before the
+          // RHS/VAOPT (if one exists, suggesting that the LHS (if any) to that
+          // hashhash was not a placemarker) or the '##'
+          // after VAOPT, but not both.
+
+          if (ResultToks.size() && ResultToks.back().is(tok::hashhash)) {
+            ResultToks.pop_back();
+          } else if ((I + 1 != E) && Tokens[I + 1].is(tok::hashhash)) {
+            ++I; // Skip the following hashhash.
+          }
+        }
+        VCtx.reset();
+        // We processed __VA_OPT__'s closing paren (and the exit out of
+        // __VA_OPT__), so skip to the next token.
+        continue;
+      }
+    }
+
+    // If we found the stringify operator, get the argument stringified. The
+    // preprocessor already verified that the following token is a macro
+    // parameter or __VA_OPT__ when the #define was lexed.
+
+    if (CurTok.isOneOf(tok::hash, tok::hashat)) {
+      int ArgNo = Macro->getParameterNum(Tokens[I+1].getIdentifierInfo());
+      assert((ArgNo != -1 || VCtx.isVAOptToken(Tokens[I + 1])) &&
+             "Token following # is not an argument or __VA_OPT__!");
+
+      if (ArgNo == -1) {
+        // Handle the __VA_OPT__ case.
+        VCtx.sawHashOrHashAtBefore(NextTokGetsSpace,
+                                   CurTok.is(tok::hashat));
+        continue;
+      }
+      // Else handle the simple argument case.
       SourceLocation ExpansionLocStart =
           getExpansionLocForMacroDefLoc(CurTok.getLocation());
       SourceLocation ExpansionLocEnd =
-          getExpansionLocForMacroDefLoc(Tokens[i+1].getLocation());
+          getExpansionLocForMacroDefLoc(Tokens[I+1].getLocation());
 
       Token Res;
       if (CurTok.is(tok::hash))  // Stringify
@@ -222,7 +377,7 @@ void TokenLexer::ExpandFunctionArguments() {
 
       ResultToks.push_back(Res);
       MadeChange = true;
-      ++i;  // Skip arg name.
+      ++I;  // Skip arg name.
       NextTokGetsSpace = false;
       continue;
     }
@@ -230,9 +385,11 @@ void TokenLexer::ExpandFunctionArguments() {
     // Find out if there is a paste (##) operator before or after the token.
     bool NonEmptyPasteBefore =
       !ResultToks.empty() && ResultToks.back().is(tok::hashhash);
-    bool PasteBefore = i != 0 && Tokens[i-1].is(tok::hashhash);
-    bool PasteAfter = i+1 != e && Tokens[i+1].is(tok::hashhash);
-    assert(!NonEmptyPasteBefore || PasteBefore);
+    bool PasteBefore = I != 0 && Tokens[I-1].is(tok::hashhash);
+    bool PasteAfter = I+1 != E && Tokens[I+1].is(tok::hashhash);
+
+    assert((!NonEmptyPasteBefore || PasteBefore || VCtx.isInVAOpt()) &&
+           "unexpected ## in ResultToks");
 
     // Otherwise, if this is not an argument token, just add the token to the
     // output buffer.
@@ -275,7 +432,7 @@ void TokenLexer::ExpandFunctionArguments() {
     // avoids some work in common cases.
     const Token *ArgTok = ActualArgs->getUnexpArgument(ArgNo);
     if (ActualArgs->ArgNeedsPreexpansion(ArgTok, PP))
-      ResultArgToks = &ActualArgs->getPreExpArgument(ArgNo, Macro, PP)[0];
+      ResultArgToks = &ActualArgs->getPreExpArgument(ArgNo, PP)[0];
     else
       ResultArgToks = ArgTok;  // Use non-preexpanded tokens.
 
@@ -374,7 +531,7 @@ void TokenLexer::ExpandFunctionArguments() {
     if (PasteAfter) {
       // Discard the argument token and skip (don't copy to the expansion
       // buffer) the paste operator after it.
-      ++i;
+      ++I;
       continue;
     }
 
@@ -384,7 +541,13 @@ void TokenLexer::ExpandFunctionArguments() {
     assert(PasteBefore);
     if (NonEmptyPasteBefore) {
       assert(ResultToks.back().is(tok::hashhash));
-      ResultToks.pop_back();
+      // Do not remove the paste operator if it is the one before __VA_OPT__
+      // (and we are still processing tokens within VA_OPT). We handle the case
+      // of removing the paste operator if __VA_OPT__ reduces to the notional
+      // placemarker above when we encounter the closing paren of VA_OPT.
+      if (!VCtx.isInVAOpt() ||
+          ResultToks.size() > VCtx.getNumberOfTokensPriorToVAOpt())
+        ResultToks.pop_back();
     }
 
     // If this is the __VA_ARGS__ token, and if the argument wasn't provided,
@@ -420,7 +583,6 @@ static bool isWideStringLiteralFromMacro(const Token &FirstTok,
 }
 
 /// Lex - Lex and return a token from this macro stream.
-///
 bool TokenLexer::Lex(Token &Tok) {
   // Lexing off the end of the macro, pop this macro off the expansion stack.
   if (isAtEnd()) {
@@ -431,7 +593,7 @@ bool TokenLexer::Lex(Token &Tok) {
     Tok.startToken();
     Tok.setFlagValue(Token::StartOfLine , AtStartOfLine);
     Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace || NextTokGetsSpace);
-    if (CurToken == 0)
+    if (CurTokenIdx == 0)
       Tok.setFlag(Token::LeadingEmptyMacro);
     return PP.HandleEndOfTokenLexer(Tok);
   }
@@ -440,25 +602,25 @@ bool TokenLexer::Lex(Token &Tok) {
 
   // If this is the first token of the expanded result, we inherit spacing
   // properties later.
-  bool isFirstToken = CurToken == 0;
+  bool isFirstToken = CurTokenIdx == 0;
 
   // Get the next token to return.
-  Tok = Tokens[CurToken++];
+  Tok = Tokens[CurTokenIdx++];
 
   bool TokenIsFromPaste = false;
 
   // If this token is followed by a token paste (##) operator, paste the tokens!
   // Note that ## is a normal token when not expanding a macro.
   if (!isAtEnd() && Macro &&
-      (Tokens[CurToken].is(tok::hashhash) ||
+      (Tokens[CurTokenIdx].is(tok::hashhash) ||
        // Special processing of L#x macros in -fms-compatibility mode.
       // Microsoft compiler is able to form a wide string literal from
       // 'L#macro_arg' construct in a function-like macro.
       (PP.getLangOpts().MSVCCompat &&
-        isWideStringLiteralFromMacro(Tok, Tokens[CurToken])))) {
+        isWideStringLiteralFromMacro(Tok, Tokens[CurTokenIdx])))) {
     // When handling the microsoft /##/ extension, the final token is
-    // returned by PasteTokens, not the pasted token.
-    if (PasteTokens(Tok))
+    // returned by pasteTokens, not the pasted token.
+    if (pasteTokens(Tok))
       return true;
 
     TokenIsFromPaste = true;
@@ -521,40 +683,57 @@ bool TokenLexer::Lex(Token &Tok) {
   return true;
 }
 
-/// PasteTokens - Tok is the LHS of a ## operator, and CurToken is the ##
+bool TokenLexer::pasteTokens(Token &Tok) {
+  return pasteTokens(Tok, llvm::makeArrayRef(Tokens, NumTokens), CurTokenIdx);
+}
+
+/// LHSTok is the LHS of a ## operator, and CurTokenIdx is the ##
 /// operator.  Read the ## and RHS, and paste the LHS/RHS together.  If there
-/// are more ## after it, chomp them iteratively.  Return the result as Tok.
+/// are more ## after it, chomp them iteratively.  Return the result as LHSTok.
 /// If this returns true, the caller should immediately return the token.
-bool TokenLexer::PasteTokens(Token &Tok) {
+bool TokenLexer::pasteTokens(Token &LHSTok, ArrayRef<Token> TokenStream,
+                             unsigned int &CurIdx) {
+  assert(CurIdx > 0 && "## can not be the first token within tokens");
+  assert((TokenStream[CurIdx].is(tok::hashhash) ||
+          (PP.getLangOpts().MSVCCompat &&
+           isWideStringLiteralFromMacro(LHSTok, TokenStream[CurIdx]))) &&
+         "Token at this Index must be ## or part of the MSVC 'L "
+         "#macro-arg' pasting pair");
+
   // MSVC: If previous token was pasted, this must be a recovery from an invalid
   // paste operation. Ignore spaces before this token to mimic MSVC output.
   // Required for generating valid UUID strings in some MS headers.
-  if (PP.getLangOpts().MicrosoftExt && (CurToken >= 2) &&
-      Tokens[CurToken - 2].is(tok::hashhash))
-    Tok.clearFlag(Token::LeadingSpace);
+  if (PP.getLangOpts().MicrosoftExt && (CurIdx >= 2) &&
+      TokenStream[CurIdx - 2].is(tok::hashhash))
+    LHSTok.clearFlag(Token::LeadingSpace);
 
   SmallString<128> Buffer;
   const char *ResultTokStrPtr = nullptr;
-  SourceLocation StartLoc = Tok.getLocation();
+  SourceLocation StartLoc = LHSTok.getLocation();
   SourceLocation PasteOpLoc;
+
+  auto IsAtEnd = [&TokenStream, &CurIdx] {
+    return TokenStream.size() == CurIdx;
+  };
+
   do {
     // Consume the ## operator if any.
-    PasteOpLoc = Tokens[CurToken].getLocation();
-    if (Tokens[CurToken].is(tok::hashhash))
-      ++CurToken;
-    assert(!isAtEnd() && "No token on the RHS of a paste operator!");
+    PasteOpLoc = TokenStream[CurIdx].getLocation();
+    if (TokenStream[CurIdx].is(tok::hashhash))
+      ++CurIdx;
+    assert(!IsAtEnd() && "No token on the RHS of a paste operator!");
 
     // Get the RHS token.
-    const Token &RHS = Tokens[CurToken];
+    const Token &RHS = TokenStream[CurIdx];
 
     // Allocate space for the result token.  This is guaranteed to be enough for
     // the two tokens.
-    Buffer.resize(Tok.getLength() + RHS.getLength());
+    Buffer.resize(LHSTok.getLength() + RHS.getLength());
 
     // Get the spelling of the LHS token in Buffer.
    const char *BufPtr = &Buffer[0];
     bool Invalid = false;
-    unsigned LHSLen = PP.getSpelling(Tok, BufPtr, &Invalid);
+    unsigned LHSLen = PP.getSpelling(LHSTok, BufPtr, &Invalid);
     if (BufPtr != &Buffer[0])   // Really, we want the chars in Buffer!
       memcpy(&Buffer[0], BufPtr, LHSLen);
     if (Invalid)
@@ -586,7 +765,7 @@ bool TokenLexer::PasteTokens(Token &Tok) {
 
     // Lex the resultant pasted token into Result.
     Token Result;
-    if (Tok.isAnyIdentifier() && RHS.isAnyIdentifier()) {
+    if (LHSTok.isAnyIdentifier() && RHS.isAnyIdentifier()) {
       // Common paste case: identifier+identifier = identifier.  Avoid creating
       // a lexer and other overhead.
       PP.IncrementPasteCounter(true);
@@ -626,7 +805,7 @@ bool TokenLexer::PasteTokens(Token &Tok) {
      isInvalid |= Result.is(tok::eof);
 
      // If pasting the two tokens didn't form a full new token, this is an
-      // error.  This occurs with "x ## +" and other stuff.  Return with Tok
+      // error.  This occurs with "x ## +" and other stuff.  Return with LHSTok
      // unmodified and with RHS as the next token to lex.
      if (isInvalid) {
        // Explicitly convert the token location to have proper expansion
@@ -637,9 +816,9 @@ bool TokenLexer::PasteTokens(Token &Tok) {
 
        // Test for the Microsoft extension of /##/ turning into // here on the
        // error path.
-        if (PP.getLangOpts().MicrosoftExt && Tok.is(tok::slash) &&
+        if (PP.getLangOpts().MicrosoftExt && LHSTok.is(tok::slash) &&
            RHS.is(tok::slash)) {
-          HandleMicrosoftCommentPaste(Tok, Loc);
+          HandleMicrosoftCommentPaste(LHSTok, Loc);
          return true;
        }
 
@@ -664,15 +843,15 @@ bool TokenLexer::PasteTokens(Token &Tok) {
    }
 
    // Transfer properties of the LHS over the Result.
-    Result.setFlagValue(Token::StartOfLine , Tok.isAtStartOfLine());
-    Result.setFlagValue(Token::LeadingSpace, Tok.hasLeadingSpace());
+    Result.setFlagValue(Token::StartOfLine , LHSTok.isAtStartOfLine());
+    Result.setFlagValue(Token::LeadingSpace, LHSTok.hasLeadingSpace());
 
    // Finally, replace LHS with the result, consume the RHS, and iterate.
-    ++CurToken;
-    Tok = Result;
-  } while (!isAtEnd() && Tokens[CurToken].is(tok::hashhash));
+    ++CurIdx;
+    LHSTok = Result;
+  } while (!IsAtEnd() && TokenStream[CurIdx].is(tok::hashhash));
 
-  SourceLocation EndLoc = Tokens[CurToken - 1].getLocation();
+  SourceLocation EndLoc = TokenStream[CurIdx - 1].getLocation();
 
  // The token's current location indicate where the token was lexed from.  We
  // need this information to compute the spelling of the token, but any
@@ -690,16 +869,16 @@ bool TokenLexer::PasteTokens(Token &Tok) {
  while (SM.getFileID(EndLoc) != MacroFID)
    EndLoc = SM.getImmediateExpansionRange(EndLoc).second;
 
-  Tok.setLocation(SM.createExpansionLoc(Tok.getLocation(), StartLoc, EndLoc,
-                                        Tok.getLength()));
+  LHSTok.setLocation(SM.createExpansionLoc(LHSTok.getLocation(), StartLoc, EndLoc,
+                                           LHSTok.getLength()));
 
  // Now that we got the result token, it will be subject to expansion.  Since
  // token pasting re-lexes the result token in raw mode, identifier information
  // isn't looked up.  As such, if the result is an identifier, look up id info.
-  if (Tok.is(tok::raw_identifier)) {
+  if (LHSTok.is(tok::raw_identifier)) {
    // Look up the identifier info for the token.  We disabled identifier lookup
    // by saying we're skipping contents, so we need to do this manually.
-    PP.LookUpIdentifierInfo(Tok);
+    PP.LookUpIdentifierInfo(LHSTok);
  }
  return false;
 }
@@ -711,7 +890,7 @@ unsigned TokenLexer::isNextTokenLParen() const {
  // Out of tokens?
  if (isAtEnd())
    return 2;
-  return Tokens[CurToken].is(tok::l_paren);
+  return Tokens[CurTokenIdx].is(tok::l_paren);
 }
 
 /// isParsingPreprocessorDirective - Return true if we are in the middle of a
@@ -831,9 +1010,8 @@ static void updateConsecutiveMacroArgTokens(SourceManager &SM,
 /// \brief Creates SLocEntries and updates the locations of macro argument
 /// tokens to their new expanded locations.
 ///
-/// \param ArgIdDefLoc the location of the macro argument id inside the macro
+/// \param ArgIdSpellLoc the location of the macro argument id inside the macro
 /// definition.
-/// \param Tokens the macro argument tokens to update.
 void TokenLexer::updateLocForMacroArgTokens(SourceLocation ArgIdSpellLoc,
                                             Token *begin_tokens,
                                             Token *end_tokens) {
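The bulk of the patch above is clang's handling of C++2a `__VA_OPT__` during macro expansion (placemarker removal, token pasting, and stringification of the `__VA_OPT__` contents), together with the `PasteTokens` to `pasteTokens` and `CurToken` to `CurTokenIdx` renames. The snippet below is not part of the commit; it is a minimal sketch of the comma-elision behaviour those new code paths enable, assuming a compiler with `__VA_OPT__` support (for example a clang built from this import) invoked with `-std=c++2a`.

```cpp
// Illustrative only -- not part of this commit. Assumes a compiler with
// C++2a __VA_OPT__ support, e.g. clang -std=c++2a.
#include <cstdio>

// __VA_OPT__(,) expands to a comma only when variadic arguments are present,
// so LOG("plain\n") does not leave a dangling comma after the format string.
#define LOG(fmt, ...) std::printf(fmt __VA_OPT__(,) __VA_ARGS__)

int main() {
  LOG("plain\n");          // expands to std::printf("plain\n")
  LOG("value: %d\n", 42);  // expands to std::printf("value: %d\n", 42)
  return 0;
}
```

Before `__VA_OPT__`, the same effect required the non-standard GNU `, ## __VA_ARGS__` comma-pasting extension; the `##` placemarker cases that extension relies on are exactly what the expansion logic in this diff has to keep handling alongside the new feature.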