aboutsummaryrefslogtreecommitdiff
path: root/lib/Lex/Lexer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/Lex/Lexer.cpp')
-rw-r--r--lib/Lex/Lexer.cpp191
1 files changed, 151 insertions, 40 deletions
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 52a7a04567a6..a91e40435cb0 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -33,7 +33,7 @@
#include <cctype>
using namespace clang;
-static void InitCharacterInfo();
+static void InitCharacterInfo(LangOptions);
//===----------------------------------------------------------------------===//
// Token Class Implementation
@@ -59,7 +59,7 @@ tok::ObjCKeywordKind Token::getObjCKeywordID() const {
void Lexer::InitLexer(const char *BufStart, const char *BufPtr,
const char *BufEnd) {
- InitCharacterInfo();
+ InitCharacterInfo(Features);
BufferStart = BufStart;
BufferPtr = BufPtr;
@@ -70,7 +70,7 @@ void Lexer::InitLexer(const char *BufStart, const char *BufPtr,
" to simplify lexing!");
Is_PragmaLexer = false;
- IsEofCodeCompletion = false;
+ IsInConflictMarker = false;
// Start of the file is a start of line.
IsAtStartOfLine = true;
@@ -105,10 +105,6 @@ Lexer::Lexer(FileID FID, const llvm::MemoryBuffer *InputFile, Preprocessor &PP)
// Default to keeping comments if the preprocessor wants them.
SetCommentRetentionState(PP.getCommentRetentionState());
-
- // If the input file is truncated, the EOF is a code-completion token.
- if (PP.getSourceManager().isTruncatedFile(FID))
- IsEofCodeCompletion = true;
}
/// Lexer constructor - Create a new raw lexer object. This object is only
@@ -258,7 +254,7 @@ enum {
// Statically initialize CharInfo table based on ASCII character set
// Reference: FreeBSD 7.2 /usr/share/misc/ascii
-static const unsigned char CharInfo[256] =
+static unsigned char CharInfo[256] =
{
// 0 NUL 1 SOH 2 STX 3 ETX
// 4 EOT 5 ENQ 6 ACK 7 BEL
@@ -326,7 +322,7 @@ static const unsigned char CharInfo[256] =
0 , 0 , 0 , 0
};
-static void InitCharacterInfo() {
+static void InitCharacterInfo(LangOptions Features) {
static bool isInited = false;
if (isInited) return;
// check the statically-initialized CharInfo table
@@ -344,6 +340,11 @@ static void InitCharacterInfo() {
}
for (unsigned i = '0'; i <= '9'; ++i)
assert(CHAR_NUMBER == CharInfo[i]);
+
+ if (Features.Microsoft)
+ // Hack to treat DOS & CP/M EOF (^Z) as horizontal whitespace.
+ CharInfo[26/*sub*/] = CHAR_HORZ_WS;
+
isInited = true;
}
@@ -1326,24 +1327,16 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) {
// Otherwise, check if we are code-completing, then issue diagnostics for
// unterminated #if and missing newline.
- if (IsEofCodeCompletion) {
- bool isIntendedFile = true;
- if (PP && FileLoc.isFileID()) {
- SourceManager &SM = PP->getSourceManager();
- isIntendedFile = SM.isTruncatedFile(SM.getFileID(FileLoc));
- }
+ if (PP && PP->isCodeCompletionFile(FileLoc)) {
+ // We're at the end of the file, but we've been asked to consider the
+ // end of the file to be a code-completion token. Return the
+ // code-completion token.
+ Result.startToken();
+ FormTokenWithChars(Result, CurPtr, tok::code_completion);
- if (isIntendedFile) {
- // We're at the end of the file, but we've been asked to consider the
- // end of the file to be a code-completion token. Return the
- // code-completion token.
- Result.startToken();
- FormTokenWithChars(Result, CurPtr, tok::code_completion);
-
- // Only do the eof -> code_completion translation once.
- IsEofCodeCompletion = false;
- return true;
- }
+ // Only do the eof -> code_completion translation once.
+ PP->SetCodeCompletionPoint(0, 0, 0);
+ return true;
}
// If we are in a #if directive, emit an error.
@@ -1398,6 +1391,105 @@ unsigned Lexer::isNextPPTokenLParen() {
return Tok.is(tok::l_paren);
}
+/// FindConflictEnd - Find the end of a version control conflict marker.
+static const char *FindConflictEnd(const char *CurPtr, const char *BufferEnd) {
+ llvm::StringRef RestOfBuffer(CurPtr+7, BufferEnd-CurPtr-7);
+ size_t Pos = RestOfBuffer.find(">>>>>>>");
+ while (Pos != llvm::StringRef::npos) {
+ // Must occur at start of line.
+ if (RestOfBuffer[Pos-1] != '\r' &&
+ RestOfBuffer[Pos-1] != '\n') {
+ RestOfBuffer = RestOfBuffer.substr(Pos+7);
+ continue;
+ }
+ return RestOfBuffer.data()+Pos;
+ }
+ return 0;
+}
+
+/// IsStartOfConflictMarker - If the specified pointer is the start of a version
+/// control conflict marker like '<<<<<<<', recognize it as such, emit an error
+/// and recover nicely. This returns true if it is a conflict marker and false
+/// if not.
+bool Lexer::IsStartOfConflictMarker(const char *CurPtr) {
+ // Only a conflict marker if it starts at the beginning of a line.
+ if (CurPtr != BufferStart &&
+ CurPtr[-1] != '\n' && CurPtr[-1] != '\r')
+ return false;
+
+ // Check to see if we have <<<<<<<.
+ if (BufferEnd-CurPtr < 8 ||
+ llvm::StringRef(CurPtr, 7) != "<<<<<<<")
+ return false;
+
+ // If we have a situation where we don't care about conflict markers, ignore
+ // it.
+ if (IsInConflictMarker || isLexingRawMode())
+ return false;
+
+ // Check to see if there is a >>>>>>> somewhere in the buffer at the start of
+ // a line to terminate this conflict marker.
+ if (FindConflictEnd(CurPtr+7, BufferEnd)) {
+ // We found a match. We are really in a conflict marker.
+ // Diagnose this, and ignore to the end of line.
+ Diag(CurPtr, diag::err_conflict_marker);
+ IsInConflictMarker = true;
+
+ // Skip ahead to the end of line. We know this exists because the
+ // end-of-conflict marker starts with \r or \n.
+ while (*CurPtr != '\r' && *CurPtr != '\n') {
+ assert(CurPtr != BufferEnd && "Didn't find end of line");
+ ++CurPtr;
+ }
+ BufferPtr = CurPtr;
+ return true;
+ }
+
+ // No end of conflict marker found.
+ return false;
+}
+
+
+/// HandleEndOfConflictMarker - If this is a '=======' or '|||||||' or '>>>>>>>'
+/// marker, then it is the end of a conflict marker. Handle it by ignoring up
+/// until the end of the line. This returns true if it is a conflict marker and
+/// false if not.
+bool Lexer::HandleEndOfConflictMarker(const char *CurPtr) {
+ // Only a conflict marker if it starts at the beginning of a line.
+ if (CurPtr != BufferStart &&
+ CurPtr[-1] != '\n' && CurPtr[-1] != '\r')
+ return false;
+
+ // If we have a situation where we don't care about conflict markers, ignore
+ // it.
+ if (!IsInConflictMarker || isLexingRawMode())
+ return false;
+
+ // Check to see if we have the marker (7 characters in a row).
+ for (unsigned i = 1; i != 7; ++i)
+ if (CurPtr[i] != CurPtr[0])
+ return false;
+
+ // If we do have it, search for the end of the conflict marker. This could
+ // fail if it got skipped with a '#if 0' or something. Note that CurPtr might
+ // be the end of conflict marker.
+ if (const char *End = FindConflictEnd(CurPtr, BufferEnd)) {
+ CurPtr = End;
+
+ // Skip ahead to the end of line.
+ while (CurPtr != BufferEnd && *CurPtr != '\r' && *CurPtr != '\n')
+ ++CurPtr;
+
+ BufferPtr = CurPtr;
+
+ // No longer in the conflict marker.
+ IsInConflictMarker = false;
+ return true;
+ }
+
+ return false;
+}
+
/// LexTokenInternal - This implements a simple C family lexer. It is an
/// extremely performance critical piece of code. This assumes that the buffer
@@ -1764,14 +1856,20 @@ LexNextToken:
Char = getCharAndSize(CurPtr, SizeTmp);
if (ParsingFilename) {
return LexAngledStringLiteral(Result, CurPtr);
- } else if (Char == '<' &&
- getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '=') {
- Kind = tok::lesslessequal;
- CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
- SizeTmp2, Result);
} else if (Char == '<') {
- CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
- Kind = tok::lessless;
+ char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
+ if (After == '=') {
+ Kind = tok::lesslessequal;
+ CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+ SizeTmp2, Result);
+ } else if (After == '<' && IsStartOfConflictMarker(CurPtr-1)) {
+ // If this is actually a '<<<<<<<' version control conflict marker,
+ // recognize it as such and recover nicely.
+ goto LexNextToken;
+ } else {
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ Kind = tok::lessless;
+ }
} else if (Char == '=') {
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
Kind = tok::lessequal;
@@ -1790,14 +1888,20 @@ LexNextToken:
if (Char == '=') {
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
Kind = tok::greaterequal;
- } else if (Char == '>' &&
- getCharAndSize(CurPtr+SizeTmp, SizeTmp2) == '=') {
- CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
- SizeTmp2, Result);
- Kind = tok::greatergreaterequal;
} else if (Char == '>') {
- CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
- Kind = tok::greatergreater;
+ char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
+ if (After == '=') {
+ CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
+ SizeTmp2, Result);
+ Kind = tok::greatergreaterequal;
+ } else if (After == '>' && HandleEndOfConflictMarker(CurPtr-1)) {
+ // If this is '>>>>>>>' and we're in a conflict marker, ignore it.
+ goto LexNextToken;
+ } else {
+ CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+ Kind = tok::greatergreater;
+ }
+
} else {
Kind = tok::greater;
}
@@ -1817,6 +1921,9 @@ LexNextToken:
Kind = tok::pipeequal;
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
} else if (Char == '|') {
+ // If this is '|||||||' and we're in a conflict marker, ignore it.
+ if (CurPtr[1] == '|' && HandleEndOfConflictMarker(CurPtr-1))
+ goto LexNextToken;
Kind = tok::pipepipe;
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
} else {
@@ -1841,6 +1948,10 @@ LexNextToken:
case '=':
Char = getCharAndSize(CurPtr, SizeTmp);
if (Char == '=') {
+ // If this is '=======' and we're in a conflict marker, ignore it.
+ if (CurPtr[1] == '=' && HandleEndOfConflictMarker(CurPtr-1))
+ goto LexNextToken;
+
Kind = tok::equalequal;
CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
} else {