about summary refs log tree commit diff
path: root/contrib/llvm/lib/Support/Regex.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Support/Regex.cpp')
-rw-r--r-- contrib/llvm/lib/Support/Regex.cpp 177
1 files changed, 177 insertions, 0 deletions
diff --git a/contrib/llvm/lib/Support/Regex.cpp b/contrib/llvm/lib/Support/Regex.cpp
new file mode 100644
index 000000000000..541364184073
--- /dev/null
+++ b/contrib/llvm/lib/Support/Regex.cpp
@@ -0,0 +1,177 @@
+//===-- Regex.cpp - Regular Expression matcher implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a POSIX regular expression matcher.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Regex.h"
+#include "regex_impl.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <string>
+using namespace llvm;
+
+/// Compile the pattern `regex`. The outcome of llvm_regcomp is kept in the
+/// `error` member; the freshly allocated llvm_regex is kept in `preg`.
+Regex::Regex(StringRef regex, unsigned Flags) {
+  // Translate the public flag bits into llvm_regcomp flags. REG_PEND is always
+  // passed, and extended syntax is used unless BasicRegex was requested.
+  unsigned CompFlags = REG_PEND;
+  CompFlags |= (Flags & IgnoreCase) ? REG_ICASE : 0;
+  CompFlags |= (Flags & Newline) ? REG_NEWLINE : 0;
+  CompFlags |= (Flags & BasicRegex) ? 0 : REG_EXTENDED;
+
+  // The end of the pattern text is recorded in re_endp before compiling.
+  preg = new llvm_regex();
+  preg->re_endp = regex.end();
+  error = llvm_regcomp(preg, regex.data(), CompFlags);
+}
+
+Regex::~Regex() { // Tear down the pattern object reached through preg.
+ llvm_regfree(preg); // presumably releases what llvm_regcomp attached to *preg; must precede the delete
+ delete preg; // destroy the llvm_regex object the constructor allocated with new
+}
+
+bool Regex::isValid(std::string &Error) {
+ if (!error)
+ return true;
+
+ size_t len = llvm_regerror(error, preg, NULL, 0);
+
+ Error.resize(len - 1);
+ llvm_regerror(error, preg, &Error[0], len);
+ return false;
+}
+
+/// getNumMatches - In a valid regex, return the number of parenthesized groups (re_nsub).
+/// match() reports one entry more than this, because its entry 0 is the whole match.
+unsigned Regex::getNumMatches() const {
+ return preg->re_nsub;
+}
+
+/// Run the compiled pattern over String and return true if it matched. When
+/// Matches is non-null it is cleared and refilled on success: entry 0 is the
+/// whole match, followed by one entry per parenthesized group (an empty
+/// StringRef for a group whose start offset came back as -1). If llvm_regexec
+/// fails with anything other than REG_NOMATCH, the code is stored in `error`
+/// and false is returned.
+bool Regex::match(StringRef String, SmallVectorImpl<StringRef> *Matches) {
+  // Result slots are only requested when the caller wants match positions.
+  const unsigned NumSlots = Matches ? preg->re_nsub+1 : 0;
+
+  // The slot array must hold at least one element: slot 0 is pre-loaded with
+  // the [0, size) range of the input and REG_STARTEND is passed to the matcher.
+  SmallVector<llvm_regmatch_t, 8> Slots;
+  Slots.resize(NumSlots == 0 ? 1 : NumSlots);
+  Slots[0].rm_so = 0;
+  Slots[0].rm_eo = String.size();
+
+  const int Status =
+      llvm_regexec(preg, String.data(), NumSlots, Slots.data(), REG_STARTEND);
+
+  if (Status == REG_NOMATCH)
+    return false;
+  if (Status != 0) {
+    // Failures such as an invalid pattern or memory exhaustion are remembered.
+    error = Status;
+    return false;
+  }
+
+  // Matched; nothing more to do unless positions were asked for.
+  if (!Matches)
+    return true;
+
+  Matches->clear();
+  for (unsigned Idx = 0; Idx < NumSlots; ++Idx) {
+    const llvm_regmatch_t &Slot = Slots[Idx];
+    if (Slot.rm_so == -1) {
+      // This group took no part in the match.
+      Matches->push_back(StringRef());
+    } else {
+      assert(Slot.rm_eo >= Slot.rm_so);
+      Matches->push_back(StringRef(String.data()+Slot.rm_so,
+                                   Slot.rm_eo-Slot.rm_so));
+    }
+  }
+  return true;
+}
+
+/// Return a copy of String in which the match of this regex is replaced by
+/// Repl. If nothing matches, String is returned unchanged. Inside Repl a
+/// backslash starts an escape: "\t" and "\n" produce a tab and a newline, a
+/// run of decimal digits is a backreference to that match entry (0 being the
+/// whole match), and any other escaped character stands for itself. When
+/// Error is non-null it is emptied on entry and receives the first problem
+/// found (a trailing backslash or an invalid backreference).
+std::string Regex::sub(StringRef Repl, StringRef String,
+                       std::string *Error) {
+  // Clear any stale message left in the caller's error string.
+  if (Error && !Error->empty()) *Error = "";
+
+  // Without a match the input is handed back as is.
+  SmallVector<StringRef, 8> Groups;
+  if (!match(String, &Groups))
+    return String;
+
+  // Everything ahead of the match is copied through verbatim.
+  std::string Out(String.begin(), Groups[0].begin());
+
+  // Expand the replacement one backslash-delimited piece at a time.
+  for (StringRef Rest = Repl; !Rest.empty(); ) {
+    const std::pair<StringRef, StringRef> Pieces = Rest.split('\\');
+
+    // Literal text up to the next backslash (or to the end).
+    Out += Pieces.first;
+
+    if (Pieces.second.empty()) {
+      // Nothing follows: either there was no backslash at all, or it was the
+      // last character, in which case the first piece is shorter than Rest.
+      const bool EndsInBackslash = Rest.size() != Pieces.first.size();
+      if (EndsInBackslash && Error && Error->empty())
+        *Error = "replacement string contained trailing backslash";
+      break;
+    }
+
+    // Continue after the backslash and decode the escape that starts there.
+    Rest = Pieces.second;
+    const char Escape = Rest[0];
+
+    if (Escape >= '0' && Escape <= '9') {
+      // Backreference: consume the whole run of decimal digits.
+      const StringRef Digits =
+          Rest.slice(0, Rest.find_first_not_of("0123456789"));
+      Rest = Rest.substr(Digits.size());
+
+      unsigned GroupNo;
+      const bool Parsed = !Digits.getAsInteger(10, GroupNo);
+      if (Parsed && GroupNo < Groups.size())
+        Out += Groups[GroupNo];
+      else if (Error && Error->empty())
+        *Error = "invalid backreference string '" + Digits.str() + "'";
+      continue;
+    }
+
+    // Single-character escapes; unrecognized characters quote themselves.
+    if (Escape == 't')
+      Out += '\t';
+    else if (Escape == 'n')
+      Out += '\n';
+    else
+      Out += Escape;
+    Rest = Rest.substr(1);
+  }
+
+  // Finish with the text that follows the match.
+  Out += StringRef(Groups[0].end(), String.end() - Groups[0].end());
+
+  return Out;
+}
+
+/// Return true if Str contains none of the characters listed below, i.e. it
+/// can be treated as literal text when used as an extended regular expression.
+bool Regex::isLiteralERE(StringRef Str) {
+  // Regex metacharacters. The list was derived from the regcomp.c
+  // implementation and double checked against the POSIX extended regular
+  // expression specification.
+  static const char Metachars[] = "()^$|*+?.[]\\{}";
+  const bool HasMetachar = Str.find_first_of(Metachars) != StringRef::npos;
+  return !HasMetachar;
+}