aboutsummaryrefslogtreecommitdiff
path: root/usr.bin/grep
diff options
context:
space:
mode:
author: Ed Maste <emaste@FreeBSD.org> 2017-04-21 14:36:09 +0000
committer: Ed Maste <emaste@FreeBSD.org> 2017-04-21 14:36:09 +0000
commit: 3f39ffc893961a8e73eea2bb70fed8ed11e0edd2 (patch)
tree: d3cede57898b94e45c2548f87a2ab2b0c9925dbe /usr.bin/grep
parent: b98b5ae8eceb6939b1ba74ab27e4398fbad47a97 (diff)
download: src-3f39ffc893961a8e73eea2bb70fed8ed11e0edd2.tar.gz
          src-3f39ffc893961a8e73eea2bb70fed8ed11e0edd2.zip
bsdgrep: add BSD_GREP_FASTMATCH knob for built-in fastmatch
Bugs have been found in the fastmatch implementation as used in bsdgrep.
Some have been fixed (r316495) while fixes for others are in review
(D10098).

In comparison with the fastmatch implementation, Kyle Evans found that:
- regex(3)'s performance with literal expressions offers a speed
  improvement over fastmatch
- regex(3)'s performance, both with simple BREs and EREs, seems to be
  comparable

The regex implementation was imported in r226035, and the commit message
reports:

    This is a temporary solution until the whole regex library is not
    replaced so that BSD grep development can continue and the
    backported code gets some review and testing. This change only
    improves scalability slightly, there is no big performance boost
    yet but several minor bugs have been found and fixed.

Introduce a WITH_/WITHOUT_BSD_GREP_FASTMATCH knob to support testing of
both approaches.

PR: 175314, 194823
Submitted by: Kyle Evans <kevans91 at ksu.edu>
Reviewed by: bdrewery (in part)
Differential Revision: https://reviews.freebsd.org/D10282
Notes
Notes: svn path=/head/; revision=317254
Diffstat (limited to 'usr.bin/grep')
-rw-r--r-- usr.bin/grep/Makefile 6
-rw-r--r-- usr.bin/grep/grep.c 25
-rw-r--r-- usr.bin/grep/grep.h 4
-rw-r--r-- usr.bin/grep/util.c 8
4 files changed, 34 insertions, 9 deletions
diff --git a/usr.bin/grep/Makefile b/usr.bin/grep/Makefile
index 7c676591f83b..c6b25f06182d 100644
--- a/usr.bin/grep/Makefile
+++ b/usr.bin/grep/Makefile
@@ -15,10 +15,14 @@ bsdgrep.1: grep.1
.endif
SRCS= file.c grep.c queue.c util.c
-# Extra files ported backported form some regex improvements
+.if ${MK_BSD_GREP_FASTMATCH} == "yes"
+# Extra files ported backported for some regex improvements
.PATH: ${.CURDIR}/regex
SRCS+= fastmatch.c hashtable.c tre-compile.c tre-fastmatch.c
CFLAGS+=-I${.CURDIR}/regex
+.else
+CFLAGS+= -DWITHOUT_FASTMATCH
+.endif
CFLAGS.gcc+= --param max-inline-insns-single=500
diff --git a/usr.bin/grep/grep.c b/usr.bin/grep/grep.c
index be0a7355c968..20aec53f59cc 100644
--- a/usr.bin/grep/grep.c
+++ b/usr.bin/grep/grep.c
@@ -49,7 +49,9 @@ __FBSDID("$FreeBSD$");
#include <string.h>
#include <unistd.h>
+#ifndef WITHOUT_FASTMATCH
#include "fastmatch.h"
+#endif
#include "grep.h"
#ifndef WITHOUT_NLS
@@ -86,7 +88,9 @@ unsigned int patterns;
static unsigned int pattern_sz;
struct pat *pattern;
regex_t *r_pattern;
+#ifndef WITHOUT_FASTMATCH
fastmatch_t *fg_pattern;
+#endif
/* Filename exclusion/inclusion patterns */
unsigned int fpatterns, dpatterns;
@@ -715,20 +719,25 @@ main(int argc, char *argv[])
usage();
}
+#ifndef WITHOUT_FASTMATCH
fg_pattern = grep_calloc(patterns, sizeof(*fg_pattern));
+#endif
r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
/* Check if cheating is allowed (always is for fgrep). */
for (i = 0; i < patterns; ++i) {
+#ifndef WITHOUT_FASTMATCH
+ /* Attempt compilation with fastmatch regex and fallback to
+ regex(3) if it fails. */
if (fastncomp(&fg_pattern[i], pattern[i].pat,
- pattern[i].len, cflags) != 0) {
- /* Fall back to full regex library */
- c = regcomp(&r_pattern[i], pattern[i].pat, cflags);
- if (c != 0) {
- regerror(c, &r_pattern[i], re_error,
- RE_ERROR_BUF);
- errx(2, "%s", re_error);
- }
+ pattern[i].len, cflags) == 0)
+ continue;
+#endif
+ c = regcomp(&r_pattern[i], pattern[i].pat, cflags);
+ if (c != 0) {
+ regerror(c, &r_pattern[i], re_error,
+ RE_ERROR_BUF);
+ errx(2, "%s", re_error);
}
}
diff --git a/usr.bin/grep/grep.h b/usr.bin/grep/grep.h
index c82e6bb9cbc9..ea17af4099dd 100644
--- a/usr.bin/grep/grep.h
+++ b/usr.bin/grep/grep.h
@@ -36,7 +36,9 @@
#include <stdio.h>
#include <zlib.h>
+#ifndef WITHOUT_FASTMATCH
#include "fastmatch.h"
+#endif
#ifdef WITHOUT_NLS
#define getstr(n) errstr[n]
@@ -127,7 +129,9 @@ extern unsigned int dpatterns, fpatterns, patterns;
extern struct pat *pattern;
extern struct epat *dpattern, *fpattern;
extern regex_t *er_pattern, *r_pattern;
+#ifndef WITHOUT_FASTMATCH
extern fastmatch_t *fg_pattern;
+#endif
/* For regex errors */
#define RE_ERROR_BUF 512
diff --git a/usr.bin/grep/util.c b/usr.bin/grep/util.c
index 285f88364d3f..8815d4f34227 100644
--- a/usr.bin/grep/util.c
+++ b/usr.bin/grep/util.c
@@ -49,7 +49,9 @@ __FBSDID("$FreeBSD$");
#include <wchar.h>
#include <wctype.h>
+#ifndef WITHOUT_FASTMATCH
#include "fastmatch.h"
+#endif
#include "grep.h"
static int linesqueued;
@@ -317,10 +319,12 @@ procline(struct str *l, int nottext)
for (i = 0; i < patterns; i++) {
pmatch.rm_so = st;
pmatch.rm_eo = l->len;
+#ifndef WITHOUT_FASTMATCH
if (fg_pattern[i].pattern)
r = fastexec(&fg_pattern[i],
l->dat, 1, &pmatch, leflags);
else
+#endif
r = regexec(&r_pattern[i], l->dat, 1,
&pmatch, leflags);
r = (r == 0) ? 0 : REG_NOMATCH;
@@ -332,7 +336,11 @@ procline(struct str *l, int nottext)
(size_t)pmatch.rm_eo != l->len)
r = REG_NOMATCH;
/* Check for whole word match */
+#ifndef WITHOUT_FASTMATCH
if (r == 0 && (wflag || fg_pattern[i].word)) {
+#else
+ if (r == 0 && wflag) {
+#endif
wchar_t wbegin, wend;
wbegin = wend = L' ';