aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWarner Losh <imp@FreeBSD.org>2024-05-14 18:15:43 +0000
committerWarner Losh <imp@FreeBSD.org>2024-05-14 18:17:55 +0000
commiteb690a0576e8cf5412124477eace3bb765068c3d (patch)
treeb1fbad2c6376c9554d37c0039eaab283aa55d8e8
parent305db91d4b92a5d53826dcb6df39fc8a1f3b427e (diff)
parent887b27736b04cef8c4ce6f123c5f4bdc12a8bae4 (diff)
downloadsrc-eb690a0576e8cf5412124477eace3bb765068c3d.tar.gz
src-eb690a0576e8cf5412124477eace3bb765068c3d.zip
awk: Merge in bsd-feature branch of OTA from 20240422 (31bb33a32f71)
In the last 2nd edition import, I mistakenly grabbed from the 'main' branch of upstream rather than the bsd-feature branch. This means that we have a regression in awk from that point forward: all the BSD-specific bit functions (and a few others) were dropped. This restores it at the same level. MFC After: 1 day Sponsored by: Netflix
-rw-r--r--contrib/one-true-awk/ChangeLog24
-rw-r--r--contrib/one-true-awk/awk.152
-rw-r--r--contrib/one-true-awk/awk.h8
-rw-r--r--contrib/one-true-awk/awkgram.y20
-rw-r--r--contrib/one-true-awk/lex.c9
-rw-r--r--contrib/one-true-awk/maketab.c1
-rw-r--r--contrib/one-true-awk/parse.c23
-rw-r--r--contrib/one-true-awk/proto.h3
-rw-r--r--contrib/one-true-awk/run.c240
9 files changed, 377 insertions, 3 deletions
diff --git a/contrib/one-true-awk/ChangeLog b/contrib/one-true-awk/ChangeLog
index 6ce9417c10da..dea4ed7e3187 100644
--- a/contrib/one-true-awk/ChangeLog
+++ b/contrib/one-true-awk/ChangeLog
@@ -47,6 +47,30 @@
* test/T.lilly: Remove gawk warnings from output, improves
portability.
+2019-10-17 Arnold D. Robbins <arnold@skeeve.com>
+
+ Pull in systime() and strftime() from the NetBSD awk.
+
+ * awk.1: Document the functions.
+ * run.c (bltin): Implement the functions.
+ * awk.h: Add defines for systime and strftime.
+ * lex.c: Add support for systime and strftime.
+
+2019-10-07 Arnold D. Robbins <arnold@skeeve.com>
+
+ Integrate features from different *BSD versions of awk.
+ Gensub support from NetBSD. Bitwise functions from OpenBSD.
+
+ * awk.h: Add defines for and, or, xor, compl, lshift and rshift.
+ * awkgram.y: Add support for gensub.
+ * maketab.c: Ditto.
+ * lex.c: Add support for gensub and bitwise functions.
+ * parse.c (node5, op5): New functions.
+ * proto.h (node5, op5): New declarations.
+ * run.c (bltin): Implement the bitwise functions.
+ (gensub): New function.
+ * awk.1: Document additional functions.
+
2019-10-07 Arnold D. Robbins <arnold@skeeve.com>
* b.c (fnematch): Change type of pbuf from unsigned char to char.
diff --git a/contrib/one-true-awk/awk.1 b/contrib/one-true-awk/awk.1
index ef40a0104468..496a2a652379 100644
--- a/contrib/one-true-awk/awk.1
+++ b/contrib/one-true-awk/awk.1
@@ -305,6 +305,25 @@ and
.B gsub
return the number of replacements.
.TP
+\fBgensub(\fIpat\fB, \fIrepl\fB, \fIhow\fR [\fB, \fItarget\fR]\fB)\fR
+replaces instances of
+.I pat
+in
+.I target
+with
+.IR repl .
+If
+.I how
+is \fB"g"\fR or \fB"G"\fR, do so globally. Otherwise,
+.I how
+is a number indicating which occurrence to replace. If no
+.IR target ,
+use
+.BR $0 .
+Return the resulting string;
+.I target
+is not modified.
+.TP
.BI sprintf( fmt , " expr" , " ...\fB)
the string resulting from formatting
.I expr ...
@@ -313,6 +332,28 @@ according to the
format
.IR fmt .
.TP
+.B systime()
+returns the current date and time as a standard
+``seconds since the epoch'' value.
+.TP
+.BI strftime( fmt ", " timestamp\^ )
+formats
+.I timestamp
+(a value in seconds since the epoch)
+according to
+.IR fmt ,
+which is a format string as supported by
+.IR strftime (3).
+Both
+.I timestamp
+and
+.I fmt
+may be omitted; if no
+.IR timestamp ,
+the current time of day is used, and if no
+.IR fmt ,
+a default format of \fB"%a %b %e %H:%M:%S %Z %Y"\fR is used.
+.TP
.BI system( cmd )
executes
.I cmd
@@ -372,6 +413,17 @@ In all cases,
returns 1 for a successful input,
0 for end of file, and \-1 for an error.
.PP
+The functions
+.BR compl ,
+.BR and ,
+.BR or ,
+.BR xor ,
+.BR lshift ,
+and
+.B rshift
+peform the corresponding bitwise operations on their
+operands, which are first truncated to integer.
+.PP
Patterns are arbitrary Boolean combinations
(with
.BR "! || &&" )
diff --git a/contrib/one-true-awk/awk.h b/contrib/one-true-awk/awk.h
index 76180e47f16a..740447ee2167 100644
--- a/contrib/one-true-awk/awk.h
+++ b/contrib/one-true-awk/awk.h
@@ -154,6 +154,14 @@ extern Cell *symtabloc; /* SYMTAB */
#define FTOUPPER 12
#define FTOLOWER 13
#define FFLUSH 14
+#define FAND 15
+#define FFOR 16
+#define FXOR 17
+#define FCOMPL 18
+#define FLSHIFT 19
+#define FRSHIFT 20
+#define FSYSTIME 21
+#define FSTRFTIME 22
/* Node: parse tree is made of nodes, with Cell's at bottom */
diff --git a/contrib/one-true-awk/awkgram.y b/contrib/one-true-awk/awkgram.y
index db804e117e19..233253a4307b 100644
--- a/contrib/one-true-awk/awkgram.y
+++ b/contrib/one-true-awk/awkgram.y
@@ -53,7 +53,7 @@ Node *arglist = 0; /* list of args for current function */
%token <i> FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE ZERO
%token <i> AND BOR APPEND EQ GE GT LE LT NE IN
%token <i> ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
-%token <i> SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
+%token <i> GENSUB SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
%token <i> ADD MINUS MULT DIVIDE MOD
%token <i> ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
%token <i> PRINT PRINTF SPRINTF
@@ -377,6 +377,24 @@ term:
| INCR var { $$ = op1(PREINCR, $2); }
| var DECR { $$ = op1(POSTDECR, $1); }
| var INCR { $$ = op1(POSTINCR, $1); }
+ | GENSUB '(' reg_expr comma pattern comma pattern ')'
+ { $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, rectonode()); }
+ | GENSUB '(' pattern comma pattern comma pattern ')'
+ { if (constnode($3)) {
+ $$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3), 1), $5, $7, rectonode());
+ free($3);
+ } else
+ $$ = op5(GENSUB, (Node *)1, $3, $5, $7, rectonode());
+ }
+ | GENSUB '(' reg_expr comma pattern comma pattern comma pattern ')'
+ { $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, $9); }
+ | GENSUB '(' pattern comma pattern comma pattern comma pattern ')'
+ { if (constnode($3)) {
+ $$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3),1), $5,$7,$9);
+ free($3);
+ } else
+ $$ = op5(GENSUB, (Node *)1, $3, $5, $7, $9);
+ }
| GETLINE var LT term { $$ = op3(GETLINE, $2, itonp($3), $4); }
| GETLINE LT term { $$ = op3(GETLINE, NIL, itonp($2), $3); }
| GETLINE var { $$ = op3(GETLINE, $2, NIL, NIL); }
diff --git a/contrib/one-true-awk/lex.c b/contrib/one-true-awk/lex.c
index 0473a338c906..141cc81d2b59 100644
--- a/contrib/one-true-awk/lex.c
+++ b/contrib/one-true-awk/lex.c
@@ -47,9 +47,11 @@ const Keyword keywords[] = { /* keep sorted: binary searched */
{ "BEGIN", XBEGIN, XBEGIN },
{ "END", XEND, XEND },
{ "NF", VARNF, VARNF },
+ { "and", FAND, BLTIN },
{ "atan2", FATAN, BLTIN },
{ "break", BREAK, BREAK },
{ "close", CLOSE, CLOSE },
+ { "compl", FCOMPL, BLTIN },
{ "continue", CONTINUE, CONTINUE },
{ "cos", FCOS, BLTIN },
{ "delete", DELETE, DELETE },
@@ -61,6 +63,7 @@ const Keyword keywords[] = { /* keep sorted: binary searched */
{ "for", FOR, FOR },
{ "func", FUNC, FUNC },
{ "function", FUNC, FUNC },
+ { "gensub", GENSUB, GENSUB },
{ "getline", GETLINE, GETLINE },
{ "gsub", GSUB, GSUB },
{ "if", IF, IF },
@@ -69,24 +72,30 @@ const Keyword keywords[] = { /* keep sorted: binary searched */
{ "int", FINT, BLTIN },
{ "length", FLENGTH, BLTIN },
{ "log", FLOG, BLTIN },
+ { "lshift", FLSHIFT, BLTIN },
{ "match", MATCHFCN, MATCHFCN },
{ "next", NEXT, NEXT },
{ "nextfile", NEXTFILE, NEXTFILE },
+ { "or", FFOR, BLTIN },
{ "print", PRINT, PRINT },
{ "printf", PRINTF, PRINTF },
{ "rand", FRAND, BLTIN },
{ "return", RETURN, RETURN },
+ { "rshift", FRSHIFT, BLTIN },
{ "sin", FSIN, BLTIN },
{ "split", SPLIT, SPLIT },
{ "sprintf", SPRINTF, SPRINTF },
{ "sqrt", FSQRT, BLTIN },
{ "srand", FSRAND, BLTIN },
+ { "strftime", FSTRFTIME, BLTIN },
{ "sub", SUB, SUB },
{ "substr", SUBSTR, SUBSTR },
{ "system", FSYSTEM, BLTIN },
+ { "systime", FSYSTIME, BLTIN },
{ "tolower", FTOLOWER, BLTIN },
{ "toupper", FTOUPPER, BLTIN },
{ "while", WHILE, WHILE },
+ { "xor", FXOR, BLTIN },
};
#define RET(x) { if(dbg)printf("lex %s\n", tokname(x)); return(x); }
diff --git a/contrib/one-true-awk/maketab.c b/contrib/one-true-awk/maketab.c
index 3747efa03702..3a80c87725ac 100644
--- a/contrib/one-true-awk/maketab.c
+++ b/contrib/one-true-awk/maketab.c
@@ -104,6 +104,7 @@ struct xx
{ ARG, "arg", "arg" },
{ VARNF, "getnf", "NF" },
{ GETLINE, "awkgetline", "getline" },
+ { GENSUB, "gensub", "gensub" },
{ 0, "", "" },
};
diff --git a/contrib/one-true-awk/parse.c b/contrib/one-true-awk/parse.c
index 14608be7570a..2b7fd1928930 100644
--- a/contrib/one-true-awk/parse.c
+++ b/contrib/one-true-awk/parse.c
@@ -93,6 +93,20 @@ Node *node4(int a, Node *b, Node *c, Node *d, Node *e)
return(x);
}
+Node *node5(int a, Node *b, Node *c, Node *d, Node *e, Node *f)
+{
+ Node *x;
+
+ x = nodealloc(5);
+ x->nobj = a;
+ x->narg[0] = b;
+ x->narg[1] = c;
+ x->narg[2] = d;
+ x->narg[3] = e;
+ x->narg[4] = f;
+ return(x);
+}
+
Node *stat1(int a, Node *b)
{
Node *x;
@@ -165,6 +179,15 @@ Node *op4(int a, Node *b, Node *c, Node *d, Node *e)
return(x);
}
+Node *op5(int a, Node *b, Node *c, Node *d, Node *e, Node *f)
+{
+ Node *x;
+
+ x = node5(a,b,c,d,e,f);
+ x->ntype = NEXPR;
+ return(x);
+}
+
Node *celltonode(Cell *a, int b)
{
Node *x;
diff --git a/contrib/one-true-awk/proto.h b/contrib/one-true-awk/proto.h
index ed63e7875da3..b44f9e7a5599 100644
--- a/contrib/one-true-awk/proto.h
+++ b/contrib/one-true-awk/proto.h
@@ -73,12 +73,14 @@ extern Node *node1(int, Node *);
extern Node *node2(int, Node *, Node *);
extern Node *node3(int, Node *, Node *, Node *);
extern Node *node4(int, Node *, Node *, Node *, Node *);
+extern Node *node5(int, Node *, Node *, Node *, Node *, Node *);
extern Node *stat3(int, Node *, Node *, Node *);
extern Node *op2(int, Node *, Node *);
extern Node *op1(int, Node *);
extern Node *stat1(int, Node *);
extern Node *op3(int, Node *, Node *, Node *);
extern Node *op4(int, Node *, Node *, Node *, Node *);
+extern Node *op5(int, Node *, Node *, Node *, Node *, Node *);
extern Node *stat2(int, Node *, Node *);
extern Node *stat4(int, Node *, Node *, Node *, Node *);
extern Node *celltonode(Cell *, int);
@@ -197,6 +199,7 @@ extern const char *filename(FILE *);
extern Cell *closefile(Node **, int);
extern void closeall(void);
extern Cell *dosub(Node **, int);
+extern Cell *gensub(Node **, int);
extern FILE *popen(const char *, const char *);
extern int pclose(FILE *);
diff --git a/contrib/one-true-awk/run.c b/contrib/one-true-awk/run.c
index 99306992df41..9c61b1a1c558 100644
--- a/contrib/one-true-awk/run.c
+++ b/contrib/one-true-awk/run.c
@@ -2062,12 +2062,14 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
{
Cell *x, *y;
Awkfloat u;
- int t;
+ int t, sz;
Awkfloat tmp;
- char *buf;
+ char *buf, *fmt;
Node *nextarg;
FILE *fp;
int status = 0;
+ time_t tv;
+ struct tm *tm;
int estatus = 0;
t = ptoi(a[0]);
@@ -2109,6 +2111,64 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
nextarg = nextarg->nnext;
}
break;
+ case FCOMPL:
+ u = ~((int)getfval(x));
+ break;
+ case FAND:
+ if (nextarg == 0) {
+ WARNING("and requires two arguments; returning 0");
+ u = 0;
+ break;
+ }
+ y = execute(a[1]->nnext);
+ u = ((int)getfval(x)) & ((int)getfval(y));
+ tempfree(y);
+ nextarg = nextarg->nnext;
+ break;
+ case FFOR:
+ if (nextarg == 0) {
+ WARNING("or requires two arguments; returning 0");
+ u = 0;
+ break;
+ }
+ y = execute(a[1]->nnext);
+ u = ((int)getfval(x)) | ((int)getfval(y));
+ tempfree(y);
+ nextarg = nextarg->nnext;
+ break;
+ case FXOR:
+ if (nextarg == 0) {
+ WARNING("xor requires two arguments; returning 0");
+ u = 0;
+ break;
+ }
+ y = execute(a[1]->nnext);
+ u = ((int)getfval(x)) ^ ((int)getfval(y));
+ tempfree(y);
+ nextarg = nextarg->nnext;
+ break;
+ case FLSHIFT:
+ if (nextarg == 0) {
+ WARNING("lshift requires two arguments; returning 0");
+ u = 0;
+ break;
+ }
+ y = execute(a[1]->nnext);
+ u = ((int)getfval(x)) << ((int)getfval(y));
+ tempfree(y);
+ nextarg = nextarg->nnext;
+ break;
+ case FRSHIFT:
+ if (nextarg == 0) {
+ WARNING("rshift requires two arguments; returning 0");
+ u = 0;
+ break;
+ }
+ y = execute(a[1]->nnext);
+ u = ((int)getfval(x)) >> ((int)getfval(y));
+ tempfree(y);
+ nextarg = nextarg->nnext;
+ break;
case FSYSTEM:
fflush(stdout); /* in case something is buffered already */
estatus = status = system(getsval(x));
@@ -2163,6 +2223,41 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
else
u = fflush(fp);
break;
+ case FSYSTIME:
+ u = time((time_t *) 0);
+ break;
+ case FSTRFTIME:
+ /* strftime([format [,timestamp]]) */
+ if (nextarg) {
+ y = execute(nextarg);
+ nextarg = nextarg->nnext;
+ tv = (time_t) getfval(y);
+ tempfree(y);
+ } else
+ tv = time((time_t *) 0);
+ tm = localtime(&tv);
+ if (tm == NULL)
+ FATAL("bad time %ld", (long)tv);
+
+ if (isrec(x)) {
+ /* format argument not provided, use default */
+ fmt = tostring("%a %b %d %H:%M:%S %Z %Y");
+ } else
+ fmt = tostring(getsval(x));
+
+ sz = 32;
+ buf = NULL;
+ do {
+ if ((buf = realloc(buf, (sz *= 2))) == NULL)
+ FATAL("out of memory in strftime");
+ } while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0');
+
+ y = gettemp();
+ setsval(y, buf);
+ free(fmt);
+ free(buf);
+
+ return y;
default: /* can't happen */
FATAL("illegal function type %d", t);
break;
@@ -2542,6 +2637,147 @@ next_search:
return x;
}
+Cell *gensub(Node **a, int nnn) /* global selective substitute */
+ /* XXX incomplete - doesn't support backreferences \0 ... \9 */
+{
+ Cell *x, *y, *res, *h;
+ char *rptr;
+ const char *sptr;
+ char *buf, *pb;
+ const char *t, *q;
+ fa *pfa;
+ int mflag, tempstat, num, whichm;
+ int bufsz = recsize;
+
+ if ((buf = malloc(bufsz)) == NULL)
+ FATAL("out of memory in gensub");
+ mflag = 0; /* if mflag == 0, can replace empty string */
+ num = 0;
+ x = execute(a[4]); /* source string */
+ t = getsval(x);
+ res = copycell(x); /* target string - initially copy of source */
+ res->csub = CTEMP; /* result values are temporary */
+ if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */
+ pfa = (fa *) a[1]; /* regular expression */
+ else {
+ y = execute(a[1]);
+ pfa = makedfa(getsval(y), 1);
+ tempfree(y);
+ }
+ y = execute(a[2]); /* replacement string */
+ h = execute(a[3]); /* which matches should be replaced */
+ sptr = getsval(h);
+ if (sptr[0] == 'g' || sptr[0] == 'G')
+ whichm = -1;
+ else {
+ /*
+ * The specified number is index of replacement, starting
+ * from 1. GNU awk treats index lower than 0 same as
+ * 1, we do same for compatibility.
+ */
+ whichm = (int) getfval(h) - 1;
+ if (whichm < 0)
+ whichm = 0;
+ }
+ tempfree(h);
+
+ if (pmatch(pfa, t)) {
+ char *sl;
+
+ tempstat = pfa->initstat;
+ pfa->initstat = 2;
+ pb = buf;
+ rptr = getsval(y);
+ /*
+ * XXX if there are any backreferences in subst string,
+ * complain now.
+ */
+ for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) {
+ if (strchr("0123456789", sl[1])) {
+ FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr);
+ }
+ }
+
+ do {
+ if (whichm >= 0 && whichm != num) {
+ num++;
+ adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub");
+
+ /* copy the part of string up to and including
+ * match to output buffer */
+ while (t < patbeg + patlen)
+ *pb++ = *t++;
+ continue;
+ }
+
+ if (patlen == 0 && *patbeg != 0) { /* matched empty string */
+ if (mflag == 0) { /* can replace empty */
+ num++;
+ sptr = rptr;
+ while (*sptr != 0) {
+ adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
+ if (*sptr == '\\') {
+ backsub(&pb, &sptr);
+ } else if (*sptr == '&') {
+ sptr++;
+ adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
+ for (q = patbeg; q < patbeg+patlen; )
+ *pb++ = *q++;
+ } else
+ *pb++ = *sptr++;
+ }
+ }
+ if (*t == 0) /* at end */
+ goto done;
+ adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub");
+ *pb++ = *t++;
+ if (pb > buf + bufsz) /* BUG: not sure of this test */
+ FATAL("gensub result0 %.30s too big; can't happen", buf);
+ mflag = 0;
+ }
+ else { /* matched nonempty string */
+ num++;
+ sptr = t;
+ adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub");
+ while (sptr < patbeg)
+ *pb++ = *sptr++;
+ sptr = rptr;
+ while (*sptr != 0) {
+ adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
+ if (*sptr == '\\') {
+ backsub(&pb, &sptr);
+ } else if (*sptr == '&') {
+ sptr++;
+ adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
+ for (q = patbeg; q < patbeg+patlen; )
+ *pb++ = *q++;
+ } else
+ *pb++ = *sptr++;
+ }
+ t = patbeg + patlen;
+ if (patlen == 0 || *t == 0 || *(t-1) == 0)
+ goto done;
+ if (pb > buf + bufsz)
+ FATAL("gensub result1 %.30s too big; can't happen", buf);
+ mflag = 1;
+ }
+ } while (pmatch(pfa,t));
+ sptr = t;
+ adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub");
+ while ((*pb++ = *sptr++) != 0)
+ ;
+ done: if (pb > buf + bufsz)
+ FATAL("gensub result2 %.30s too big; can't happen", buf);
+ *pb = '\0';
+ setsval(res, buf);
+ pfa->initstat = tempstat;
+ }
+ tempfree(x);
+ tempfree(y);
+ free(buf);
+ return(res);
+}
+
void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */
{ /* sptr[0] == '\\' */
char *pb = *pb_ptr;