diff options
author | Warner Losh <imp@FreeBSD.org> | 2021-07-07 23:03:17 +0000 |
---|---|---|
committer | Warner Losh <imp@FreeBSD.org> | 2021-07-07 23:09:41 +0000 |
commit | 746b7396bb3e85208573892a0f314e0b4e7dacf4 (patch) | |
tree | c14cfbfbe60fa840441df8e856727eba63075d4e | |
parent | 03ee4d05f1d963d60451e04ce505e4da116300db (diff) |
one-true-awk: import 20210221 (1e4bc42c53a1) which fixes a number of bugs (tag: vendor/one-true-awk/1e4bc42c53a1)
Import the latest bsd-features branch of the one-true-awk upstream:
o Move to bison for $YACC
o Set close-on-exec flag for file and pipe redirects that aren't std*
o lots of little fixes to modernize code base
o free sval member before setting it
o fix a bug where a{0,3} could match aaaa
o pull in systime and strftime from NetBSD awk
o pull in fixes from {Net,Free,Open}BSD
o add BSD extensions and, or, xor, compl, lshift, rshift
Sponsored by: Netflix
-rw-r--r-- | ChangeLog | 108 | ||||
-rw-r--r-- | FIXES | 261 | ||||
-rwxr-xr-x | REGRESS | 4 | ||||
-rw-r--r-- | awk.1 | 151 | ||||
-rw-r--r-- | awk.h | 61 | ||||
-rw-r--r-- | awkgram.y | 48 | ||||
-rw-r--r-- | b.c | 469 | ||||
-rw-r--r-- | bugs-fixed/missing-precision.ok | 2 | ||||
-rw-r--r-- | bugs-fixed/negative-nf.ok | 2 | ||||
-rw-r--r-- | lex.c | 100 | ||||
-rw-r--r-- | lib.c | 328 | ||||
-rw-r--r-- | main.c | 165 | ||||
-rw-r--r-- | makefile | 74 | ||||
-rw-r--r-- | maketab.c | 66 | ||||
-rw-r--r-- | parse.c | 31 | ||||
-rw-r--r-- | proctab.c | 202 | ||||
-rw-r--r-- | proto.h | 41 | ||||
-rw-r--r-- | run.c | 982 | ||||
-rw-r--r-- | tran.c | 166 |
19 files changed, 2272 insertions, 989 deletions
diff --git a/ChangeLog b/ChangeLog index fd03b2bbca0b..dea4ed7e3187 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,111 @@ +2020-07-30 Arnold D. Robbins <arnold@skeeve.com> + + By fiat, we use bison for $(YACC). Trying to accommodate + different versions didn't work. + + * makefile: Significant cleanup. Replace all ytab* references + with awkgram.tab.* and simplify definition of YACC. + * .gitignore: Remove ytab* references. + * b.c, lex.c, maketab.c, parse.c, run.c: Replace include of ytab.h + with awkgram.tab.h. + * lib.c, main.c, tran.c: Remove include of ytab.h, wasn't needed. + +2020-01-20 Arnold D. Robbins <arnold@skeeve.com> + + * run.c (openfile): Set the close-on-exec flag for file + and pipe redirections that aren't stdin/stdout/stderr. + +2020-01-06 Arnold D. Robbins <arnold@skeeve.com> + + Minor fixes. + * b.c (replace_repeat): Turn init_q back into an int. + * lex.c (string): Use \a instead of \007. + * tran.c (catstr): Use snprintf instead of sprintf. + +2020-01-01 Arnold D. Robbins <arnold@skeeve.com> + + * tran.c (syminit, arginit, envinit): Free sval member before + setting it. Thanks to valgrind. + * b.c: Small formatting cleanups in several routines. + +2019-12-27 Arnold D. Robbins <arnold@skeeve.com> + + * b.c (replace_repeat): Fix a bug whereby a{0,3} could match + four a's. Thanks to Anonymous AWK fan <awkfan77@mailfence.com> + for the report. Also, minor code formatting cleanups. + * testdir/T.int-expr: New file. + +2019-12-11 Arnold D. Robbins <arnold@skeeve.com> + + * README: Renamed to ... + * README.md: ... this. Cleaned up some as well, + including moving to Markdown. + +2019-11-08 Arnold D. Robbins <arnold@skeeve.com> + + * test/T.chem: Use $oldawk instead of hardwiring 'awk'. + * test/T.lilly: Remove gawk warnings from output, improves + portability. + +2019-10-17 Arnold D. Robbins <arnold@skeeve.com> + + Pull in systime() and strftime() from the NetBSD awk. + + * awk.1: Document the functions. 
+ * run.c (bltin): Implement the functions. + * awk.h: Add defines for systime and strftime. + * lex.c: Add support for systime and strftime. + +2019-10-07 Arnold D. Robbins <arnold@skeeve.com> + + Integrate features from different *BSD versions of awk. + Gensub support from NetBSD. Bitwise functions from OpenBSD. + + * awk.h: Add defines for and, or, xor, compl, lshift and rshift. + * awkgram.y: Add support for gensub. + * maketab.c: Ditto. + * lex.c: Add support for gensub and bitwise functions. + * parse.c (node5, op5): New functions. + * proto.h (node5, op5): New declarations. + * run.c (bltin): Implement the bitwise functions. + (gensub): New function. + * awk.1: Document additional functions. + +2019-10-07 Arnold D. Robbins <arnold@skeeve.com> + + * b.c (fnematch): Change type of pbuf from unsigned char to char. + * proto.h (fnematch): Ditto. + +2019-10-06 Arnold D. Robbins <arnold@skeeve.com> + + * lib.c (readrec): Allow RS a regular expression. Imported + the code from the NetBSD awk. + * b.c (fnematch): New function for implementing the feature. + * awk.1: Updated. + * main.c (version): Updated. + +2019-06-24 Arnold D. Robbins <arnold@skeeve.com> + + * makefile: Revise to take into account there is no more awktest.tar, + add targets 'check' and 'test', and also 'testclean' to clean up + after test run. Have 'clean' and 'cleaner' depend upon 'testclean'. + +2019-06-23 Arnold D. Robbins <arnold@skeeve.com> + + * testdir: Extracted from awktest.tar and added to Git. + * awktest.tar: Removed. + +2019-06-06 Arnold D. Robbins <arnold@skeeve.com> + + * awk.1: Fix a typo, minor edits. + +2019-06-05 Arnold D. Robbins <arnold@skeeve.com> + + * b.c (relex): Count parentheses and treat umatched right paren + as a literal character. + * awktest.tar (testdir/T.re): Added a test case. + * main.c (version): Updated. + 2019-05-29 Arnold D. Robbins <arnold@skeeve.com> * lib.c (isclvar): Remove check for additional '=' after @@ -25,6 +25,229 @@ THIS SOFTWARE. 
This file lists all bug fixes, changes, etc., made since the AWK book was sent to the printers in August, 1987. +February 15, 2021: + Small fix so that awk will compile again with g++. Thanks to + Arnold Robbins. + +January 06, 2021: + Fix a decision bug with trailing stuff in lib.c:is_valid_number + after recent changes. Thanks to Ozan Yigit. + +December 18, 2020: + Fix problems converting inf and NaN values in lib.c:is_valid_number. + Enhance number to string conversion to do the right thing for + NaN and inf values. Things are now pretty much the same as in + gawk. (Found a gawk bug while we're at it.) Added a torture + test for these values. Thanks to Arnold Robbins. Allows closing + of PR #101. + +December 15, 2020: + Merge PR #99, which gets the right header for strcasecmp. + Thanks to GitHub user michaelforney. + +December 8, 2020: + Merge PR #98: Disallow hex data. Allow only +nan, -nan, + +inf, -inf (case independent) to give NaN and infinity values. + Improve things so that string to double conversion is only + done once, yielding something of a speedup. This obviate + PR #95. Thanks to Arnold Robbins. + +December 3, 2020: + Fix to argument parsing to avoid printing spurious newlines. + Thanks to Todd Miller. Merges PR #97. + +October 13, 2020: + Add casts before all the calls to malloc/calloc/realloc in order + to get it to compile with g++. Thanks to Arnold Robbins. + +August 16, 2020: + Additional fixes for DJGPP. Thanks to Eli Zaretskii for + the testing. + +August 7, 2020: + Merge PR #93, which adds casts to (void*) for debug prints + using the %p format specifier. Thanks to GitHub user YongHaoWu + ("Chris") for the fixes. + +August 4, 2020: + In run.c, use non-restartable multibyte routines to attain + portability to DJGPP. Should fix Issue 92. Thanks to Albert Wik + for the report and to Todd Miller for the suggested fix. + +July 30, 2020: + Merge PRs 88-91 which fix small bugs. Thanks to Todd Miller and + Tim van der Molen for the fixes. 
+ + In order to make life easier, we move exclusively to bison + as the parser generator. + +July 2, 2020: + Merge PRs 85 and 86 which fix regressions. Thanks to + Tim van der Molen for the fixes. + +June 25, 2020: + Merge PRs 82 and 84. The latter fixes issue #83. Thanks to + Todd Miller and awkfan77. + +June 12, 2020: + Clear errno before calling errcheck to avoid any spurious errors + left over from previous calls that may have set it. Thanks to + Todd Miller for the fix, from PR #80. + + Fix Issue #78 by allowing \r to follow floating point numbers in + lib.c:is_number. Thanks to GitHub user ajcarr for the report + and to Arnold Robbins for the fix. + +June 5, 2020: + In fldbld(), make sure that inputFS is set before trying to + use it. Thanks to Steffen Nurpmeso <steffen@sdaoden.eu> + for the report. + +May 5, 2020: + Fix checks for compilers that can handle noreturn. Thanks to + GitHub user enh-google for pointing it out. Closes Issue #79. + +April 16, 2020: + Handle old compilers that don't support C11 (for noreturn). + Thanks to Arnold Robbins. + +April 5, 2020: + Use <stdnoreturn.h> and noreturn instead of GCC attributes. + Thanks to GitHub user awkfan77. Closes PR #77. + +February 28, 2020: + More cleanups from Christos Zoulas: notably backslash continuation + inside strings removes the newline and a fix for RS = "^a". + Fix for address sanitizer-found problem. Thanks to GitHub user + enh-google. + +February 19, 2020: + More small cleanups from Christos Zoulas. + +February 18, 2020: + Additional cleanups from Christos Zoulas. It's no longer necessary + to use the -y flag to bison. + +February 6, 2020: + Additional small cleanups from Christos Zoulas. awk is now + a little more robust about reporting I/O errors upon exit. + +January 31, 2020: + Merge PR #70, which avoids use of variable length arrays. Thanks + to GitHub user michaelforney. Fix issue #60 ({0} in interval + expressions doesn't work). Also get all tests working again. 
+ Thanks to Arnold Robbins. + +January 24, 2020: + A number of small cleanups from Christos Zoulas. Add the close + on exec flag to files/pipes opened for redirection; courtesy of + Arnold Robbins. + +January 19, 2020: + If POSIXLY_CORRECT is set in the environment, then sub and gsub + use POSIX rules for multiple backslashes. This fixes Issue #66, + while maintaining backwards compatibility. + +January 9, 2020: + Input/output errors on closing files are now fatal instead of + mere warnings. Thanks to Martijn Dekker <martijn@inlv.org>. + +January 5, 2020: + Fix a bug in the concatentation of two string constants into + one done in the grammar. Fixes GitHub issue #61. Thanks + to GitHub user awkfan77 for pointing out the direction for + the fix. New test T.concat added to the test suite. + Fix a few memory leaks reported by valgrind, as well. + +December 27, 2019: + Fix a bug whereby a{0,3} could match four a's. Thanks to + "Anonymous AWK fan" for the report. + +December 11, 2019: + Further printf-related fixes for 32 bit systems. + Thanks again to Christos Zoulas. + +December 8, 2019: + Fix the return value of sprintf("%d") on 32 bit systems. + Thanks to Jim Lowe for the report and to Christos Zoulas + for the fix. + +November 10, 2019: + Convert a number of Boolean integer variables into + actual bools. Convert compile_time variable into an + enum and simplify some of the related code. Thanks + to Arnold Robbins. + +November 8, 2019: + Fix from Ori Bernstein to get UTF-8 characters instead of + bytes when FS = "". This is currently the only bit of + the One True Awk that understands multibyte characters. + From Arnold Robbins, apply some cleanups in the test suite. + +October 25, 2019: + More fixes and cleanups from NetBSD, courtesy of Christos + Zoulas. Merges PRs 54 and 55. + +October 24, 2019: + Import second round of code cleanups from NetBSD. Much thanks + to Christos Zoulas (GitHub user zoulasc). Merges PR 53. 
+ Add an optimization for string concatenation, also from + Christos. + +October 17, 2019: + Import code cleanups from NetBSD. Much thanks to Christos + Zoulas (GitHub user zoulasc). Merges PR 51. + +October 6, 2019: + Import code from NetBSD awk that implements RS as a regular + expression. + +September 10, 2019: + Fixes for various array / memory overruns found via gcc's + -fsanitize=unknown. Thanks to Alexander Richardson (GitHub + user arichardson). Merges PRs 47 and 48. + +July 28, 2019: + Import grammar optimization from NetBSD: Two string constants + concatenated together get turned into a single string. + +July 26, 2019: + Support POSIX-specified C-style escape sequences "\a" (alarm) + and "\v" (vertical tab) in command line arguments and regular + expressions, further to the support for them in strings added on + Apr 9, 1989. These now no longer match as literal "a" and "v" + characters (as they don't on other awk implementations). + Thanks to Martijn Dekker. + +July 17, 2019: + Pull in a number of code cleanups and minor fixes from + Warner Losh's bsd-ota branch. The only user visible change + is the use of random(3) as the random number generator. + Thanks to Warner Losh for collecting all these fixes in + one easy place to get them from. + +July 16, 2019: + Fix field splitting to use FS value as of the time a record + was read or assigned to. Thanks to GitHub user Cody Mello (melloc) + for the fix. (Merged from his branch, via PR #42.) Updated + testdir/T.split per said PR as well. + +June 24, 2019: + Extract awktest.tar into testdir directory. Add some very + simple mechanics to the makefile for running the tests and + for cleaning up. No changes to awk itself. + +June 17, 2019: + Disallow deleting SYMTAB and its elements, which creates + use-after-free bugs. Thanks to GitHub user Cody Mello (melloc) + for the fix. (Merged from PR #43.) + +June 5, 2019: + Allow unmatched right parenthesis in a regular expression to + be treated literally. 
Fixes Issue #40. Thanks to GitHub user + Warner Losh (bsdimp) for the report. Thanks to Arnold Robbins + for the fix. + May 29,2019: Fix check for command line arguments to no longer require that first character after '=' not be another '='. Reverts change of @@ -34,7 +257,7 @@ May 29,2019: Apr 7, 2019: Update awktest.tar(p.50) to use modern options to sort. Needed for Android development. Thanks to GitHub user mohd-akram (Mohamed - Akram). From Comment #33. + Akram). From Issue #33. Mar 12, 2019: Added very simplistic support for cross-compiling in the @@ -54,7 +277,7 @@ Mar 3, 2019: #12: Avoid undefined behaviour when using ctype(3) functions in relex(). Thanks to GitHub user iamleot. #31: Make getline handle numeric strings, and update FIXES. Thanks - to GitHub user arnoldrobbins + to GitHub user arnoldrobbins. #32: maketab: support build systems with read-only source. Thanks to GitHub user enh. @@ -159,10 +382,10 @@ Jun 12, 2011: /pat/, \n /pat/ {...} is now legal, though bad style to use. added checks to new -v code that permits -vnospace; thanks to - ruslan ermilov for spotting this and providing the patch. + ruslan ermilov for spotting this and providing the patch. removed fixed limit on number of open files; thanks to aleksey - cheusov and christos zoulos. + cheusov and christos zoulos. fixed day 1 bug that resurrected deleted elements of ARGV when used as filenames (in lib.c). @@ -180,10 +403,10 @@ May 1, 2011: and arnold robbins, changed srand() to return the previous seed (which is 1 on the first call of srand). the seed is an Awkfloat internally though converted to unsigned int to - pass to the library srand(). thanks, everyone. + pass to the library srand(). thanks, everyone. fixed a subtle (and i hope low-probability) overflow error - in fldbld, by adding space for one extra \0. thanks to + in fldbld, by adding space for one extra \0. thanks to robert bassett for spotting this one and providing a fix. 
removed the files related to compilation on windows. i no @@ -220,7 +443,7 @@ Oct 8, 2008: Oct 23, 2007: minor fix in lib.c: increase inputFS to 100, change malloc - for fields to n+1. + for fields to n+1. fixed memory fault caused by out of order test in setsval. @@ -267,7 +490,7 @@ Jan 17, 2006: core dump on linux with BEGIN {nextfile}, now fixed. - removed some #ifdef's in run.c and lex.c that appear to no + removed some #ifdef's in run.c and lex.c that appear to no longer be necessary. Apr 24, 2005: @@ -281,8 +504,8 @@ Jan 14, 2005: rethinking it. Dec 31, 2004: - prevent overflow of -f array in main, head off potential error in - call of SYNTAX(), test malloc return in lib.c, all with thanks to + prevent overflow of -f array in main, head off potential error in + call of SYNTAX(), test malloc return in lib.c, all with thanks to todd miller. Dec 22, 2004: @@ -310,8 +533,8 @@ Nov 22, 2003: code known to man. fixed a storage leak in call() that appears to have been there since - 1983 or so -- a function without an explicit return that assigns a - string to a parameter leaked a Cell. thanks to moinak ghosh for + 1983 or so -- a function without an explicit return that assigns a + string to a parameter leaked a Cell. thanks to moinak ghosh for spotting this very subtle one. Jul 31, 2003: @@ -333,7 +556,7 @@ Jul 28, 2003: radix character in programs and command line arguments regardless of the locale; otherwise, the locale should prevail for input and output of numbers. so it's intended to work that way. - + i have rescinded the attempt to use strcoll in expanding shorthands in regular expressions (cclenter). 
its properties are much too surprising; for example [a-c] matches aAbBc in locale en_US but abBcC @@ -397,7 +620,7 @@ Nov 29, 2002: Jun 28, 2002: modified run/format() and tran/getsval() to do a slightly better job on using OFMT for output from print and CONVFMT for other - number->string conversions, as promised by posix and done by + number->string conversions, as promised by posix and done by gawk and mawk. there are still places where it doesn't work right if CONVFMT is changed; by then the STR attribute of the variable has been irrevocably set. thanks to arnold robbins for @@ -429,7 +652,7 @@ Feb 10, 2002: Jan 1, 2002: fflush() or fflush("") flushes all files and pipes. - length(arrayname) returns number of elements; thanks to + length(arrayname) returns number of elements; thanks to arnold robbins for suggestion. added a makefile.win to make it easier to build on windows. @@ -479,7 +702,7 @@ July 5, 2000: May 25, 2000: yet another attempt at making 8-bit input work, with another - band-aid in b.c (member()), and some (uschar) casts to head + band-aid in b.c (member()), and some (uschar) casts to head off potential errors in subscripts (like isdigit). also changed HAT to NCHARS-2. thanks again to santiago vila. @@ -526,7 +749,7 @@ Apr 21, 1999: the test case.) Apr 16, 1999: - with code kindly provided by Bruce Lilly, awk now parses + with code kindly provided by Bruce Lilly, awk now parses /=/ and similar constructs more sensibly in more places. Bruce also provided some helpful test cases. @@ -583,7 +806,7 @@ Jan 13, 1999: Oct 19, 1998: fixed a couple of bugs in getrec: could fail to update $0 - after a getline var; because inputFS wasn't initialized, + after a getline var; because inputFS wasn't initialized, could split $0 on every character, a misleading diversion. fixed caching bug in makedfa: LRU was actually removing @@ -731,7 +954,7 @@ May 2, 1996: input file. (thanks to arnold robbins for inspiration and code). 
small fixes to regexpr code: can now handle []], [[], and - variants; [] is now a syntax error, rather than matching + variants; [] is now a syntax error, rather than matching everything; [z-a] is now empty, not z. far from complete or correct, however. (thanks to jeffrey friedl for pointing out some awful behaviors.) @@ -33,3 +33,7 @@ then fi REGRESS + +cd .. +cd bugs-fixed +REGRESS @@ -7,6 +7,10 @@ .fi .ft 1 .. +.de TF +.IP "" "\w'\fB\\$1\ \ \fP'u" +.PD 0 +.. .TH AWK 1 .CT 1 files prog_other .SH NAME @@ -48,7 +52,7 @@ matches the pattern. Each line is matched against the pattern portion of every pattern-action statement; the associated action is performed for each matched pattern. -The file name +The file name .B \- means the standard input. Any @@ -90,7 +94,7 @@ A pattern-action statement has the form: .IP .IB pattern " { " action " } .PP -A missing +A missing .BI { " action " } means print the line; a missing pattern always matches. @@ -209,7 +213,7 @@ or length of if no argument. .TP .B rand -random number on (0,1) +random number on [0,1). .TP .B srand sets seed for @@ -217,7 +221,7 @@ sets seed for and returns the previous seed. .TP .B int -truncates to an integer value +truncates to an integer value. .TP \fBsubstr(\fIs\fB, \fIm\fR [\fB, \fIn\^\fR]\fB)\fR the @@ -225,12 +229,11 @@ the substring of .I s that begins at position -.I m +.I m counted from 1. If no -.IR m , -use the rest of the string -.I +.IR n , +use the rest of the string. .TP .BI index( s , " t" ) the position in @@ -294,6 +297,25 @@ and .B gsub return the number of replacements. .TP +\fBgensub(\fIpat\fB, \fIrepl\fB, \fIhow\fR [\fB, \fItarget\fR]\fB)\fR +replaces instances of +.I pat +in +.I target +with +.IR repl . +If +.I how +is \fB"g"\fR or \fB"G"\fR, do so globally. Otherwise, +.I how +is a number indicating which occurrence to replace. If no +.IR target , +use +.BR $0 . +Return the resulting string; +.I target +is not modified. 
+.TP .BI sprintf( fmt , " expr" , " ...\fB) the string resulting from formatting .I expr ... @@ -302,13 +324,35 @@ according to the format .IR fmt . .TP +.B systime() +returns the current date and time as a standard +``seconds since the epoch'' value. +.TP +.BI strftime( fmt ", " timestamp\^ ) +formats +.I timestamp +(a value in seconds since the epoch) +according to +.IR fmt , +which is a format string as supported by +.IR strftime (3). +Both +.I timestamp +and +.I fmt +may be omitted; if no +.IR timestamp , +the current time of day is used, and if no +.IR fmt , +a default format of \fB"%a %b %e %H:%M:%S %Z %Y"\fR is used. +.TP .BI system( cmd ) executes .I cmd and returns its exit status. This will be \-1 upon error, .IR cmd 's exit status upon a normal exit, -256 + +256 + .I sig upon death-by-signal, where .I sig @@ -361,13 +405,24 @@ In all cases, returns 1 for a successful input, 0 for end of file, and \-1 for an error. .PP +The functions +.BR compl , +.BR and , +.BR or , +.BR xor , +.BR lshift , +and +.B rshift +peform the corresponding bitwise operations on their +operands, which are first truncated to integer. +.PP Patterns are arbitrary Boolean combinations (with .BR "! || &&" ) of regular expressions and relational expressions. Regular expressions are as in -.IR egrep ; +.IR egrep ; see .IR grep (1). Isolated regular expressions @@ -479,6 +534,11 @@ the length of a string matched by .TP .B RS input record separator (default newline). +If empty, blank lines separate records. +If more than one character long, +.B RS +is treated as a regular expression, and records are +separated by text matching the expression. .TP .B RSTART the start position of a string matched by @@ -498,6 +558,16 @@ functions may be called recursively. Parameters are local to the function; all other variables are global. Thus local variables may be created by providing excess parameters in the function definition. 
+.SH ENVIRONMENT VARIABLES +If +.B POSIXLY_CORRECT +is set in the environment, then +.I awk +follows the POSIX rules for +.B sub +and +.B gsub +with respect to consecutive backslashes and ampersands. .SH EXAMPLES .TP .EX @@ -542,8 +612,8 @@ BEGIN { # Simulate echo(1) .fi .EE .SH SEE ALSO -.IR grep (1), -.IR lex (1), +.IR grep (1), +.IR lex (1), .IR sed (1) .br A. V. Aho, B. W. Kernighan, P. J. Weinberger, @@ -554,8 +624,61 @@ There are no explicit conversions between numbers and strings. To force an expression to be treated as a number add 0 to it; to force it to be treated as a string concatenate \&\f(CW""\fP to it. -.br +.PP The scope rules for variables in functions are a botch; the syntax is worse. -.br +.PP Only eight-bit characters sets are handled correctly. +.SH UNUSUAL FLOATING-POINT VALUES +.I Awk +was designed before IEEE 754 arithmetic defined Not-A-Number (NaN) +and Infinity values, which are supported by all modern floating-point +hardware. +.PP +Because +.I awk +uses +.IR strtod (3) +and +.IR atof (3) +to convert string values to double-precision floating-point values, +modern C libraries also convert strings starting with +.B inf +and +.B nan +into infinity and NaN values respectively. This led to strange results, +with something like this: +.PP +.EX +.nf +echo nancy | awk '{ print $1 + 0 }' +.fi +.EE +.PP +printing +.B nan +instead of zero. +.PP +.I Awk +now follows GNU AWK, and prefilters string values before attempting +to convert them to numbers, as follows: +.TP +.I "Hexadecimal values" +Hexadecimal values (allowed since C99) convert to zero, as they did +prior to C99. +.TP +.I "NaN values" +The two strings +.B +nan +and +.B \-nan +(case independent) convert to NaN. No others do. +(NaNs can have signs.) +.TP +.I "Infinity values" +The two strings +.B +inf +and +.B \-inf +(case independent) convert to positive and negative infinity, respectively. +No others do. @@ -23,6 +23,13 @@ THIS SOFTWARE. 
****************************************************************/ #include <assert.h> +#include <stdint.h> +#include <stdbool.h> +#if __STDC_VERSION__ <= 199901L +#define noreturn +#else +#include <stdnoreturn.h> +#endif typedef double Awkfloat; @@ -30,24 +37,34 @@ typedef double Awkfloat; typedef unsigned char uschar; -#define xfree(a) { if ((a) != NULL) { free((void *) (a)); (a) = NULL; } } +#define xfree(a) { if ((a) != NULL) { free((void *)(intptr_t)(a)); (a) = NULL; } } +/* + * We sometimes cheat writing read-only pointers to NUL-terminate them + * and then put back the original value + */ +#define setptr(ptr, a) (*(char *)(intptr_t)(ptr)) = (a) -#define NN(p) ((p) ? (p) : "(null)") /* guaranteed non-null for dprintf +#define NN(p) ((p) ? (p) : "(null)") /* guaranteed non-null for DPRINTF */ #define DEBUG #ifdef DEBUG - /* uses have to be doubly parenthesized */ -# define dprintf(x) if (dbg) printf x +# define DPRINTF(...) if (dbg) printf(__VA_ARGS__) #else -# define dprintf(x) +# define DPRINTF(...) #endif -extern int compile_time; /* 1 if compiling, 0 if running */ -extern int safe; /* 0 => unsafe, 1 => safe */ +extern enum compile_states { + RUNNING, + COMPILING, + ERROR_PRINTING +} compile_time; + +extern bool safe; /* false => unsafe, true => safe */ #define RECSIZE (8 * 1024) /* sets limit on records, fields, etc., etc. 
*/ extern int recsize; /* size of current record, orig RECSIZE */ +extern char EMPTY[]; /* this avoid -Wwritable-strings issues */ extern char **FS; extern char **RS; extern char **ORS; @@ -64,13 +81,11 @@ extern Awkfloat *RLENGTH; extern char *record; /* points to $0 */ extern int lineno; /* line number in awk program */ extern int errorflag; /* 1 if error has occurred */ -extern int donefld; /* 1 if record broken into fields */ -extern int donerec; /* 1 if record is valid (no fld has changed */ -extern char inputFS[]; /* FS at time of input, for field splitting */ - +extern bool donefld; /* true if record broken into fields */ +extern bool donerec; /* true if record is valid (no fld has changed */ extern int dbg; -extern char *patbeg; /* beginning of pattern matched */ +extern const char *patbeg; /* beginning of pattern matched */ extern int patlen; /* length of pattern matched. set in b.c */ /* Cell: all information about a variable or constant */ @@ -105,6 +120,7 @@ extern Cell *rsloc; /* RS */ extern Cell *rstartloc; /* RSTART */ extern Cell *rlengthloc; /* RLENGTH */ extern Cell *subseploc; /* SUBSEP */ +extern Cell *symtabloc; /* SYMTAB */ /* Cell.tval values: */ #define NUM 01 /* number value is valid */ @@ -134,6 +150,14 @@ extern Cell *subseploc; /* SUBSEP */ #define FTOUPPER 12 #define FTOLOWER 13 #define FFLUSH 14 +#define FAND 15 +#define FFOR 16 +#define FXOR 17 +#define FCOMPL 18 +#define FLSHIFT 19 +#define FRSHIFT 20 +#define FSYSTIME 21 +#define FSTRFTIME 22 /* Node: parse tree is made of nodes, with Cell's at bottom */ @@ -161,7 +185,7 @@ extern Node *nullnode; #define CCOPY 6 #define CCON 5 #define CTEMP 4 -#define CNAME 3 +#define CNAME 3 #define CVAR 2 #define CFLD 1 #define CUNK 0 @@ -211,6 +235,7 @@ extern int pairstack[], paircnt; #define NCHARS (256+3) /* 256 handles 8-bit chars; 128 does 7-bit */ /* watch out in match(), etc. 
*/ +#define HAT (NCHARS+2) /* matches ^ in regular expr */ #define NSTATES 32 typedef struct rrow { @@ -224,16 +249,16 @@ typedef struct rrow { } rrow; typedef struct fa { - uschar gototab[NSTATES][NCHARS]; - uschar out[NSTATES]; + unsigned int **gototab; + uschar *out; uschar *restr; - int *posns[NSTATES]; - int anchor; + int **posns; + int state_count; + bool anchor; int use; int initstat; int curstat; int accept; - int reset; struct rrow re[1]; /* variable: actual size set by calling malloc */ } fa; diff --git a/awkgram.y b/awkgram.y index e4abeeddcb6a..f37073d1f9ac 100644 --- a/awkgram.y +++ b/awkgram.y @@ -32,8 +32,8 @@ int yywrap(void) { return(1); } Node *beginloc = 0; Node *endloc = 0; -int infunc = 0; /* = 1 if in arglist or body of func */ -int inloop = 0; /* = 1 if in while, for, do */ +bool infunc = false; /* = true if in arglist or body of func */ +int inloop = 0; /* >= 1 if in while, for, do; can't be bool, since loops can next */ char *curfname = 0; /* current function name */ Node *arglist = 0; /* list of args for current function */ %} @@ -50,10 +50,10 @@ Node *arglist = 0; /* list of args for current function */ %token <i> NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']' %token <i> ARRAY %token <i> MATCH NOTMATCH MATCHOP -%token <i> FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE +%token <i> FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE ZERO %token <i> AND BOR APPEND EQ GE GT LE LT NE IN -%token <i> ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC -%token <i> SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE +%token <i> ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC +%token <i> GENSUB SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE %token <i> ADD MINUS MULT DIVIDE MOD %token <i> ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ %token <i> PRINT PRINTF SPRINTF @@ -71,6 +71,7 @@ Node *arglist = 0; /* list of args for current function */ %type <i> do st %type <i> pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor %type 
<i> subop print +%type <cp> string %right ASGNOP %right '?' @@ -79,7 +80,7 @@ Node *arglist = 0; /* list of args for current function */ %left AND %left GETLINE %nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|' -%left ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC +%left ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC %left GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER %left PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR %left REGEXPR VAR VARNF IVAR WHILE '(' @@ -181,8 +182,8 @@ pa_stat: { beginloc = linkum(beginloc, $3); $$ = 0; } | XEND lbrace stmtlist '}' { endloc = linkum(endloc, $3); $$ = 0; } - | FUNC funcname '(' varlist rparen {infunc++;} lbrace stmtlist '}' - { infunc--; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; } + | FUNC funcname '(' varlist rparen {infunc = true;} lbrace stmtlist '}' + { infunc = false; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; } ; pa_stats: @@ -238,10 +239,10 @@ pattern: $$ = op3($2, (Node *)1, $1, $3); } | pattern IN varname { $$ = op2(INTEST, $1, makearr($3)); } | '(' plist ')' IN varname { $$ = op2(INTEST, $2, makearr($5)); } - | pattern '|' GETLINE var { + | pattern '|' GETLINE var { if (safe) SYNTAX("cmd | getline is unsafe"); else $$ = op3(GETLINE, $4, itonp($2), $1); } - | pattern '|' GETLINE { + | pattern '|' GETLINE { if (safe) SYNTAX("cmd | getline is unsafe"); else $$ = op3(GETLINE, (Node*)0, itonp($2), $1); } | pattern term %prec CAT { $$ = op2(CAT, $1, $2); } @@ -292,7 +293,7 @@ rparen: ; simple_stmt: - print prarg '|' term { + print prarg '|' term { if (safe) SYNTAX("print | is unsafe"); else $$ = stat3($1, $2, itonp($3), $4); } | print prarg APPEND term { @@ -348,6 +349,11 @@ subop: SUB | GSUB ; +string: + STRING + | string STRING { $$ = catstr($1, $2); } + ; + term: term '/' ASGNOP term { $$ = op2(DIVEQ, $1, $4); } | term '+' term { $$ = op2(ADD, $1, $3); } @@ -369,6 +375,22 @@ term: | INCR var { $$ = op1(PREINCR, $2); } | var DECR { $$ = op1(POSTDECR, $1); } | var INCR { $$ = 
op1(POSTINCR, $1); } + | GENSUB '(' reg_expr comma pattern comma pattern ')' + { $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, rectonode()); } + | GENSUB '(' pattern comma pattern comma pattern ')' + { if (constnode($3)) + $$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3), 1), $5, $7, rectonode()); + else + $$ = op5(GENSUB, (Node *)1, $3, $5, $7, rectonode()); + } + | GENSUB '(' reg_expr comma pattern comma pattern comma pattern ')' + { $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, $9); } + | GENSUB '(' pattern comma pattern comma pattern comma pattern ')' + { if (constnode($3)) + $$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3),1), $5,$7,$9); + else + $$ = op5(GENSUB, (Node *)1, $3, $5, $7, $9); + } | GETLINE var LT term { $$ = op3(GETLINE, $2, itonp($3), $4); } | GETLINE LT term { $$ = op3(GETLINE, NIL, itonp($2), $3); } | GETLINE var { $$ = op3(GETLINE, $2, NIL, NIL); } @@ -394,7 +416,7 @@ term: | SPLIT '(' pattern comma varname ')' { $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); } /* default */ | SPRINTF '(' patlist ')' { $$ = op1($1, $3); } - | STRING { $$ = celltonode($1, CCON); } + | string { $$ = celltonode($1, CCON); } | subop '(' reg_expr comma pattern ')' { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); } | subop '(' pattern comma pattern ')' @@ -421,7 +443,7 @@ var: | varname '[' patlist ']' { $$ = op2(ARRAY, makearr($1), $3); } | IVAR { $$ = op1(INDIRECT, celltonode($1, CVAR)); } | INDIRECT term { $$ = op1(INDIRECT, $2); } - ; + ; varlist: /* nothing */ { arglist = $$ = 0; } @@ -32,10 +32,8 @@ THIS SOFTWARE. 
#include <string.h> #include <stdlib.h> #include "awk.h" -#include "ytab.h" +#include "awkgram.tab.h" -#define HAT (NCHARS+2) /* matches ^ in regular expr */ - /* NCHARS is 2**n */ #define MAXLIN 22 #define type(v) (v)->nobj /* badly overloaded here */ @@ -63,40 +61,99 @@ int maxsetvec = 0; int rtok; /* next token in current re */ int rlxval; -static uschar *rlxstr; -static uschar *prestr; /* current position in current re */ -static uschar *lastre; /* origin of last re */ -static uschar *lastatom; /* origin of last Atom */ -static uschar *starttok; -static uschar *basestr; /* starts with original, replaced during +static const uschar *rlxstr; +static const uschar *prestr; /* current position in current re */ +static const uschar *lastre; /* origin of last re */ +static const uschar *lastatom; /* origin of last Atom */ +static const uschar *starttok; +static const uschar *basestr; /* starts with original, replaced during repetition processing */ -static uschar *firstbasestr; +static const uschar *firstbasestr; static int setcnt; static int poscnt; -char *patbeg; +const char *patbeg; int patlen; -#define NFA 20 /* cache this many dynamic fa's */ +#define NFA 128 /* cache this many dynamic fa's */ fa *fatab[NFA]; int nfatab = 0; /* entries in fatab */ -fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */ +static int * +intalloc(size_t n, const char *f) +{ + int *p = (int *) calloc(n, sizeof(int)); + if (p == NULL) + overflo(f); + return p; +} + +static void +resizesetvec(const char *f) +{ + if (maxsetvec == 0) + maxsetvec = MAXLIN; + else + maxsetvec *= 4; + setvec = (int *) realloc(setvec, maxsetvec * sizeof(*setvec)); + tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(*tmpset)); + if (setvec == NULL || tmpset == NULL) + overflo(f); +} + +static void +resize_state(fa *f, int state) +{ + unsigned int **p; + uschar *p2; + int **p3; + int i, new_count; + + if (++state < f->state_count) + return; + + new_count = state + 10; /* needs to be tuned */ 
+ + p = (unsigned int **) realloc(f->gototab, new_count * sizeof(f->gototab[0])); + if (p == NULL) + goto out; + f->gototab = p; + + p2 = (uschar *) realloc(f->out, new_count * sizeof(f->out[0])); + if (p2 == NULL) + goto out; + f->out = p2; + + p3 = (int **) realloc(f->posns, new_count * sizeof(f->posns[0])); + if (p3 == NULL) + goto out; + f->posns = p3; + + for (i = f->state_count; i < new_count; ++i) { + f->gototab[i] = (unsigned int *) calloc(NCHARS, sizeof(**f->gototab)); + if (f->gototab[i] == NULL) + goto out; + f->out[i] = 0; + f->posns[i] = NULL; + } + f->state_count = new_count; + return; +out: + overflo(__func__); +} + +fa *makedfa(const char *s, bool anchor) /* returns dfa for reg expr s */ { int i, use, nuse; fa *pfa; static int now = 1; - if (setvec == 0) { /* first time through any RE */ - maxsetvec = MAXLIN; - setvec = (int *) malloc(maxsetvec * sizeof(int)); - tmpset = (int *) malloc(maxsetvec * sizeof(int)); - if (setvec == 0 || tmpset == 0) - overflo("out of space initializing makedfa"); + if (setvec == NULL) { /* first time through any RE */ + resizesetvec(__func__); } - if (compile_time) /* a constant for sure */ + if (compile_time != RUNNING) /* a constant for sure */ return mkdfa(s, anchor); for (i = 0; i < nfatab; i++) /* is it there already? 
*/ if (fatab[i]->anchor == anchor @@ -124,13 +181,13 @@ fa *makedfa(const char *s, int anchor) /* returns dfa for reg expr s */ return pfa; } -fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */ - /* anchor = 1 for anchored matches, else 0 */ +fa *mkdfa(const char *s, bool anchor) /* does the real work of making a dfa */ + /* anchor = true for anchored matches, else false */ { Node *p, *p1; fa *f; - firstbasestr = (uschar *) s; + firstbasestr = (const uschar *) s; basestr = firstbasestr; p = reparse(s); p1 = op2(CAT, op2(STAR, op2(ALL, NIL, NIL), NIL), p); @@ -140,15 +197,14 @@ fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */ poscnt = 0; penter(p1); /* enter parent pointers and leaf indices */ - if ((f = (fa *) calloc(1, sizeof(fa) + poscnt*sizeof(rrow))) == NULL) - overflo("out of space for fa"); + if ((f = (fa *) calloc(1, sizeof(fa) + poscnt * sizeof(rrow))) == NULL) + overflo(__func__); f->accept = poscnt-1; /* penter has computed number of positions in re */ cfoll(f, p1); /* set up follow sets */ freetr(p1); - if ((f->posns[0] = (int *) calloc(1, *(f->re[0].lfollow)*sizeof(int))) == NULL) - overflo("out of space in makedfa"); - if ((f->posns[1] = (int *) calloc(1, sizeof(int))) == NULL) - overflo("out of space in makedfa"); + resize_state(f, 1); + f->posns[0] = intalloc(*(f->re[0].lfollow), __func__); + f->posns[1] = intalloc(1, __func__); *f->posns[1] = 0; f->initstat = makeinit(f, anchor); f->anchor = anchor; @@ -160,28 +216,26 @@ fa *mkdfa(const char *s, int anchor) /* does the real work of making a dfa */ return f; } -int makeinit(fa *f, int anchor) +int makeinit(fa *f, bool anchor) { int i, k; f->curstat = 2; f->out[2] = 0; - f->reset = 0; k = *(f->re[0].lfollow); - xfree(f->posns[2]); - if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) - overflo("out of space in makeinit"); - for (i=0; i <= k; i++) { + xfree(f->posns[2]); + f->posns[2] = intalloc(k + 1, __func__); + for (i = 0; i <= 
k; i++) { (f->posns[2])[i] = (f->re[0].lfollow)[i]; } if ((f->posns[2])[1] == f->accept) f->out[2] = 1; - for (i=0; i < NCHARS; i++) + for (i = 0; i < NCHARS; i++) f->gototab[2][i] = 0; f->curstat = cgoto(f, 2, HAT); if (anchor) { *f->posns[2] = k-1; /* leave out position 0 */ - for (i=0; i < k; i++) { + for (i = 0; i < k; i++) { (f->posns[0])[i] = (f->posns[2])[i]; } @@ -211,6 +265,8 @@ void penter(Node *p) /* set up parent pointers and leaf indices */ parent(left(p)) = p; parent(right(p)) = p; break; + case ZERO: + break; default: /* can't happen */ FATAL("can't happen: unknown type %d in penter", type(p)); break; @@ -225,6 +281,7 @@ void freetr(Node *p) /* free parse tree */ xfree(p); break; UNARY + case ZERO: freetr(left(p)); xfree(p); break; @@ -243,13 +300,13 @@ void freetr(Node *p) /* free parse tree */ /* in the parsing of regular expressions, metacharacters like . have */ /* to be seen literally; \056 is not a metacharacter. */ -int hexstr(uschar **pp) /* find and eval hex string at pp, return new p */ +int hexstr(const uschar **pp) /* find and eval hex string at pp, return new p */ { /* only pick up one 8-bit byte (2 chars) */ - uschar *p; + const uschar *p; int n = 0; int i; - for (i = 0, p = (uschar *) *pp; i < 2 && isxdigit(*p); i++, p++) { + for (i = 0, p = *pp; i < 2 && isxdigit(*p); i++, p++) { if (isdigit(*p)) n = 16 * n + *p - '0'; else if (*p >= 'a' && *p <= 'f') @@ -257,16 +314,16 @@ int hexstr(uschar **pp) /* find and eval hex string at pp, return new p */ else if (*p >= 'A' && *p <= 'F') n = 16 * n + *p - 'A' + 10; } - *pp = (uschar *) p; + *pp = p; return n; } #define isoctdigit(c) ((c) >= '0' && (c) <= '7') /* multiple use of arg */ -int quoted(uschar **pp) /* pick up next thing after a \\ */ +int quoted(const uschar **pp) /* pick up next thing after a \\ */ /* and increment *pp */ { - uschar *p = *pp; + const uschar *p = *pp; int c; if ((c = *p++) == 't') @@ -279,6 +336,10 @@ int quoted(uschar **pp) /* pick up next thing after a \\ */ c = 
'\r'; else if (c == 'b') c = '\b'; + else if (c == 'v') + c = '\v'; + else if (c == 'a') + c = '\a'; else if (c == '\\') c = '\\'; else if (c == 'x') { /* hexadecimal goo follows */ @@ -300,13 +361,13 @@ int quoted(uschar **pp) /* pick up next thing after a \\ */ char *cclenter(const char *argp) /* add a character class */ { int i, c, c2; - uschar *p = (uschar *) argp; - uschar *op, *bp; - static uschar *buf = 0; + const uschar *op, *p = (const uschar *) argp; + uschar *bp; + static uschar *buf = NULL; static int bufsz = 100; op = p; - if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL) + if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL) FATAL("out of space for character class [%.10s...] 1", p); bp = buf; for (i = 0; (c = *p++) != 0; ) { @@ -338,14 +399,14 @@ char *cclenter(const char *argp) /* add a character class */ i++; } *bp = 0; - dprintf( ("cclenter: in = |%s|, out = |%s|\n", op, buf) ); + DPRINTF("cclenter: in = |%s|, out = |%s|\n", op, buf); xfree(op); return (char *) tostring((char *) buf); } void overflo(const char *s) { - FATAL("regular expression too big: %.30s...", s); + FATAL("regular expression too big: out of space in %.30s...", s); } void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfollow[leaf] */ @@ -359,18 +420,13 @@ void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfo f->re[info(v)].ltype = type(v); f->re[info(v)].lval.np = right(v); while (f->accept >= maxsetvec) { /* guessing here! 
*/ - maxsetvec *= 4; - setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); - tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); - if (setvec == 0 || tmpset == 0) - overflo("out of space in cfoll()"); + resizesetvec(__func__); } for (i = 0; i <= f->accept; i++) setvec[i] = 0; setcnt = 0; follow(v); /* computes setvec and setcnt */ - if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL) - overflo("out of space building follow set"); + p = intalloc(setcnt + 1, __func__); f->re[info(v)].lfollow = p; *p = setcnt; for (i = f->accept; i >= 0; i--) @@ -385,6 +441,8 @@ void cfoll(fa *f, Node *v) /* enter follow set of each leaf of vertex v into lfo cfoll(f,left(v)); cfoll(f,right(v)); break; + case ZERO: + break; default: /* can't happen */ FATAL("can't happen: unknown type %d in cfoll", type(v)); } @@ -400,11 +458,7 @@ int first(Node *p) /* collects initially active leaves of p into setvec */ LEAF lp = info(p); /* look for high-water mark of subscripts */ while (setcnt >= maxsetvec || lp >= maxsetvec) { /* guessing here! 
*/ - maxsetvec *= 4; - setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); - tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); - if (setvec == 0 || tmpset == 0) - overflo("out of space in first()"); + resizesetvec(__func__); } if (type(p) == EMPTYRE) { setvec[lp] = 0; @@ -416,9 +470,10 @@ int first(Node *p) /* collects initially active leaves of p into setvec */ } if (type(p) == CCL && (*(char *) right(p)) == '\0') return(0); /* empty CCL */ - else return(1); + return(1); case PLUS: - if (first(left(p)) == 0) return(0); + if (first(left(p)) == 0) + return(0); return(1); case STAR: case QUEST: @@ -431,6 +486,8 @@ int first(Node *p) /* collects initially active leaves of p into setvec */ b = first(right(p)); if (first(left(p)) == 0 || b == 0) return(0); return(1); + case ZERO: + return 0; } FATAL("can't happen: unknown type %d in first", type(p)); /* can't happen */ return(-1); @@ -469,7 +526,7 @@ void follow(Node *v) /* collects leaves that can follow v into setvec */ int member(int c, const char *sarg) /* is c in s? */ { - uschar *s = (uschar *) sarg; + const uschar *s = (const uschar *) sarg; while (*s) if (c == *s++) @@ -480,9 +537,11 @@ int member(int c, const char *sarg) /* is c in s? */ int match(fa *f, const char *p0) /* shortest match ? */ { int s, ns; - uschar *p = (uschar *) p0; + const uschar *p = (const uschar *) p0; + + s = f->initstat; + assert (s < f->state_count); - s = f->reset ? makeinit(f,0) : f->initstat; if (f->out[s]) return(1); do { @@ -500,17 +559,13 @@ int match(fa *f, const char *p0) /* shortest match ? */ int pmatch(fa *f, const char *p0) /* longest match, for sub */ { int s, ns; - uschar *p = (uschar *) p0; - uschar *q; - int i, k; + const uschar *p = (const uschar *) p0; + const uschar *q; - /* s = f->reset ? 
makeinit(f,1) : f->initstat; */ - if (f->reset) { - f->initstat = s = makeinit(f,1); - } else { - s = f->initstat; - } - patbeg = (char *) p; + s = f->initstat; + assert(s < f->state_count); + + patbeg = (const char *)p; patlen = -1; do { q = p; @@ -522,9 +577,12 @@ int pmatch(fa *f, const char *p0) /* longest match, for sub */ s = ns; else s = cgoto(f, s, *q); + + assert(s < f->state_count); + if (s == 1) { /* no transition */ if (patlen >= 0) { - patbeg = (char *) p; + patbeg = (const char *) p; return(1); } else @@ -534,41 +592,25 @@ int pmatch(fa *f, const char *p0) /* longest match, for sub */ if (f->out[s]) patlen = q-p-1; /* don't count $ */ if (patlen >= 0) { - patbeg = (char *) p; + patbeg = (const char *) p; return(1); } nextin: s = 2; - if (f->reset) { - for (i = 2; i <= f->curstat; i++) - xfree(f->posns[i]); - k = *f->posns[0]; - if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) - overflo("out of space in pmatch"); - for (i = 0; i <= k; i++) - (f->posns[2])[i] = (f->posns[0])[i]; - f->initstat = f->curstat = 2; - f->out[2] = f->out[0]; - for (i = 0; i < NCHARS; i++) - f->gototab[2][i] = 0; - } - } while (*p++ != 0); + } while (*p++); return (0); } int nematch(fa *f, const char *p0) /* non-empty match, for sub */ { int s, ns; - uschar *p = (uschar *) p0; - uschar *q; - int i, k; + const uschar *p = (const uschar *) p0; + const uschar *q; - /* s = f->reset ? 
makeinit(f,1) : f->initstat; */ - if (f->reset) { - f->initstat = s = makeinit(f,1); - } else { - s = f->initstat; - } + s = f->initstat; + assert(s < f->state_count); + + patbeg = (const char *)p; patlen = -1; while (*p) { q = p; @@ -582,7 +624,7 @@ int nematch(fa *f, const char *p0) /* non-empty match, for sub */ s = cgoto(f, s, *q); if (s == 1) { /* no transition */ if (patlen > 0) { - patbeg = (char *) p; + patbeg = (const char *) p; return(1); } else goto nnextin; /* no nonempty match */ @@ -591,35 +633,110 @@ int nematch(fa *f, const char *p0) /* non-empty match, for sub */ if (f->out[s]) patlen = q-p-1; /* don't count $ */ if (patlen > 0 ) { - patbeg = (char *) p; + patbeg = (const char *) p; return(1); } nnextin: s = 2; - if (f->reset) { - for (i = 2; i <= f->curstat; i++) - xfree(f->posns[i]); - k = *f->posns[0]; - if ((f->posns[2] = (int *) calloc(1, (k+1)*sizeof(int))) == NULL) - overflo("out of state space"); - for (i = 0; i <= k; i++) - (f->posns[2])[i] = (f->posns[0])[i]; - f->initstat = f->curstat = 2; - f->out[2] = f->out[0]; - for (i = 0; i < NCHARS; i++) - f->gototab[2][i] = 0; - } p++; } return (0); } + +/* + * NAME + * fnematch + * + * DESCRIPTION + * A stream-fed version of nematch which transfers characters to a + * null-terminated buffer. All characters up to and including the last + * character of the matching text or EOF are placed in the buffer. If + * a match is found, patbeg and patlen are set appropriately. + * + * RETURN VALUES + * false No match found. + * true Match found. + */ + +bool fnematch(fa *pfa, FILE *f, char **pbuf, int *pbufsize, int quantum) +{ + char *buf = *pbuf; + int bufsize = *pbufsize; + int c, i, j, k, ns, s; + + s = pfa->initstat; + patlen = 0; + + /* + * All indices relative to buf. 
+ * i <= j <= k <= bufsize + * + * i: origin of active substring + * j: current character + * k: destination of next getc() + */ + i = -1, k = 0; + do { + j = i++; + do { + if (++j == k) { + if (k == bufsize) + if (!adjbuf((char **) &buf, &bufsize, bufsize+1, quantum, 0, "fnematch")) + FATAL("stream '%.30s...' too long", buf); + buf[k++] = (c = getc(f)) != EOF ? c : 0; + } + c = (uschar)buf[j]; + /* assert(c < NCHARS); */ + + if ((ns = pfa->gototab[s][c]) != 0) + s = ns; + else + s = cgoto(pfa, s, c); + + if (pfa->out[s]) { /* final state */ + patlen = j - i + 1; + if (c == 0) /* don't count $ */ + patlen--; + } + } while (buf[j] && s != 1); + s = 2; + } while (buf[i] && !patlen); + + /* adjbuf() may have relocated a resized buffer. Inform the world. */ + *pbuf = buf; + *pbufsize = bufsize; + + if (patlen) { + patbeg = (char *) buf + i; + /* + * Under no circumstances is the last character fed to + * the automaton part of the match. It is EOF's nullbyte, + * or it sent the automaton into a state with no further + * transitions available (s==1), or both. Room for a + * terminating nullbyte is guaranteed. + * + * ungetc any chars after the end of matching text + * (except for EOF's nullbyte, if present) and null + * terminate the buffer. 
+ */ + do + if (buf[--k] && ungetc(buf[k], f) == EOF) + FATAL("unable to ungetc '%c'", buf[k]); + while (k > i + patlen); + buf[k] = '\0'; + return true; + } + else + return false; +} + Node *reparse(const char *p) /* parses regular expression pointed to by p */ { /* uses relex() to scan regular expression */ Node *np; - dprintf( ("reparse <%s>\n", p) ); - lastre = prestr = (uschar *) p; /* prestr points to string to be parsed */ + DPRINTF("reparse <%s>\n", p); + lastre = prestr = (const uschar *) p; /* prestr points to string to be parsed */ rtok = relex(); /* GNU compatibility: an empty regexp matches anything */ if (rtok == '\0') { @@ -659,12 +776,12 @@ Node *primary(void) rtok = relex(); return (unary(op2(DOT, NIL, NIL))); case CCL: - np = op2(CCL, NIL, (Node*) cclenter((char *) rlxstr)); + np = op2(CCL, NIL, (Node*) cclenter((const char *) rlxstr)); lastatom = starttok; rtok = relex(); return (unary(np)); case NCCL: - np = op2(NCCL, NIL, (Node *) cclenter((char *) rlxstr)); + np = op2(NCCL, NIL, (Node *) cclenter((const char *) rlxstr)); lastatom = starttok; rtok = relex(); return (unary(np)); @@ -730,6 +847,9 @@ Node *unary(Node *np) case QUEST: rtok = relex(); return (unary(op2(QUEST, np, NIL))); + case ZERO: + rtok = relex(); + return (unary(op2(ZERO, np, NIL))); default: return (np); } @@ -765,7 +885,7 @@ int (xisblank)(int c) #endif -struct charclass { +static const struct charclass { const char *cc_name; int cc_namelen; int (*cc_func)(int); @@ -801,10 +921,10 @@ replace_repeat(const uschar *reptok, int reptoklen, const uschar *atom, int i, j; uschar *buf = 0; int ret = 1; - int init_q = (firstnum==0); /* first added char will be ? */ + int init_q = (firstnum == 0); /* first added char will be ? 
*/ int n_q_reps = secondnum-firstnum; /* m>n, so reduce until {1,m-n} left */ int prefix_length = reptok - basestr; /* prefix includes first rep */ - int suffix_length = strlen((char *) reptok) - reptoklen; /* string after rep specifier */ + int suffix_length = strlen((const char *) reptok) - reptoklen; /* string after rep specifier */ int size = prefix_length + suffix_length; if (firstnum > 1) { /* add room for reps 2 through firstnum */ @@ -819,7 +939,7 @@ replace_repeat(const uschar *reptok, int reptoklen, const uschar *atom, } else if (special_case == REPEAT_ZERO) { size += 2; /* just a null ERE: () */ } - if ((buf = (uschar *) malloc(size+1)) == NULL) + if ((buf = (uschar *) malloc(size + 1)) == NULL) FATAL("out of space in reg expr %.10s..", lastre); memcpy(buf, basestr, prefix_length); /* copy prefix */ j = prefix_length; @@ -828,15 +948,16 @@ replace_repeat(const uschar *reptok, int reptoklen, const uschar *atom, buf[j++] = '('; buf[j++] = ')'; } - for (i=1; i < firstnum; i++) { /* copy x reps */ + for (i = 1; i < firstnum; i++) { /* copy x reps */ memcpy(&buf[j], atom, atomlen); j += atomlen; } if (special_case == REPEAT_PLUS_APPENDED) { buf[j++] = '+'; } else if (special_case == REPEAT_WITH_Q) { - if (init_q) buf[j++] = '?'; - for (i=0; i < n_q_reps; i++) { /* copy x? reps */ + if (init_q) + buf[j++] = '?'; + for (i = init_q; i < n_q_reps; i++) { /* copy x? 
reps */ memcpy(&buf[j], atom, atomlen); j += atomlen; buf[j++] = '?'; @@ -905,13 +1026,15 @@ int relex(void) /* lexical analyzer for reparse */ { int c, n; int cflag; - static uschar *buf = 0; + static uschar *buf = NULL; static int bufsz = 100; uschar *bp; - struct charclass *cc; + const struct charclass *cc; int i; - int num, m, commafound, digitfound; + int num, m; + bool commafound, digitfound; const uschar *startreptok; + static int parens = 0; rescan: starttok = prestr; @@ -925,17 +1048,26 @@ rescan: case '\0': prestr--; return '\0'; case '^': case '$': + return c; case '(': - case ')': + parens++; return c; + case ')': + if (parens) { + parens--; + return c; + } + /* unmatched close parenthesis; per POSIX, treat as literal */ + rlxval = c; + return CHAR; case '\\': rlxval = quoted(&prestr); return CHAR; default: rlxval = c; return CHAR; - case '[': - if (buf == 0 && (buf = (uschar *) malloc(bufsz)) == NULL) + case '[': + if (buf == NULL && (buf = (uschar *) malloc(bufsz)) == NULL) FATAL("out of space in reg expr %.10s..", lastre); bp = buf; if (*prestr == '^') { @@ -975,6 +1107,12 @@ rescan: if (!adjbuf((char **) &buf, &bufsz, bp-buf+1, 100, (char **) &bp, "relex2")) FATAL("out of space for reg expr %.10s...", lastre); if (cc->cc_func(i)) { + /* escape backslash */ + if (i == '\\') { + *bp++ = '\\'; + n++; + } + *bp++ = i; n++; } @@ -1034,8 +1172,8 @@ rescan: if (isdigit(*(prestr))) { num = 0; /* Process as a repetition */ n = -1; m = -1; - commafound = 0; - digitfound = 0; + commafound = false; + digitfound = false; startreptok = prestr-1; /* Remember start of previous atom here ? 
*/ } else { /* just a { char, not a repetition */ @@ -1047,15 +1185,17 @@ rescan: if (commafound) { if (digitfound) { /* {n,m} */ m = num; - if (m<n) + if (m < n) FATAL("illegal repetition expression: class %.20s", lastre); - if ((n==0) && (m==1)) { + if (n == 0 && m == 1) { return QUEST; } } else { /* {n,} */ - if (n==0) return STAR; - if (n==1) return PLUS; + if (n == 0) + return STAR; + else if (n == 1) + return PLUS; } } else { if (digitfound) { /* {n} same as {n,n} */ @@ -1068,8 +1208,8 @@ rescan: } if (repeat(starttok, prestr-starttok, lastatom, startreptok - lastatom, n, m) > 0) { - if ((n==0) && (m==0)) { - return EMPTYRE; + if (n == 0 && m == 0) { + return ZERO; } /* must rescan input for next token */ goto rescan; @@ -1082,15 +1222,15 @@ rescan: lastre); } else if (isdigit(c)) { num = 10 * num + c - '0'; - digitfound = 1; + digitfound = true; } else if (c == ',') { if (commafound) FATAL("illegal repetition expression: class %.20s", lastre); /* looking for {n,} or {n,m} */ - commafound = 1; + commafound = true; n = num; - digitfound = 0; /* reset */ + digitfound = false; /* reset */ num = 0; } else { FATAL("illegal repetition expression: class %.20s", @@ -1103,20 +1243,17 @@ rescan: int cgoto(fa *f, int s, int c) { - int i, j, k; int *p, *q; + int i, j, k; assert(c == HAT || c < NCHARS); while (f->accept >= maxsetvec) { /* guessing here! 
*/ - maxsetvec *= 4; - setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); - tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); - if (setvec == 0 || tmpset == 0) - overflo("out of space in cgoto()"); + resizesetvec(__func__); } for (i = 0; i <= f->accept; i++) setvec[i] = 0; setcnt = 0; + resize_state(f, s); /* compute positions of gototab[s,c] into setvec */ p = f->posns[s]; for (i = 1; i <= *p; i++) { @@ -1130,11 +1267,7 @@ int cgoto(fa *f, int s, int c) q = f->re[p[i]].lfollow; for (j = 1; j <= *q; j++) { if (q[j] >= maxsetvec) { - maxsetvec *= 4; - setvec = (int *) realloc(setvec, maxsetvec * sizeof(int)); - tmpset = (int *) realloc(tmpset, maxsetvec * sizeof(int)); - if (setvec == 0 || tmpset == 0) - overflo("cgoto overflow"); + resizesetvec(__func__); } if (setvec[q[j]] == 0) { setcnt++; @@ -1151,6 +1284,7 @@ int cgoto(fa *f, int s, int c) if (setvec[i]) { tmpset[j++] = i; } + resize_state(f, f->curstat > s ? f->curstat : s); /* tmpset == previous state? */ for (i = 1; i <= f->curstat; i++) { p = f->posns[i]; @@ -1160,27 +1294,23 @@ int cgoto(fa *f, int s, int c) if (tmpset[j] != p[j]) goto different; /* setvec is state i */ - f->gototab[s][c] = i; + if (c != HAT) + f->gototab[s][c] = i; return i; different:; } /* add tmpset to current set of states */ - if (f->curstat >= NSTATES-1) { - f->curstat = 2; - f->reset = 1; - for (i = 2; i < NSTATES; i++) - xfree(f->posns[i]); - } else - ++(f->curstat); + ++(f->curstat); + resize_state(f, f->curstat); for (i = 0; i < NCHARS; i++) f->gototab[f->curstat][i] = 0; xfree(f->posns[f->curstat]); - if ((p = (int *) calloc(1, (setcnt+1)*sizeof(int))) == NULL) - overflo("out of space in cgoto"); + p = intalloc(setcnt + 1, __func__); f->posns[f->curstat] = p; - f->gototab[s][c] = f->curstat; + if (c != HAT) + f->gototab[s][c] = f->curstat; for (i = 0; i <= setcnt; i++) p[i] = tmpset[i]; if (setvec[f->accept]) @@ -1197,13 +1327,18 @@ void freefa(fa *f) /* free a finite automaton */ if (f == NULL) return; + for 
(i = 0; i < f->state_count; i++) + xfree(f->gototab[i]) for (i = 0; i <= f->curstat; i++) xfree(f->posns[i]); for (i = 0; i <= f->accept; i++) { xfree(f->re[i].lfollow); if (f->re[i].ltype == CCL || f->re[i].ltype == NCCL) - xfree((f->re[i].lval.np)); + xfree(f->re[i].lval.np); } xfree(f->restr); + xfree(f->out); + xfree(f->posns); + xfree(f->gototab); xfree(f); } diff --git a/bugs-fixed/missing-precision.ok b/bugs-fixed/missing-precision.ok index 608b4fa48666..75e1e3d00446 100644 --- a/bugs-fixed/missing-precision.ok +++ b/bugs-fixed/missing-precision.ok @@ -1,2 +1,2 @@ -./a.out: not enough args in printf(%*s) +../a.out: not enough args in printf(%*s) source line number 1 diff --git a/bugs-fixed/negative-nf.ok b/bugs-fixed/negative-nf.ok index 71c860468cc0..de97f8b27def 100644 --- a/bugs-fixed/negative-nf.ok +++ b/bugs-fixed/negative-nf.ok @@ -1,2 +1,2 @@ -./a.out: cannot set NF to a negative value +../a.out: cannot set NF to a negative value source line number 1 @@ -27,10 +27,10 @@ THIS SOFTWARE. 
#include <string.h> #include <ctype.h> #include "awk.h" -#include "ytab.h" +#include "awkgram.tab.h" extern YYSTYPE yylval; -extern int infunc; +extern bool infunc; int lineno = 1; int bracecnt = 0; @@ -43,13 +43,15 @@ typedef struct Keyword { int type; } Keyword; -Keyword keywords[] ={ /* keep sorted: binary searched */ +const Keyword keywords[] = { /* keep sorted: binary searched */ { "BEGIN", XBEGIN, XBEGIN }, { "END", XEND, XEND }, { "NF", VARNF, VARNF }, + { "and", FAND, BLTIN }, { "atan2", FATAN, BLTIN }, { "break", BREAK, BREAK }, { "close", CLOSE, CLOSE }, + { "compl", FCOMPL, BLTIN }, { "continue", CONTINUE, CONTINUE }, { "cos", FCOS, BLTIN }, { "delete", DELETE, DELETE }, @@ -61,6 +63,7 @@ Keyword keywords[] ={ /* keep sorted: binary searched */ { "for", FOR, FOR }, { "func", FUNC, FUNC }, { "function", FUNC, FUNC }, + { "gensub", GENSUB, GENSUB }, { "getline", GETLINE, GETLINE }, { "gsub", GSUB, GSUB }, { "if", IF, IF }, @@ -69,36 +72,42 @@ Keyword keywords[] ={ /* keep sorted: binary searched */ { "int", FINT, BLTIN }, { "length", FLENGTH, BLTIN }, { "log", FLOG, BLTIN }, + { "lshift", FLSHIFT, BLTIN }, { "match", MATCHFCN, MATCHFCN }, { "next", NEXT, NEXT }, { "nextfile", NEXTFILE, NEXTFILE }, + { "or", FFOR, BLTIN }, { "print", PRINT, PRINT }, { "printf", PRINTF, PRINTF }, { "rand", FRAND, BLTIN }, { "return", RETURN, RETURN }, + { "rshift", FRSHIFT, BLTIN }, { "sin", FSIN, BLTIN }, { "split", SPLIT, SPLIT }, { "sprintf", SPRINTF, SPRINTF }, { "sqrt", FSQRT, BLTIN }, { "srand", FSRAND, BLTIN }, + { "strftime", FSTRFTIME, BLTIN }, { "sub", SUB, SUB }, { "substr", SUBSTR, SUBSTR }, { "system", FSYSTEM, BLTIN }, + { "systime", FSYSTIME, BLTIN }, { "tolower", FTOLOWER, BLTIN }, { "toupper", FTOUPPER, BLTIN }, { "while", WHILE, WHILE }, + { "xor", FXOR, BLTIN }, }; #define RET(x) { if(dbg)printf("lex %s\n", tokname(x)); return(x); } -int peek(void) +static int peek(void) { int c = input(); unput(c); return c; } -int gettok(char **pbuf, int *psz) /* get 
next input token */ +static int gettok(char **pbuf, int *psz) /* get next input token */ { int c, retc; char *buf = *pbuf; @@ -136,7 +145,7 @@ int gettok(char **pbuf, int *psz) /* get next input token */ if (bp-buf >= sz) if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok")) FATAL( "out of space for number %.10s...", buf ); - if (isdigit(c) || c == 'e' || c == 'E' + if (isdigit(c) || c == 'e' || c == 'E' || c == '.' || c == '+' || c == '-') *bp++ = c; else { @@ -148,7 +157,7 @@ int gettok(char **pbuf, int *psz) /* get next input token */ strtod(buf, &rem); /* parse the number */ if (rem == buf) { /* it wasn't a valid number at all */ buf[1] = 0; /* return one character as token */ - retc = buf[0]; /* character is its own type */ + retc = (uschar)buf[0]; /* character is its own type */ unputstr(rem+1); /* put rest back for later */ } else { /* some prefix was a number */ unputstr(rem); /* put rest back for later */ @@ -164,23 +173,23 @@ int gettok(char **pbuf, int *psz) /* get next input token */ int word(char *); int string(void); int regexpr(void); -int sc = 0; /* 1 => return a } right now */ -int reg = 0; /* 1 => return a REGEXPR now */ +bool sc = false; /* true => return a } right now */ +bool reg = false; /* true => return a REGEXPR now */ int yylex(void) { int c; - static char *buf = 0; + static char *buf = NULL; static int bufsize = 5; /* BUG: setting this small causes core dump! 
*/ - if (buf == 0 && (buf = (char *) malloc(bufsize)) == NULL) + if (buf == NULL && (buf = (char *) malloc(bufsize)) == NULL) FATAL( "out of space in yylex" ); if (sc) { - sc = 0; + sc = false; RET('}'); } if (reg) { - reg = 0; + reg = false; return regexpr(); } for (;;) { @@ -190,11 +199,18 @@ int yylex(void) if (isalpha(c) || c == '_') return word(buf); if (isdigit(c)) { - yylval.cp = setsymtab(buf, tostring(buf), atof(buf), CON|NUM, symtab); + char *cp = tostring(buf); + double result; + + if (is_number(cp, & result)) + yylval.cp = setsymtab(buf, cp, result, CON|NUM, symtab); + else + yylval.cp = setsymtab(buf, cp, 0.0, STR, symtab); + free(cp); /* should this also have STR set? */ RET(NUMBER); } - + yylval.i = c; switch (c) { case '\n': /* {EOL} */ @@ -208,6 +224,11 @@ int yylex(void) while ((c = input()) != '\n' && c != 0) ; unput(c); + /* + * Next line is a hack, itcompensates for + * unput's treatment of \n. + */ + lineno++; break; case ';': RET(';'); @@ -225,7 +246,7 @@ int yylex(void) case '&': if (peek() == '&') { input(); RET(AND); - } else + } else RET('&'); case '|': if (peek() == '|') { @@ -323,11 +344,11 @@ int yylex(void) unputstr(buf); RET(INDIRECT); } - + case '}': if (--bracecnt < 0) SYNTAX( "extra }" ); - sc = 1; + sc = true; RET(';'); case ']': if (--brackcnt < 0) @@ -346,10 +367,10 @@ int yylex(void) case '(': parencnt++; RET('('); - + case '"': return string(); /* BUG: should be like tran.c ? 
*/ - + default: RET(c); } @@ -360,10 +381,10 @@ int string(void) { int c, n; char *s, *bp; - static char *buf = 0; + static char *buf = NULL; static int bufsz = 500; - if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL) + if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL) FATAL("out of space for strings"); for (bp = buf; (c = input()) != '"'; ) { if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string")) @@ -381,14 +402,15 @@ int string(void) case '\\': c = input(); switch (c) { + case '\n': break; case '"': *bp++ = '"'; break; - case 'n': *bp++ = '\n'; break; + case 'n': *bp++ = '\n'; break; case 't': *bp++ = '\t'; break; case 'f': *bp++ = '\f'; break; case 'r': *bp++ = '\r'; break; case 'b': *bp++ = '\b'; break; case 'v': *bp++ = '\v'; break; - case 'a': *bp++ = '\007'; break; + case 'a': *bp++ = '\a'; break; case '\\': *bp++ = '\\'; break; case '0': case '1': case '2': /* octal: \d \dd \ddd */ @@ -419,7 +441,7 @@ int string(void) break; } - default: + default: *bp++ = c; break; } @@ -429,15 +451,16 @@ int string(void) break; } } - *bp = 0; + *bp = 0; s = tostring(buf); - *bp++ = ' '; *bp++ = 0; + *bp++ = ' '; *bp++ = '\0'; yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab); + free(s); RET(STRING); } -int binsearch(char *w, Keyword *kp, int n) +static int binsearch(char *w, const Keyword *kp, int n) { int cond, low, mid, high; @@ -455,15 +478,14 @@ int binsearch(char *w, Keyword *kp, int n) return -1; } -int word(char *w) +int word(char *w) { - Keyword *kp; + const Keyword *kp; int c, n; n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0])); -/* BUG: this ought to be inside the if; in theory could fault (daniel barrett) */ - kp = keywords + n; if (n != -1) { /* found in table */ + kp = keywords + n; yylval.i = kp->sub; switch (kp->type) { /* special handling */ case BLTIN: @@ -501,17 +523,17 @@ int word(char *w) void startreg(void) /* next call to yylex will return a regular expression */ { - reg = 1; + reg = true; } int 
regexpr(void) { int c; - static char *buf = 0; + static char *buf = NULL; static int bufsz = 500; char *bp; - if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL) + if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL) FATAL("out of space for rex expr"); bp = buf; for ( ; (c = input()) != '/' && c != 0; ) { @@ -519,11 +541,11 @@ int regexpr(void) FATAL("out of space for reg expr %.10s...", buf); if (c == '\n') { *bp = '\0'; - SYNTAX( "newline in regular expression %.10s...", buf ); + SYNTAX( "newline in regular expression %.10s...", buf ); unput('\n'); break; } else if (c == '\\') { - *bp++ = '\\'; + *bp++ = '\\'; *bp++ = input(); } else { *bp++ = c; @@ -543,7 +565,7 @@ char ebuf[300]; char *ep = ebuf; char yysbuf[100]; /* pushback buffer */ char *yysptr = yysbuf; -FILE *yyin = 0; +FILE *yyin = NULL; int input(void) /* get next lexical input character */ { @@ -570,6 +592,8 @@ int input(void) /* get next lexical input character */ void unput(int c) /* put lexical character back on input */ { + if (c == '\n') + lineno--; if (yysptr >= yysbuf + sizeof(yysbuf)) FATAL("pushed back too much: %.20s...", yysbuf); *yysptr++ = c; @@ -25,43 +25,49 @@ THIS SOFTWARE. 
#define DEBUG #include <stdio.h> #include <string.h> +#include <strings.h> #include <ctype.h> #include <errno.h> #include <stdlib.h> #include <stdarg.h> +#include <limits.h> +#include <math.h> #include "awk.h" -#include "ytab.h" +char EMPTY[] = { '\0' }; FILE *infile = NULL; -char *file = ""; +bool innew; /* true = infile has not been read by readrec */ +char *file = EMPTY; char *record; int recsize = RECSIZE; char *fields; int fieldssize = RECSIZE; Cell **fldtab; /* pointers to Cells */ -char inputFS[100] = " "; +static size_t len_inputFS = 0; +static char *inputFS = NULL; /* FS at time of input, for field splitting */ #define MAXFLD 2 int nfields = MAXFLD; /* last allocated slot for $i */ -int donefld; /* 1 = implies rec broken into fields */ -int donerec; /* 1 = record is valid (no flds have changed) */ +bool donefld; /* true = implies rec broken into fields */ +bool donerec; /* true = record is valid (no flds have changed) */ int lastfld = 0; /* last used field */ int argno = 1; /* current input argument number */ extern Awkfloat *ARGC; -static Cell dollar0 = { OCELL, CFLD, NULL, "", 0.0, REC|STR|DONTFREE }; -static Cell dollar1 = { OCELL, CFLD, NULL, "", 0.0, FLD|STR|DONTFREE }; +static Cell dollar0 = { OCELL, CFLD, NULL, EMPTY, 0.0, REC|STR|DONTFREE, NULL, NULL }; +static Cell dollar1 = { OCELL, CFLD, NULL, EMPTY, 0.0, FLD|STR|DONTFREE, NULL, NULL }; void recinit(unsigned int n) { if ( (record = (char *) malloc(n)) == NULL || (fields = (char *) malloc(n+1)) == NULL - || (fldtab = (Cell **) malloc((nfields+2) * sizeof(Cell *))) == NULL - || (fldtab[0] = (Cell *) malloc(sizeof(Cell))) == NULL ) + || (fldtab = (Cell **) calloc(nfields+2, sizeof(*fldtab))) == NULL + || (fldtab[0] = (Cell *) malloc(sizeof(**fldtab))) == NULL) FATAL("out of space for $0 and fields"); + *record = '\0'; *fldtab[0] = dollar0; fldtab[0]->sval = record; fldtab[0]->nval = tostring("0"); @@ -74,11 +80,11 @@ void makefields(int n1, int n2) /* create $n1..$n2 inclusive */ int i; for (i = 
n1; i <= n2; i++) { - fldtab[i] = (Cell *) malloc(sizeof (struct Cell)); + fldtab[i] = (Cell *) malloc(sizeof(**fldtab)); if (fldtab[i] == NULL) FATAL("out of space in makefields %d", i); *fldtab[i] = dollar1; - sprintf(temp, "%d", i); + snprintf(temp, sizeof(temp), "%d", i); fldtab[i]->nval = tostring(temp); } } @@ -102,11 +108,36 @@ void initgetrec(void) argno++; } infile = stdin; /* no filenames, so use stdin */ + innew = true; } -static int firsttime = 1; +/* + * POSIX specifies that fields are supposed to be evaluated as if they were + * split using the value of FS at the time that the record's value ($0) was + * read. + * + * Since field-splitting is done lazily, we save the current value of FS + * whenever a new record is read in (implicitly or via getline), or when + * a new value is assigned to $0. + */ +void savefs(void) +{ + size_t len; + if ((len = strlen(getsval(fsloc))) < len_inputFS) { + strcpy(inputFS, *FS); /* for subsequent field splitting */ + return; + } -int getrec(char **pbuf, int *pbufsize, int isrecord) /* get next input record */ + len_inputFS = len + 1; + inputFS = (char *) realloc(inputFS, len_inputFS); + if (inputFS == NULL) + FATAL("field separator %.10s... 
is too long", *FS); + memcpy(inputFS, *FS, len_inputFS); +} + +static bool firsttime = true; + +int getrec(char **pbuf, int *pbufsize, bool isrecord) /* get next input record */ { /* note: cares whether buf == record */ int c; char *buf = *pbuf; @@ -114,19 +145,20 @@ int getrec(char **pbuf, int *pbufsize, int isrecord) /* get next input record */ int bufsize = *pbufsize, savebufsize = bufsize; if (firsttime) { - firsttime = 0; + firsttime = false; initgetrec(); } - dprintf( ("RS=<%s>, FS=<%s>, ARGC=%g, FILENAME=%s\n", - *RS, *FS, *ARGC, *FILENAME) ); + DPRINTF("RS=<%s>, FS=<%s>, ARGC=%g, FILENAME=%s\n", + *RS, *FS, *ARGC, *FILENAME); if (isrecord) { - donefld = 0; - donerec = 1; + donefld = false; + donerec = true; + savefs(); } saveb0 = buf[0]; buf[0] = 0; while (argno < *ARGC || infile == stdin) { - dprintf( ("argno=%d, file=|%s|\n", argno, file) ); + DPRINTF("argno=%d, file=|%s|\n", argno, file); if (infile == NULL) { /* have to open a new file */ file = getargv(argno); if (file == NULL || *file == '\0') { /* deleted or zapped */ @@ -139,22 +171,26 @@ int getrec(char **pbuf, int *pbufsize, int isrecord) /* get next input record */ continue; } *FILENAME = file; - dprintf( ("opening file %s\n", file) ); + DPRINTF("opening file %s\n", file); if (*file == '-' && *(file+1) == '\0') infile = stdin; else if ((infile = fopen(file, "r")) == NULL) FATAL("can't open file %s", file); setfval(fnrloc, 0.0); } - c = readrec(&buf, &bufsize, infile); + c = readrec(&buf, &bufsize, infile, innew); + if (innew) + innew = false; if (c != 0 || buf[0] != '\0') { /* normal record */ if (isrecord) { + double result; + if (freeable(fldtab[0])) xfree(fldtab[0]->sval); fldtab[0]->sval = buf; /* buf == record */ fldtab[0]->tval = REC | STR | DONTFREE; - if (is_number(fldtab[0]->sval)) { - fldtab[0]->fval = atof(fldtab[0]->sval); + if (is_number(fldtab[0]->sval, & result)) { + fldtab[0]->fval = result; fldtab[0]->tval |= NUM; } } @@ -184,47 +220,62 @@ void nextfile(void) argno++; } -int 
readrec(char **pbuf, int *pbufsize, FILE *inf) /* read one record into buf */ +int readrec(char **pbuf, int *pbufsize, FILE *inf, bool newflag) /* read one record into buf */ { - int sep, c; + int sep, c, isrec; char *rr, *buf = *pbuf; int bufsize = *pbufsize; char *rs = getsval(rsloc); - if (strlen(getsval(fsloc)) >= sizeof (inputFS)) - FATAL("field separator %.10s... is too long", *FS); - /*fflush(stdout); avoids some buffering problem but makes it 25% slower*/ - strcpy(inputFS, *FS); /* for subsequent field splitting */ - if ((sep = *rs) == 0) { - sep = '\n'; - while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */ - ; - if (c != EOF) - ungetc(c, inf); - } - for (rr = buf; ; ) { - for (; (c=getc(inf)) != sep && c != EOF; ) { - if (rr-buf+1 > bufsize) - if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 1")) - FATAL("input record `%.30s...' too long", buf); + if (*rs && rs[1]) { + bool found; + + fa *pfa = makedfa(rs, 1); + if (newflag) + found = fnematch(pfa, inf, &buf, &bufsize, recsize); + else { + int tempstat = pfa->initstat; + pfa->initstat = 2; + found = fnematch(pfa, inf, &buf, &bufsize, recsize); + pfa->initstat = tempstat; + } + if (found) + setptr(patbeg, '\0'); + } else { + if ((sep = *rs) == 0) { + sep = '\n'; + while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */ + ; + if (c != EOF) + ungetc(c, inf); + } + for (rr = buf; ; ) { + for (; (c=getc(inf)) != sep && c != EOF; ) { + if (rr-buf+1 > bufsize) + if (!adjbuf(&buf, &bufsize, 1+rr-buf, + recsize, &rr, "readrec 1")) + FATAL("input record `%.30s...' too long", buf); + *rr++ = c; + } + if (*rs == sep || c == EOF) + break; + if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */ + break; + if (!adjbuf(&buf, &bufsize, 2+rr-buf, recsize, &rr, + "readrec 2")) + FATAL("input record `%.30s...' 
too long", buf); + *rr++ = '\n'; *rr++ = c; } - if (*rs == sep || c == EOF) - break; - if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */ - break; - if (!adjbuf(&buf, &bufsize, 2+rr-buf, recsize, &rr, "readrec 2")) + if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3")) FATAL("input record `%.30s...' too long", buf); - *rr++ = '\n'; - *rr++ = c; + *rr = 0; } - if (!adjbuf(&buf, &bufsize, 1+rr-buf, recsize, &rr, "readrec 3")) - FATAL("input record `%.30s...' too long", buf); - *rr = 0; - dprintf( ("readrec saw <%s>, returns %d\n", buf, c == EOF && rr == buf ? 0 : 1) ); *pbuf = buf; *pbufsize = bufsize; - return c == EOF && rr == buf ? 0 : 1; + isrec = *buf || !feof(inf); + DPRINTF("readrec saw <%s>, returns %d\n", buf, isrec); + return isrec; } char *getargv(int n) /* get ARGV[n] */ @@ -233,12 +284,12 @@ char *getargv(int n) /* get ARGV[n] */ char *s, temp[50]; extern Array *ARGVtab; - sprintf(temp, "%d", n); + snprintf(temp, sizeof(temp), "%d", n); if (lookup(temp, ARGVtab) == NULL) return NULL; x = setsymtab(temp, "", 0.0, STR, ARGVtab); s = getsval(x); - dprintf( ("getargv(%d) returns |%s|\n", n, s) ); + DPRINTF("getargv(%d) returns |%s|\n", n, s); return s; } @@ -246,6 +297,7 @@ void setclvar(char *s) /* set var=value from s */ { char *p; Cell *q; + double result; for (p=s; *p != '='; p++) ; @@ -253,11 +305,11 @@ void setclvar(char *s) /* set var=value from s */ p = qstring(p, '\0'); q = setsymtab(s, p, 0.0, STR, symtab); setsval(q, p); - if (is_number(q->sval)) { - q->fval = atof(q->sval); + if (is_number(q->sval, & result)) { + q->fval = result; q->tval |= NUM; } - dprintf( ("command line set %s to |%s|\n", s, p) ); + DPRINTF("command line set %s to |%s|\n", s, p); } @@ -284,9 +336,8 @@ void fldbld(void) /* create fields from current record */ } fr = fields; i = 0; /* number of fields accumulated here */ - if (strlen(getsval(fsloc)) >= sizeof (inputFS)) - FATAL("field separator %.10s... 
is too long", *FS); - strcpy(inputFS, *FS); + if (inputFS == NULL) /* make sure we have a copy of FS */ + savefs(); if (strlen(inputFS) > 1) { /* it's a regular expression */ i = refldbld(r, inputFS); } else if ((sep = *inputFS) == ' ') { /* default whitespace */ @@ -309,15 +360,19 @@ void fldbld(void) /* create fields from current record */ } *fr = 0; } else if ((sep = *inputFS) == 0) { /* new: FS="" => 1 char/field */ - for (i = 0; *r != 0; r++) { - char buf[2]; + for (i = 0; *r != '\0'; r += n) { + char buf[MB_LEN_MAX + 1]; + i++; if (i > nfields) growfldtab(i); if (freeable(fldtab[i])) xfree(fldtab[i]->sval); - buf[0] = *r; - buf[1] = 0; + n = mblen(r, MB_LEN_MAX); + if (n < 0) + n = 1; + memcpy(buf, r, n); + buf[n] = '\0'; fldtab[i]->sval = tostring(buf); fldtab[i]->tval = FLD | STR; } @@ -350,16 +405,18 @@ void fldbld(void) /* create fields from current record */ FATAL("record `%.30s...' has too many fields; can't happen", r); cleanfld(i+1, lastfld); /* clean out junk from previous record */ lastfld = i; - donefld = 1; + donefld = true; for (j = 1; j <= lastfld; j++) { + double result; + p = fldtab[j]; - if(is_number(p->sval)) { - p->fval = atof(p->sval); + if(is_number(p->sval, & result)) { + p->fval = result; p->tval |= NUM; } } setfval(nfloc, (Awkfloat) lastfld); - donerec = 1; /* restore */ + donerec = true; /* restore */ if (dbg) { for (j = 0; j <= lastfld; j++) { p = fldtab[j]; @@ -377,7 +434,7 @@ void cleanfld(int n1, int n2) /* clean out fields n1 .. n2 inclusive */ p = fldtab[i]; if (freeable(p)) xfree(p->sval); - p->sval = ""; + p->sval = EMPTY, p->tval = FLD | STR | DONTFREE; } } @@ -423,7 +480,7 @@ void growfldtab(int n) /* make new fields up to at least $n */ if (n > nf) nf = n; s = (nf+1) * (sizeof (struct Cell *)); /* freebsd: how much do we need? 
*/ - if (s / sizeof(struct Cell *) - 1 == nf) /* didn't overflow */ + if (s / sizeof(struct Cell *) - 1 == (size_t)nf) /* didn't overflow */ fldtab = (Cell **) realloc(fldtab, s); else /* overflow sizeof int */ xfree(fldtab); /* make it null */ @@ -453,7 +510,7 @@ int refldbld(const char *rec, const char *fs) /* build fields from reg expr in F if (*rec == '\0') return 0; pfa = makedfa(fs, 1); - dprintf( ("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs) ); + DPRINTF("into refldbld, rec = <%s>, pat = <%s>\n", rec, fs); tempstat = pfa->initstat; for (i = 1; ; i++) { if (i > nfields) @@ -462,22 +519,22 @@ int refldbld(const char *rec, const char *fs) /* build fields from reg expr in F xfree(fldtab[i]->sval); fldtab[i]->tval = FLD | STR | DONTFREE; fldtab[i]->sval = fr; - dprintf( ("refldbld: i=%d\n", i) ); + DPRINTF("refldbld: i=%d\n", i); if (nematch(pfa, rec)) { pfa->initstat = 2; /* horrible coupling to b.c */ - dprintf( ("match %s (%d chars)\n", patbeg, patlen) ); + DPRINTF("match %s (%d chars)\n", patbeg, patlen); strncpy(fr, rec, patbeg-rec); fr += patbeg - rec + 1; *(fr-1) = '\0'; rec = patbeg + patlen; } else { - dprintf( ("no match %s\n", rec) ); + DPRINTF("no match %s\n", rec); strcpy(fr, rec); pfa->initstat = tempstat; break; } } - return i; + return i; } void recbld(void) /* create $0 from $1..$NF if necessary */ @@ -486,7 +543,7 @@ void recbld(void) /* create $0 from $1..$NF if necessary */ char *r, *p; char *sep = getsval(ofsloc); - if (donerec == 1) + if (donerec) return; r = record; for (i = 1; i <= *NF; i++) { @@ -505,16 +562,16 @@ void recbld(void) /* create $0 from $1..$NF if necessary */ if (!adjbuf(&record, &recsize, 2+r-record, recsize, &r, "recbld 3")) FATAL("built giant record `%.30s...'", record); *r = '\0'; - dprintf( ("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, (void*)fldtab[0]) ); + DPRINTF("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, (void*)fldtab[0]); if (freeable(fldtab[0])) xfree(fldtab[0]->sval); fldtab[0]->tval = REC | STR 
| DONTFREE; fldtab[0]->sval = record; - dprintf( ("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, (void*)fldtab[0]) ); - dprintf( ("recbld = |%s|\n", record) ); - donerec = 1; + DPRINTF("in recbld inputFS=%s, fldtab[0]=%p\n", inputFS, (void*)fldtab[0]); + DPRINTF("recbld = |%s|\n", record); + donerec = true; } int errorflag = 0; @@ -539,18 +596,13 @@ void SYNTAX(const char *fmt, ...) fprintf(stderr, " at source line %d", lineno); if (curfname != NULL) fprintf(stderr, " in function %s", curfname); - if (compile_time == 1 && cursource() != NULL) + if (compile_time == COMPILING && cursource() != NULL) fprintf(stderr, " source file %s", cursource()); fprintf(stderr, "\n"); errorflag = 2; eprint(); } -void fpecatch(int n) -{ - FATAL("floating point exception %d", n); -} - extern int bracecnt, brackcnt, parencnt; void bracecheck(void) @@ -613,20 +665,22 @@ void error() extern Node *curnode; fprintf(stderr, "\n"); - if (compile_time != 2 && NR && *NR > 0) { - fprintf(stderr, " input record number %d", (int) (*FNR)); - if (strcmp(*FILENAME, "-") != 0) - fprintf(stderr, ", file %s", *FILENAME); + if (compile_time != ERROR_PRINTING) { + if (NR && *NR > 0) { + fprintf(stderr, " input record number %d", (int) (*FNR)); + if (strcmp(*FILENAME, "-") != 0) + fprintf(stderr, ", file %s", *FILENAME); + fprintf(stderr, "\n"); + } + if (curnode) + fprintf(stderr, " source line number %d", curnode->lineno); + else if (lineno) + fprintf(stderr, " source line number %d", lineno); + if (compile_time == COMPILING && cursource() != NULL) + fprintf(stderr, " source file %s", cursource()); fprintf(stderr, "\n"); + eprint(); } - if (compile_time != 2 && curnode) - fprintf(stderr, " source line number %d", curnode->lineno); - else if (compile_time != 2 && lineno) - fprintf(stderr, " source line number %d", lineno); - if (compile_time == 1 && cursource() != NULL) - fprintf(stderr, " source file %s", cursource()); - fprintf(stderr, "\n"); - eprint(); } void eprint(void) /* try to print context 
around error */ @@ -636,7 +690,7 @@ void eprint(void) /* try to print context around error */ static int been_here = 0; extern char ebuf[], *ep; - if (compile_time == 2 || compile_time == 0 || been_here++ > 0) + if (compile_time != COMPILING || been_here++ > 0 || ebuf == ep) return; if (ebuf == ep) return; @@ -710,19 +764,75 @@ int isclvar(const char *s) /* is s of form var=something ? */ /* appears to be broken in gcc on linux: thinks 0x123 is a valid FP number */ /* wrong: violates 4.10.1.4 of ansi C standard */ -#include <math.h> -int is_number(const char *s) +/* well, not quite. As of C99, hex floating point is allowed. so this is + * a bit of a mess. We work around the mess by checking for a hexadecimal + * value and disallowing it. Similarly, we now follow gawk and allow only + * +nan, -nan, +inf, and -inf for NaN and infinity values. + */ + +/* + * This routine now has a more complicated interface, the main point + * being to avoid the double conversion of a string to double, and + * also to convey out, if requested, the information that the numeric + * value was a leading string or is all of the string. The latter bit + * is used in getfval(). + */ + +bool is_valid_number(const char *s, bool trailing_stuff_ok, + bool *no_trailing, double *result) { double r; char *ep; + bool retval = false; + bool is_nan = false; + bool is_inf = false; + + if (no_trailing) + *no_trailing = false; + + while (isspace(*s)) + s++; + + // no hex floating point, sorry + if (s[0] == '0' && tolower(s[1]) == 'x') + return false; + + // allow +nan, -nan, +inf, -inf, any other letter, no + if (s[0] == '+' || s[0] == '-') { + is_nan = (strncasecmp(s+1, "nan", 3) == 0); + is_inf = (strncasecmp(s+1, "inf", 3) == 0); + if ((is_nan || is_inf) + && (isspace(s[4]) || s[4] == '\0')) + goto convert; + else if (! isdigit(s[1]) && s[1] != '.') + return false; + } + else if (! 
isdigit(s[0]) && s[0] != '.') + return false; + +convert: errno = 0; r = strtod(s, &ep); - if (ep == s || r == HUGE_VAL || errno == ERANGE) - return 0; - while (*ep == ' ' || *ep == '\t' || *ep == '\n') + if (ep == s || errno == ERANGE) + return false; + + if (isnan(r) && s[0] == '-' && signbit(r) == 0) + r = -r; + + if (result != NULL) + *result = r; + + /* + * check for trailing stuff + */ + while (isspace(*ep)) ep++; - if (*ep == '\0') - return 1; - else - return 0; + + if (no_trailing != NULL) + *no_trailing = (*ep == '\0'); + + // return true if found the end, or trailing stuff is allowed + retval = *ep == '\0' || trailing_stuff_ok; + + return retval; } @@ -22,7 +22,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ****************************************************************/ -const char *version = "version 20190529"; +const char *version = "version 20210215"; #define DEBUG #include <stdio.h> @@ -32,7 +32,6 @@ const char *version = "version 20190529"; #include <string.h> #include <signal.h> #include "awk.h" -#include "ytab.h" extern char **environ; extern int nfields; @@ -43,16 +42,41 @@ char *cmdname; /* gets argv[0] for error messages */ extern FILE *yyin; /* lex input file */ char *lexprog; /* points to program argument if it exists */ extern int errorflag; /* non-zero if any syntax errors; set by yyerror */ -int compile_time = 2; /* for error printing: */ - /* 2 = cmdline, 1 = compile, 0 = running */ +enum compile_states compile_time = ERROR_PRINTING; -#define MAX_PFILE 20 /* max number of -f's */ +static char **pfile; /* program filenames from -f's */ +static size_t maxpfile; /* max program filename */ +static size_t npfile; /* number of filenames */ +static size_t curpfile; /* current filename */ -char *pfile[MAX_PFILE]; /* program filenames from -f's */ -int npfile = 0; /* number of filenames */ -int curpfile = 0; /* current filename */ +bool safe = false; /* true => "safe" mode */ -int safe = 0; /* 1 => "safe" mode 
*/ +static noreturn void fpecatch(int n +#ifdef SA_SIGINFO + , siginfo_t *si, void *uc +#endif +) +{ +#ifdef SA_SIGINFO + static const char *emsg[] = { + [0] = "Unknown error", + [FPE_INTDIV] = "Integer divide by zero", + [FPE_INTOVF] = "Integer overflow", + [FPE_FLTDIV] = "Floating point divide by zero", + [FPE_FLTOVF] = "Floating point overflow", + [FPE_FLTUND] = "Floating point underflow", + [FPE_FLTRES] = "Floating point inexact result", + [FPE_FLTINV] = "Invalid Floating point operation", + [FPE_FLTSUB] = "Subscript out of range", + }; +#endif + FATAL("floating point exception" +#ifdef SA_SIGINFO + ": %s", (size_t)si->si_code < sizeof(emsg) / sizeof(emsg[0]) && + emsg[si->si_code] ? emsg[si->si_code] : emsg[0] +#endif + ); +} /* Can this work with recursive calls? I don't think so. void segvcatch(int n) @@ -61,32 +85,67 @@ void segvcatch(int n) } */ +static const char * +setfs(char *p) +{ + /* wart: t=>\t */ + if (p[0] == 't' && p[1] == '\0') + return "\t"; + else if (p[0] != '\0') + return p; + return NULL; +} + +static char * +getarg(int *argc, char ***argv, const char *msg) +{ + if ((*argv)[1][2] != '\0') { /* arg is -fsomething */ + return &(*argv)[1][2]; + } else { /* arg is -f something */ + (*argc)--; (*argv)++; + if (*argc <= 1) + FATAL("%s", msg); + return (*argv)[1]; + } +} + int main(int argc, char *argv[]) { const char *fs = NULL; + char *fn, *vn; setlocale(LC_CTYPE, ""); setlocale(LC_NUMERIC, "C"); /* for parsing cmdline & prog */ cmdname = argv[0]; if (argc == 1) { - fprintf(stderr, - "usage: %s [-F fs] [-v var=value] [-f progfile | 'prog'] [file ...]\n", + fprintf(stderr, + "usage: %s [-F fs] [-v var=value] [-f progfile | 'prog'] [file ...]\n", cmdname); exit(1); } - signal(SIGFPE, fpecatch); +#ifdef SA_SIGINFO + { + struct sigaction sa; + sa.sa_sigaction = fpecatch; + sa.sa_flags = SA_SIGINFO; + sigemptyset(&sa.sa_mask); + (void)sigaction(SIGFPE, &sa, NULL); + } +#else + (void)signal(SIGFPE, fpecatch); +#endif /*signal(SIGSEGV, segvcatch); 
experiment */ + /* Set and keep track of the random seed */ srand_seed = 1; - srand(srand_seed); + srandom((unsigned long) srand_seed); yyin = NULL; symtab = makesymtab(NSYMTAB/NSYMTAB); while (argc > 1 && argv[1][0] == '-' && argv[1][1] != '\0') { - if (strcmp(argv[1],"-version") == 0 || strcmp(argv[1],"--version") == 0) { + if (strcmp(argv[1], "-version") == 0 || strcmp(argv[1], "--version") == 0) { printf("awk %s\n", version); - exit(0); - break; + return 0; } if (strcmp(argv[1], "--") == 0) { /* explicit end of args */ argc--; @@ -96,53 +155,29 @@ int main(int argc, char *argv[]) switch (argv[1][1]) { case 's': if (strcmp(argv[1], "-safe") == 0) - safe = 1; + safe = true; break; case 'f': /* next argument is program filename */ - if (argv[1][2] != 0) { /* arg is -fsomething */ - if (npfile >= MAX_PFILE - 1) - FATAL("too many -f options"); - pfile[npfile++] = &argv[1][2]; - } else { /* arg is -f something */ - argc--; argv++; - if (argc <= 1) - FATAL("no program filename"); - if (npfile >= MAX_PFILE - 1) - FATAL("too many -f options"); - pfile[npfile++] = argv[1]; - } - break; + fn = getarg(&argc, &argv, "no program filename"); + if (npfile >= maxpfile) { + maxpfile += 20; + pfile = (char **) realloc(pfile, maxpfile * sizeof(*pfile)); + if (pfile == NULL) + FATAL("error allocating space for -f options"); + } + pfile[npfile++] = fn; + break; case 'F': /* set field separator */ - if (argv[1][2] != 0) { /* arg is -Fsomething */ - if (argv[1][2] == 't' && argv[1][3] == 0) /* wart: t=>\t */ - fs = "\t"; - else if (argv[1][2] != 0) - fs = &argv[1][2]; - } else { /* arg is -F something */ - argc--; argv++; - if (argc > 1 && argv[1][0] == 't' && argv[1][1] == 0) /* wart: t=>\t */ - fs = "\t"; - else if (argc > 1 && argv[1][0] != 0) - fs = &argv[1][0]; - } - if (fs == NULL || *fs == '\0') + fs = setfs(getarg(&argc, &argv, "no field separator")); + if (fs == NULL) WARNING("field separator FS is empty"); break; case 'v': /* -v a=1 to be done NOW. 
one -v for each */ - if (argv[1][2] != 0) { /* arg is -vsomething */ - if (isclvar(&argv[1][2])) - setclvar(&argv[1][2]); - else - FATAL("invalid -v option argument: %s", &argv[1][2]); - } else { /* arg is -v something */ - argc--; argv++; - if (argc <= 1) - FATAL("no variable name"); - if (isclvar(argv[1])) - setclvar(argv[1]); - else - FATAL("invalid -v option argument: %s", argv[1]); - } + vn = getarg(&argc, &argv, "no variable name"); + if (isclvar(vn)) + setclvar(vn); + else + FATAL("invalid -v option argument: %s", vn); break; case 'd': dbg = atoi(&argv[1][2]); @@ -164,26 +199,30 @@ int main(int argc, char *argv[]) exit(0); FATAL("no program given"); } - dprintf( ("program = |%s|\n", argv[1]) ); + DPRINTF("program = |%s|\n", argv[1]); lexprog = argv[1]; argc--; argv++; } recinit(recsize); syminit(); - compile_time = 1; + compile_time = COMPILING; argv[0] = cmdname; /* put prog name at front of arglist */ - dprintf( ("argc=%d, argv[0]=%s\n", argc, argv[0]) ); + DPRINTF("argc=%d, argv[0]=%s\n", argc, argv[0]); arginit(argc, argv); if (!safe) envinit(environ); yyparse(); +#if 0 + // Doing this would comply with POSIX, but is not compatible with + // other awks and with what most users expect. So comment it out. setlocale(LC_NUMERIC, ""); /* back to whatever it is locally */ +#endif if (fs) *FS = qstring(fs, '\0'); - dprintf( ("errorflag=%d\n", errorflag) ); + DPRINTF("errorflag=%d\n", errorflag); if (errorflag == 0) { - compile_time = 0; + compile_time = RUNNING; run(winner); } else bracecheck(); @@ -216,7 +255,7 @@ int pgetc(void) /* get 1 character from awk program */ char *cursource(void) /* current source file name */ { if (npfile > 0) - return pfile[curpfile]; + return pfile[curpfile < npfile ? 
curpfile : curpfile - 1]; else return NULL; } @@ -1,7 +1,7 @@ # /**************************************************************** # Copyright (C) Lucent Technologies 1997 # All Rights Reserved -# +# # Permission to use, copy, modify, and distribute this software and # its documentation for any purpose and without fee is hereby # granted, provided that the above copyright notice appear in all @@ -11,7 +11,7 @@ # its entities not be used in advertising or publicity pertaining # to distribution of the software without specific, written prior # permission. -# +# # LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, # INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. # IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY @@ -22,6 +22,7 @@ # THIS SOFTWARE. # ****************************************************************/ +CFLAGS = -fsanitize=address -O1 -g -fno-omit-frame-pointer -fno-optimize-sibling-calls CFLAGS = -g CFLAGS = CFLAGS = -O2 @@ -30,57 +31,46 @@ CFLAGS = -O2 #CC = gcc -Wall -g -Wwrite-strings #CC = gcc -O4 -Wall -pedantic -fno-strict-aliasing #CC = gcc -fprofile-arcs -ftest-coverage # then gcov f1.c; cat f1.c.gcov -HOSTCC = gcc -g -Wall -pedantic +HOSTCC = gcc -g -Wall -pedantic -Wcast-qual CC = $(HOSTCC) # change this is cross-compiling. -# yacc options. pick one; this varies a lot by system. -#YFLAGS = -d -S -YACC = bison -d -y -#YACC = yacc -d -# -S uses sprintf in yacc parser instead of sprint +# By fiat, to make our lives easier, yacc is now defined to be bison. +# If you want something else, you're on your own. 
+YACC = bison -d OFILES = b.o main.o parse.o proctab.o tran.o lib.o run.o lex.o -SOURCE = awk.h ytab.c ytab.h proto.h awkgram.y lex.c b.c main.c \ - maketab.c parse.c lib.c run.c tran.c proctab.c +SOURCE = awk.h awkgram.tab.c awkgram.tab.h proto.h awkgram.y lex.c b.c main.c \ + maketab.c parse.c lib.c run.c tran.c proctab.c LISTING = awk.h proto.h awkgram.y lex.c b.c main.c maketab.c parse.c \ - lib.c run.c tran.c + lib.c run.c tran.c -SHIP = README LICENSE FIXES $(SOURCE) ytab[ch].bak makefile \ +SHIP = README LICENSE FIXES $(SOURCE) awkgram.tab.[ch].bak makefile \ awk.1 -a.out: ytab.o $(OFILES) - $(CC) $(CFLAGS) ytab.o $(OFILES) $(ALLOC) -lm +a.out: awkgram.tab.o $(OFILES) + $(CC) $(CFLAGS) awkgram.tab.o $(OFILES) $(ALLOC) -lm -$(OFILES): awk.h ytab.h proto.h +$(OFILES): awk.h awkgram.tab.h proto.h -#Clear dependency for parallel build: (make -j) -#YACC generated y.tab.c and y.tab.h at the same time -#this needs to be a static pattern rules otherwise multiple target -#are mapped onto multiple executions of yacc, which overwrite -#each others outputs. 
-y%.c y%.h: awk.h proto.h awkgram.y +awkgram.tab.c awkgram.tab.h: awk.h proto.h awkgram.y $(YACC) $(YFLAGS) awkgram.y - mv y.$*.c y$*.c - mv y.$*.h y$*.h - -ytab.h: ytab.c proctab.c: maketab - ./maketab ytab.h >proctab.c + ./maketab awkgram.tab.h >proctab.c -maketab: ytab.h maketab.c +maketab: awkgram.tab.h maketab.c $(HOSTCC) $(CFLAGS) maketab.c -o maketab bundle: - @cp ytab.h ytabh.bak - @cp ytab.c ytabc.bak + @cp awkgram.tab.h awkgram.tab.h.bak + @cp awkgram.tab.c awkgram.tab.c.bak @bundle $(SHIP) tar: - @cp ytab.h ytabh.bak - @cp ytab.c ytabc.bak + @cp awkgram.tab.h awkgram.tab.h.bak + @cp awkgram.tab.c awkgram.tab.c.bak @bundle $(SHIP) >awk.shar @tar cf awk.tar $(SHIP) gzip awk.tar @@ -92,18 +82,30 @@ gitadd: git add README LICENSE FIXES \ awk.h proto.h awkgram.y lex.c b.c main.c maketab.c parse.c \ lib.c run.c tran.c \ - makefile awk.1 awktest.tar + makefile awk.1 testdir gitpush: - # only do this once: + # only do this once: # git remote add origin https://github.com/onetrueawk/awk.git git push -u origin master names: @echo $(LISTING) -clean: +test check: + ./REGRESS + +clean: testclean rm -f a.out *.o *.obj maketab maketab.exe *.bb *.bbg *.da *.gcov *.gcno *.gcda # proctab.c -cleaner: - rm -f a.out *.o *.obj maketab maketab.exe *.bb *.bbg *.da *.gcov *.gcno *.gcda proctab.c ytab* +cleaner: testclean + rm -f a.out *.o *.obj maketab maketab.exe *.bb *.bbg *.da *.gcov *.gcno *.gcda proctab.c awkgram.tab.* + +# This is a bit of a band-aid until we can invest some more time +# in the test suite. +testclean: + cd testdir; rm -fr arnold-fixes beebe devnull echo foo* \ + glop glop1 glop2 lilly.diff tempbig tempsmall time + +# For the habits of GNU maintainers: +distclean: cleaner diff --git a/maketab.c b/maketab.c index dbe3d241fcc8..d4b756ad6706 100644 --- a/maketab.c +++ b/maketab.c @@ -25,14 +25,14 @@ THIS SOFTWARE. /* * this program makes the table to link function names * and type indices that is used by execute() in run.c. 
- * it finds the indices in ytab.h, produced by yacc. + * it finds the indices in awkgram.tab.h, produced by bison. */ #include <stdio.h> #include <string.h> #include <stdlib.h> #include "awk.h" -#include "ytab.h" +#include "awkgram.tab.h" struct xx { int token; @@ -104,6 +104,7 @@ struct xx { ARG, "arg", "arg" }, { VARNF, "getnf", "NF" }, { GETLINE, "awkgetline", "getline" }, + { GENSUB, "gensub", "gensub" }, { 0, "", "" }, }; @@ -118,12 +119,11 @@ int main(int argc, char *argv[]) char c; FILE *fp; char buf[200], name[200], def[200]; + enum { TOK_UNKNOWN, TOK_ENUM, TOK_DEFINE } tokentype = TOK_UNKNOWN; printf("#include <stdio.h>\n"); printf("#include \"awk.h\"\n"); - printf("#include \"ytab.h\"\n\n"); - for (i = SIZE; --i >= 0; ) - names[i] = ""; + printf("#include \"awkgram.tab.h\"\n\n"); if (argc != 2) { fprintf(stderr, "usage: maketab YTAB_H\n"); @@ -133,21 +133,41 @@ int main(int argc, char *argv[]) fprintf(stderr, "maketab can't open %s!\n", argv[1]); exit(1); } - printf("static char *printname[%d] = {\n", SIZE); + printf("static const char * const printname[%d] = {\n", SIZE); i = 0; while (fgets(buf, sizeof buf, fp) != NULL) { - n = sscanf(buf, "%1c %s %s %d", &c, def, name, &tok); - if (c != '#' || (n != 4 && strcmp(def,"define") != 0)) /* not a valid #define */ - continue; - if (strcmp(name, "YYSTYPE_IS_DECLARED") == 0) + // 199 is sizeof(def) - 1 + if (tokentype != TOK_ENUM) { + n = sscanf(buf, "%1c %199s %199s %d", &c, def, name, + &tok); + if (n == 4 && c == '#' && strcmp(def, "define") == 0) { + tokentype = TOK_DEFINE; + } else if (tokentype != TOK_UNKNOWN) { + continue; + } + } + if (tokentype != TOK_DEFINE) { + /* not a valid #define, bison uses enums now */ + n = sscanf(buf, "%199s = %d,\n", name, &tok); + if (n != 2) + continue; + tokentype = TOK_ENUM; + } + if (strcmp(name, "YYSTYPE_IS_DECLARED") == 0) { + tokentype = TOK_UNKNOWN; continue; + } if (tok < FIRSTTOKEN || tok > LASTTOKEN) { + tokentype = TOK_UNKNOWN; /* fprintf(stderr, "maketab funny 
token %d %s ignored\n", tok, buf); */ continue; } - names[tok-FIRSTTOKEN] = (char *) malloc(strlen(name)+1); - strcpy(names[tok-FIRSTTOKEN], name); - printf("\t(char *) \"%s\",\t/* %d */\n", name, tok); + names[tok-FIRSTTOKEN] = strdup(name); + if (names[tok-FIRSTTOKEN] == NULL) { + fprintf(stderr, "maketab out of space copying %s", name); + continue; + } + printf("\t\"%s\",\t/* %d */\n", name, tok); i++; } printf("};\n\n"); @@ -156,20 +176,18 @@ int main(int argc, char *argv[]) table[p->token-FIRSTTOKEN] = p->name; printf("\nCell *(*proctab[%d])(Node **, int) = {\n", SIZE); for (i=0; i<SIZE; i++) - if (table[i]==0) - printf("\tnullproc,\t/* %s */\n", names[i]); - else - printf("\t%s,\t/* %s */\n", table[i], names[i]); + printf("\t%s,\t/* %s */\n", + table[i] ? table[i] : "nullproc", names[i] ? names[i] : ""); printf("};\n\n"); - printf("char *tokname(int n)\n"); /* print a tokname() function */ + printf("const char *tokname(int n)\n"); /* print a tokname() function */ printf("{\n"); - printf(" static char buf[100];\n\n"); - printf(" if (n < FIRSTTOKEN || n > LASTTOKEN) {\n"); - printf(" sprintf(buf, \"token %%d\", n);\n"); - printf(" return buf;\n"); - printf(" }\n"); - printf(" return printname[n-FIRSTTOKEN];\n"); + printf("\tstatic char buf[100];\n\n"); + printf("\tif (n < FIRSTTOKEN || n > LASTTOKEN) {\n"); + printf("\t\tsnprintf(buf, sizeof(buf), \"token %%d\", n);\n"); + printf("\t\treturn buf;\n"); + printf("\t}\n"); + printf("\treturn printname[n-FIRSTTOKEN];\n"); printf("}\n"); return 0; } @@ -27,13 +27,13 @@ THIS SOFTWARE. 
#include <string.h> #include <stdlib.h> #include "awk.h" -#include "ytab.h" +#include "awkgram.tab.h" Node *nodealloc(int n) { Node *x; - x = (Node *) malloc(sizeof(Node) + (n-1)*sizeof(Node *)); + x = (Node *) malloc(sizeof(*x) + (n-1) * sizeof(x)); if (x == NULL) FATAL("out of space in nodealloc"); x->nnext = NULL; @@ -93,6 +93,20 @@ Node *node4(int a, Node *b, Node *c, Node *d, Node *e) return(x); } +Node *node5(int a, Node *b, Node *c, Node *d, Node *e, Node *f) +{ + Node *x; + + x = nodealloc(5); + x->nobj = a; + x->narg[0] = b; + x->narg[1] = c; + x->narg[2] = d; + x->narg[3] = e; + x->narg[4] = f; + return(x); +} + Node *stat1(int a, Node *b) { Node *x; @@ -165,6 +179,15 @@ Node *op4(int a, Node *b, Node *c, Node *d, Node *e) return(x); } +Node *op5(int a, Node *b, Node *c, Node *d, Node *e, Node *f) +{ + Node *x; + + x = node5(a,b,c,d,e,f); + x->ntype = NEXPR; + return(x); +} + Node *celltonode(Cell *a, int b) { Node *x; @@ -250,7 +273,7 @@ void defn(Cell *v, Node *vl, Node *st) /* turn on FCN bit in definition, */ for (p = vl; p; p = p->nnext) n++; v->fval = n; - dprintf( ("defining func %s (%d args)\n", v->nval, n) ); + DPRINTF("defining func %s (%d args)\n", v->nval, n); } int isarg(const char *s) /* is s in argument list for current function? */ @@ -259,7 +282,7 @@ int isarg(const char *s) /* is s in argument list for current function? 
*/ Node *p = arglist; int n; - for (n = 0; p != 0; p = p->nnext, n++) + for (n = 0; p != NULL; p = p->nnext, n++) if (strcmp(((Cell *)(p->narg[0]))->nval, s) == 0) return n; return -1; diff --git a/proctab.c b/proctab.c index ff212c416c3a..96fad36e601c 100644 --- a/proctab.c +++ b/proctab.c @@ -1,106 +1,108 @@ #include <stdio.h> #include "awk.h" -#include "ytab.h" +#include "awkgram.tab.h" -static char *printname[94] = { - (char *) "FIRSTTOKEN", /* 258 */ - (char *) "PROGRAM", /* 259 */ - (char *) "PASTAT", /* 260 */ - (char *) "PASTAT2", /* 261 */ - (char *) "XBEGIN", /* 262 */ - (char *) "XEND", /* 263 */ - (char *) "NL", /* 264 */ - (char *) "ARRAY", /* 265 */ - (char *) "MATCH", /* 266 */ - (char *) "NOTMATCH", /* 267 */ - (char *) "MATCHOP", /* 268 */ - (char *) "FINAL", /* 269 */ - (char *) "DOT", /* 270 */ - (char *) "ALL", /* 271 */ - (char *) "CCL", /* 272 */ - (char *) "NCCL", /* 273 */ - (char *) "CHAR", /* 274 */ - (char *) "OR", /* 275 */ - (char *) "STAR", /* 276 */ - (char *) "QUEST", /* 277 */ - (char *) "PLUS", /* 278 */ - (char *) "EMPTYRE", /* 279 */ - (char *) "AND", /* 280 */ - (char *) "BOR", /* 281 */ - (char *) "APPEND", /* 282 */ - (char *) "EQ", /* 283 */ - (char *) "GE", /* 284 */ - (char *) "GT", /* 285 */ - (char *) "LE", /* 286 */ - (char *) "LT", /* 287 */ - (char *) "NE", /* 288 */ - (char *) "IN", /* 289 */ - (char *) "ARG", /* 290 */ - (char *) "BLTIN", /* 291 */ - (char *) "BREAK", /* 292 */ - (char *) "CLOSE", /* 293 */ - (char *) "CONTINUE", /* 294 */ - (char *) "DELETE", /* 295 */ - (char *) "DO", /* 296 */ - (char *) "EXIT", /* 297 */ - (char *) "FOR", /* 298 */ - (char *) "FUNC", /* 299 */ - (char *) "SUB", /* 300 */ - (char *) "GSUB", /* 301 */ - (char *) "IF", /* 302 */ - (char *) "INDEX", /* 303 */ - (char *) "LSUBSTR", /* 304 */ - (char *) "MATCHFCN", /* 305 */ - (char *) "NEXT", /* 306 */ - (char *) "NEXTFILE", /* 307 */ - (char *) "ADD", /* 308 */ - (char *) "MINUS", /* 309 */ - (char *) "MULT", /* 310 */ - (char *) 
"DIVIDE", /* 311 */ - (char *) "MOD", /* 312 */ - (char *) "ASSIGN", /* 313 */ - (char *) "ASGNOP", /* 314 */ - (char *) "ADDEQ", /* 315 */ - (char *) "SUBEQ", /* 316 */ - (char *) "MULTEQ", /* 317 */ - (char *) "DIVEQ", /* 318 */ - (char *) "MODEQ", /* 319 */ - (char *) "POWEQ", /* 320 */ - (char *) "PRINT", /* 321 */ - (char *) "PRINTF", /* 322 */ - (char *) "SPRINTF", /* 323 */ - (char *) "ELSE", /* 324 */ - (char *) "INTEST", /* 325 */ - (char *) "CONDEXPR", /* 326 */ - (char *) "POSTINCR", /* 327 */ - (char *) "PREINCR", /* 328 */ - (char *) "POSTDECR", /* 329 */ - (char *) "PREDECR", /* 330 */ - (char *) "VAR", /* 331 */ - (char *) "IVAR", /* 332 */ - (char *) "VARNF", /* 333 */ - (char *) "CALL", /* 334 */ - (char *) "NUMBER", /* 335 */ - (char *) "STRING", /* 336 */ - (char *) "REGEXPR", /* 337 */ - (char *) "GETLINE", /* 338 */ - (char *) "RETURN", /* 339 */ - (char *) "SPLIT", /* 340 */ - (char *) "SUBSTR", /* 341 */ - (char *) "WHILE", /* 342 */ - (char *) "CAT", /* 343 */ - (char *) "NOT", /* 344 */ - (char *) "UMINUS", /* 345 */ - (char *) "UPLUS", /* 346 */ - (char *) "POWER", /* 347 */ - (char *) "DECR", /* 348 */ - (char *) "INCR", /* 349 */ - (char *) "INDIRECT", /* 350 */ - (char *) "LASTTOKEN", /* 351 */ +static const char * const printname[96] = { + "FIRSTTOKEN", /* 258 */ + "PROGRAM", /* 259 */ + "PASTAT", /* 260 */ + "PASTAT2", /* 261 */ + "XBEGIN", /* 262 */ + "XEND", /* 263 */ + "NL", /* 264 */ + "ARRAY", /* 265 */ + "MATCH", /* 266 */ + "NOTMATCH", /* 267 */ + "MATCHOP", /* 268 */ + "FINAL", /* 269 */ + "DOT", /* 270 */ + "ALL", /* 271 */ + "CCL", /* 272 */ + "NCCL", /* 273 */ + "CHAR", /* 274 */ + "OR", /* 275 */ + "STAR", /* 276 */ + "QUEST", /* 277 */ + "PLUS", /* 278 */ + "EMPTYRE", /* 279 */ + "ZERO", /* 280 */ + "AND", /* 281 */ + "BOR", /* 282 */ + "APPEND", /* 283 */ + "EQ", /* 284 */ + "GE", /* 285 */ + "GT", /* 286 */ + "LE", /* 287 */ + "LT", /* 288 */ + "NE", /* 289 */ + "IN", /* 290 */ + "ARG", /* 291 */ + "BLTIN", /* 292 */ + 
"BREAK", /* 293 */ + "CLOSE", /* 294 */ + "CONTINUE", /* 295 */ + "DELETE", /* 296 */ + "DO", /* 297 */ + "EXIT", /* 298 */ + "FOR", /* 299 */ + "FUNC", /* 300 */ + "GENSUB", /* 301 */ + "SUB", /* 302 */ + "GSUB", /* 303 */ + "IF", /* 304 */ + "INDEX", /* 305 */ + "LSUBSTR", /* 306 */ + "MATCHFCN", /* 307 */ + "NEXT", /* 308 */ + "NEXTFILE", /* 309 */ + "ADD", /* 310 */ + "MINUS", /* 311 */ + "MULT", /* 312 */ + "DIVIDE", /* 313 */ + "MOD", /* 314 */ + "ASSIGN", /* 315 */ + "ASGNOP", /* 316 */ + "ADDEQ", /* 317 */ + "SUBEQ", /* 318 */ + "MULTEQ", /* 319 */ + "DIVEQ", /* 320 */ + "MODEQ", /* 321 */ + "POWEQ", /* 322 */ + "PRINT", /* 323 */ + "PRINTF", /* 324 */ + "SPRINTF", /* 325 */ + "ELSE", /* 326 */ + "INTEST", /* 327 */ + "CONDEXPR", /* 328 */ + "POSTINCR", /* 329 */ + "PREINCR", /* 330 */ + "POSTDECR", /* 331 */ + "PREDECR", /* 332 */ + "VAR", /* 333 */ + "IVAR", /* 334 */ + "VARNF", /* 335 */ + "CALL", /* 336 */ + "NUMBER", /* 337 */ + "STRING", /* 338 */ + "REGEXPR", /* 339 */ + "GETLINE", /* 340 */ + "RETURN", /* 341 */ + "SPLIT", /* 342 */ + "SUBSTR", /* 343 */ + "WHILE", /* 344 */ + "CAT", /* 345 */ + "NOT", /* 346 */ + "UMINUS", /* 347 */ + "UPLUS", /* 348 */ + "POWER", /* 349 */ + "DECR", /* 350 */ + "INCR", /* 351 */ + "INDIRECT", /* 352 */ + "LASTTOKEN", /* 353 */ }; -Cell *(*proctab[94])(Node **, int) = { +Cell *(*proctab[96])(Node **, int) = { nullproc, /* FIRSTTOKEN */ program, /* PROGRAM */ pastat, /* PASTAT */ @@ -123,6 +125,7 @@ Cell *(*proctab[94])(Node **, int) = { nullproc, /* QUEST */ nullproc, /* PLUS */ nullproc, /* EMPTYRE */ + nullproc, /* ZERO */ boolop, /* AND */ boolop, /* BOR */ nullproc, /* APPEND */ @@ -143,6 +146,7 @@ Cell *(*proctab[94])(Node **, int) = { jump, /* EXIT */ forstat, /* FOR */ nullproc, /* FUNC */ + gensub, /* GENSUB */ sub, /* SUB */ gsub, /* GSUB */ ifstat, /* IF */ @@ -197,12 +201,12 @@ Cell *(*proctab[94])(Node **, int) = { nullproc, /* LASTTOKEN */ }; -char *tokname(int n) +const char *tokname(int n) { static 
char buf[100]; if (n < FIRSTTOKEN || n > LASTTOKEN) { - sprintf(buf, "token %d", n); + snprintf(buf, sizeof(buf), "token %d", n); return buf; } return printname[n-FIRSTTOKEN]; @@ -38,15 +38,15 @@ extern int yylook(void); extern int yyback(int *, int); extern int yyinput(void); -extern fa *makedfa(const char *, int); -extern fa *mkdfa(const char *, int); -extern int makeinit(fa *, int); +extern fa *makedfa(const char *, bool); +extern fa *mkdfa(const char *, bool); +extern int makeinit(fa *, bool); extern void penter(Node *); extern void freetr(Node *); -extern int hexstr(uschar **); -extern int quoted(uschar **); +extern int hexstr(const uschar **); +extern int quoted(const uschar **); extern char *cclenter(const char *); -extern void overflo(const char *) __attribute__((__noreturn__)); +extern noreturn void overflo(const char *); extern void cfoll(fa *, Node *); extern int first(Node *); extern void follow(Node *); @@ -54,6 +54,7 @@ extern int member(int, const char *); extern int match(fa *, const char *); extern int pmatch(fa *, const char *); extern int nematch(fa *, const char *); +extern bool fnematch(fa *, FILE *, char **, int *, int); extern Node *reparse(const char *); extern Node *regexp(void); extern Node *primary(void); @@ -73,12 +74,14 @@ extern Node *node1(int, Node *); extern Node *node2(int, Node *, Node *); extern Node *node3(int, Node *, Node *, Node *); extern Node *node4(int, Node *, Node *, Node *, Node *); +extern Node *node5(int, Node *, Node *, Node *, Node *, Node *); extern Node *stat3(int, Node *, Node *, Node *); extern Node *op2(int, Node *, Node *); extern Node *op1(int, Node *); extern Node *stat1(int, Node *); extern Node *op3(int, Node *, Node *, Node *); extern Node *op4(int, Node *, Node *, Node *, Node *); +extern Node *op5(int, Node *, Node *, Node *, Node *, Node *); extern Node *stat2(int, Node *, Node *); extern Node *stat4(int, Node *, Node *, Node *, Node *); extern Node *celltonode(Cell *, int); @@ -88,7 +91,7 @@ extern 
Node *pa2stat(Node *, Node *, Node *); extern Node *linkum(Node *, Node *); extern void defn(Cell *, Node *, Node *); extern int isarg(const char *); -extern char *tokname(int); +extern const char *tokname(int); extern Cell *(*proctab[])(Node **, int); extern int ptoi(void *); extern Node *itonp(int); @@ -110,15 +113,18 @@ extern double getfval(Cell *); extern char *getsval(Cell *); extern char *getpssval(Cell *); /* for print */ extern char *tostring(const char *); +extern char *tostringN(const char *, size_t); extern char *qstring(const char *, int); +extern Cell *catstr(Cell *, Cell *); extern void recinit(unsigned int); extern void initgetrec(void); extern void makefields(int, int); extern void growfldtab(int n); -extern int getrec(char **, int *, int); +extern void savefs(void); +extern int getrec(char **, int *, bool); extern void nextfile(void); -extern int readrec(char **buf, int *bufsize, FILE *inf); +extern int readrec(char **buf, int *bufsize, FILE *inf, bool isnew); extern char *getargv(int); extern void setclvar(char *); extern void fldbld(void); @@ -129,18 +135,22 @@ extern int refldbld(const char *, const char *); extern void recbld(void); extern Cell *fieldadr(int); extern void yyerror(const char *); -extern void fpecatch(int); extern void bracecheck(void); extern void bcheck2(int, int, int); -extern void SYNTAX(const char *, ...); -extern void FATAL(const char *, ...) __attribute__((__noreturn__)); -extern void WARNING(const char *, ...); +extern void SYNTAX(const char *, ...) + __attribute__((__format__(__printf__, 1, 2))); +extern noreturn void FATAL(const char *, ...) + __attribute__((__format__(__printf__, 1, 2))); +extern void WARNING(const char *, ...) 
+ __attribute__((__format__(__printf__, 1, 2))); extern void error(void); extern void eprint(void); extern void bclass(int); extern double errcheck(double, const char *); extern int isclvar(const char *); -extern int is_number(const char *); +extern bool is_valid_number(const char *s, bool trailing_stuff_ok, + bool *no_trailing, double *result); +#define is_number(s, val) is_valid_number(s, false, NULL, val) extern int adjbuf(char **pb, int *sz, int min, int q, char **pbp, const char *what); extern void run(Node *); @@ -185,12 +195,13 @@ extern Cell *bltin(Node **, int); extern Cell *printstat(Node **, int); extern Cell *nullproc(Node **, int); extern FILE *redirect(int, Node *); -extern FILE *openfile(int, const char *); +extern FILE *openfile(int, const char *, bool *); extern const char *filename(FILE *); extern Cell *closefile(Node **, int); extern void closeall(void); extern Cell *sub(Node **, int); extern Cell *gsub(Node **, int); +extern Cell *gensub(Node **, int); extern FILE *popen(const char *, const char *); extern int pclose(FILE *); @@ -25,6 +25,10 @@ THIS SOFTWARE. #define DEBUG #include <stdio.h> #include <ctype.h> +#include <errno.h> +#include <wchar.h> +#include <wctype.h> +#include <fcntl.h> #include <setjmp.h> #include <limits.h> #include <math.h> @@ -34,13 +38,14 @@ THIS SOFTWARE. #include <sys/types.h> #include <sys/wait.h> #include "awk.h" -#include "ytab.h" +#include "awkgram.tab.h" -#define tempfree(x) if (istemp(x)) tfree(x); else - -/* -#undef tempfree +static void stdinit(void); +static void flush_all(void); +#if 1 +#define tempfree(x) do { if (istemp(x)) tfree(x); } while (/*CONSTCOND*/0) +#else void tempfree(Cell *p) { if (p->ctype == OCELL && (p->csub < CUNK || p->csub > CFREE)) { WARNING("bad csub %d in Cell %d %s", @@ -49,7 +54,7 @@ void tempfree(Cell *p) { if (istemp(p)) tfree(p); } -*/ +#endif /* do we really need these? 
*/ /* #ifdef _NFILE */ @@ -73,23 +78,23 @@ extern Awkfloat srand_seed; Node *winner = NULL; /* root of parse tree */ Cell *tmps; /* free temporary cells for execution */ -static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL }; +static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM, NULL, NULL }; Cell *True = &truecell; -static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL }; +static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM, NULL, NULL }; Cell *False = &falsecell; -static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL }; +static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM, NULL, NULL }; Cell *jbreak = &breakcell; -static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL }; +static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM, NULL, NULL }; Cell *jcont = &contcell; -static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL }; +static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM, NULL, NULL }; Cell *jnext = &nextcell; -static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL }; +static Cell nextfilecell ={ OJUMP, JNEXTFILE, 0, 0, 0.0, NUM, NULL, NULL }; Cell *jnextfile = &nextfilecell; -static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL }; +static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM, NULL, NULL }; Cell *jexit = &exitcell; -static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL }; +static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM, NULL, NULL }; Cell *jret = &retcell; -static Cell tempcell ={ OCELL, CTEMP, 0, "", 0.0, NUM|STR|DONTFREE, NULL }; +static Cell tempcell ={ OCELL, CTEMP, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL }; Node *curnode = NULL; /* the node being executed, for debugging */ @@ -114,7 +119,7 @@ int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr, if (rminlen) minlen += quantum - rminlen; tbuf = (char *) realloc(*pbuf, minlen); - dprintf( ("adjbuf %s: %d %d (pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, (void *) *pbuf, (void *) tbuf) ); + DPRINTF("adjbuf %s: %d %d 
(pbuf=%p, tbuf=%p)\n", whatrtn, *psiz, minlen, (void*)*pbuf, (void*)tbuf); if (tbuf == NULL) { if (whatrtn) FATAL("out of memory in %s", whatrtn); @@ -130,7 +135,6 @@ int adjbuf(char **pbuf, int *psiz, int minlen, int quantum, char **pbptr, void run(Node *a) /* execution of parse tree starts here */ { - extern void stdinit(void); stdinit(); execute(a); @@ -189,7 +193,7 @@ Cell *program(Node **a, int n) /* execute an awk program */ tempfree(x); } if (a[1] || a[2]) - while (getrec(&record, &recsize, 1) > 0) { + while (getrec(&record, &recsize, true) > 0) { x = execute(a[1]); if (isexit(x)) break; @@ -219,11 +223,11 @@ struct Frame { /* stack frame for awk function calls */ struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */ int nframe = 0; /* number of frames allocated */ -struct Frame *fp = NULL; /* frame pointer. bottom level unused */ +struct Frame *frp = NULL; /* frame pointer. bottom level unused */ Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ { - static Cell newcopycell = { OCELL, CCOPY, 0, "", 0.0, NUM|STR|DONTFREE, NULL }; + static const Cell newcopycell = { OCELL, CCOPY, 0, EMPTY, 0.0, NUM|STR|DONTFREE, NULL, NULL }; int i, ncall, ndef; int freed = 0; /* handles potential double freeing when fcn & param share a tempcell */ Node *x; @@ -236,25 +240,25 @@ Cell *call(Node **a, int n) /* function call. 
very kludgy and fragile */ if (!isfcn(fcn)) FATAL("calling undefined function %s", s); if (frame == NULL) { - fp = frame = (struct Frame *) calloc(nframe += 100, sizeof(struct Frame)); + frp = frame = (struct Frame *) calloc(nframe += 100, sizeof(*frame)); if (frame == NULL) FATAL("out of space for stack frames calling %s", s); } for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */ ncall++; ndef = (int) fcn->fval; /* args in defn */ - dprintf( ("calling %s, %d args (%d in defn), fp=%d\n", s, ncall, ndef, (int) (fp-frame)) ); + DPRINTF("calling %s, %d args (%d in defn), frp=%d\n", s, ncall, ndef, (int) (frp-frame)); if (ncall > ndef) WARNING("function %s called with %d args, uses only %d", s, ncall, ndef); if (ncall + ndef > NARGS) FATAL("function %s has %d arguments, limit %d", s, ncall+ndef, NARGS); for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */ - dprintf( ("evaluate args[%d], fp=%d:\n", i, (int) (fp-frame)) ); + DPRINTF("evaluate args[%d], frp=%d:\n", i, (int) (frp-frame)); y = execute(x); oargs[i] = y; - dprintf( ("args[%d]: %s %f <%s>, t=%o\n", - i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval) ); + DPRINTF("args[%d]: %s %f <%s>, t=%o\n", + i, NN(y->nval), y->fval, isarr(y) ? "(array)" : NN(y->sval), y->tval); if (isfcn(y)) FATAL("can't use function %s as argument in %s", y->nval, s); if (isarr(y)) @@ -267,26 +271,25 @@ Cell *call(Node **a, int n) /* function call. 
very kludgy and fragile */ args[i] = gettemp(); *args[i] = newcopycell; } - fp++; /* now ok to up frame */ - if (fp >= frame + nframe) { - int dfp = fp - frame; /* old index */ - frame = (struct Frame *) - realloc((char *) frame, (nframe += 100) * sizeof(struct Frame)); + frp++; /* now ok to up frame */ + if (frp >= frame + nframe) { + int dfp = frp - frame; /* old index */ + frame = (struct Frame *) realloc(frame, (nframe += 100) * sizeof(*frame)); if (frame == NULL) FATAL("out of space for stack frames in %s", s); - fp = frame + dfp; + frp = frame + dfp; } - fp->fcncell = fcn; - fp->args = args; - fp->nargs = ndef; /* number defined with (excess are locals) */ - fp->retval = gettemp(); + frp->fcncell = fcn; + frp->args = args; + frp->nargs = ndef; /* number defined with (excess are locals) */ + frp->retval = gettemp(); - dprintf( ("start exec of %s, fp=%d\n", s, (int) (fp-frame)) ); + DPRINTF("start exec of %s, frp=%d\n", s, (int) (frp-frame)); y = execute((Node *)(fcn->sval)); /* execute body */ - dprintf( ("finished exec of %s, fp=%d\n", s, (int) (fp-frame)) ); + DPRINTF("finished exec of %s, frp=%d\n", s, (int) (frp-frame)); for (i = 0; i < ndef; i++) { - Cell *t = fp->args[i]; + Cell *t = frp->args[i]; if (isarr(t)) { if (t->csub == CCOPY) { if (i >= ncall) { @@ -315,9 +318,9 @@ Cell *call(Node **a, int n) /* function call. very kludgy and fragile */ if (freed == 0) { tempfree(y); /* don't free twice! 
*/ } - z = fp->retval; /* return value */ - dprintf( ("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval) ); - fp--; + z = frp->retval; /* return value */ + DPRINTF("%s returns %g |%s| %o\n", s, getfval(z), getsval(z), z->tval); + frp--; return(z); } @@ -344,11 +347,11 @@ Cell *arg(Node **a, int n) /* nth argument of a function */ { n = ptoi(a[0]); /* argument number, counting from 0 */ - dprintf( ("arg(%d), fp->nargs=%d\n", n, fp->nargs) ); - if (n+1 > fp->nargs) + DPRINTF("arg(%d), frp->nargs=%d\n", n, frp->nargs); + if (n+1 > frp->nargs) FATAL("argument #%d of function %s was not supplied", - n+1, fp->fcncell->nval); - return fp->args[n]; + n+1, frp->fcncell->nval); + return frp->args[n]; } Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */ @@ -367,14 +370,14 @@ Cell *jump(Node **a, int n) /* break, continue, next, nextfile, return */ if (a[0] != NULL) { y = execute(a[0]); if ((y->tval & (STR|NUM)) == (STR|NUM)) { - setsval(fp->retval, getsval(y)); - fp->retval->fval = getfval(y); - fp->retval->tval |= NUM; + setsval(frp->retval, getsval(y)); + frp->retval->fval = getfval(y); + frp->retval->tval |= NUM; } else if (y->tval & STR) - setsval(fp->retval, getsval(y)); + setsval(frp->retval, getsval(y)); else if (y->tval & NUM) - setfval(fp->retval, getfval(y)); + setfval(frp->retval, getfval(y)); else /* can't happen */ FATAL("bad type variable %d", y->tval); tempfree(y); @@ -403,6 +406,8 @@ Cell *awkgetline(Node **a, int n) /* get next line from specific input */ char *buf; int bufsize = recsize; int mode; + bool newflag; + double result; if ((buf = (char *) malloc(bufsize)) == NULL) FATAL("out of memory in getline"); @@ -414,38 +419,38 @@ Cell *awkgetline(Node **a, int n) /* get next line from specific input */ mode = ptoi(a[1]); if (mode == '|') /* input pipe */ mode = LE; /* arbitrary flag */ - fp = openfile(mode, getsval(x)); + fp = openfile(mode, getsval(x), &newflag); tempfree(x); if (fp == NULL) n = -1; else - n = 
readrec(&buf, &bufsize, fp); + n = readrec(&buf, &bufsize, fp, newflag); if (n <= 0) { ; } else if (a[0] != NULL) { /* getline var <file */ x = execute(a[0]); setsval(x, buf); - if (is_number(x->sval)) { - x->fval = atof(x->sval); + if (is_number(x->sval, & result)) { + x->fval = result; x->tval |= NUM; } tempfree(x); } else { /* getline <file */ setsval(fldtab[0], buf); - if (is_number(fldtab[0]->sval)) { - fldtab[0]->fval = atof(fldtab[0]->sval); + if (is_number(fldtab[0]->sval, & result)) { + fldtab[0]->fval = result; fldtab[0]->tval |= NUM; } } } else { /* bare getline; use current input */ if (a[0] == NULL) /* getline */ - n = getrec(&record, &recsize, 1); + n = getrec(&record, &recsize, true); else { /* getline var */ - n = getrec(&buf, &bufsize, 0); + n = getrec(&buf, &bufsize, false); x = execute(a[0]); setsval(x, buf); - if (is_number(x->sval)) { - x->fval = atof(x->sval); + if (is_number(x->sval, & result)) { + x->fval = result; x->tval |= NUM; } tempfree(x); @@ -458,38 +463,57 @@ Cell *awkgetline(Node **a, int n) /* get next line from specific input */ Cell *getnf(Node **a, int n) /* get NF */ { - if (donefld == 0) + if (!donefld) fldbld(); return (Cell *) a[0]; } -Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ +static char * +makearraystring(Node *p, const char *func) { - Cell *x, *y, *z; - char *s; - Node *np; char *buf; int bufsz = recsize; - int nsub; + size_t blen; - if ((buf = (char *) malloc(bufsz)) == NULL) - FATAL("out of memory in array"); + if ((buf = (char *) malloc(bufsz)) == NULL) { + FATAL("%s: out of memory", func); + } - x = execute(a[0]); /* Cell* for symbol table */ - buf[0] = 0; - for (np = a[1]; np; np = np->nnext) { - y = execute(np); /* subscript */ - s = getsval(y); - nsub = strlen(getsval(subseploc)); - if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "array")) - FATAL("out of memory for %s[%s...]", x->nval, buf); - strcat(buf, s); - if (np->nnext) - strcat(buf, *SUBSEP); - 
tempfree(y); + blen = 0; + buf[blen] = '\0'; + + for (; p; p = p->nnext) { + Cell *x = execute(p); /* expr */ + char *s = getsval(x); + size_t seplen = strlen(getsval(subseploc)); + size_t nsub = p->nnext ? seplen : 0; + size_t slen = strlen(s); + size_t tlen = blen + slen + nsub; + + if (!adjbuf(&buf, &bufsz, tlen + 1, recsize, 0, func)) { + FATAL("%s: out of memory %s[%s...]", + func, x->nval, buf); + } + memcpy(buf + blen, s, slen); + if (nsub) { + memcpy(buf + blen + slen, *SUBSEP, nsub); + } + buf[tlen] = '\0'; + blen = tlen; + tempfree(x); } + return buf; +} + +Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ +{ + Cell *x, *z; + char *buf; + + x = execute(a[0]); /* Cell* for symbol table */ + buf = makearraystring(a[1], __func__); if (!isarr(x)) { - dprintf( ("making %s into an array\n", NN(x->nval)) ); + DPRINTF("making %s into an array\n", NN(x->nval)); if (freeable(x)) xfree(x->sval); x->tval &= ~(STR|NUM|DONTFREE); @@ -506,36 +530,21 @@ Cell *array(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts */ { - Cell *x, *y; - Node *np; - char *s; - int nsub; + Cell *x; x = execute(a[0]); /* Cell* for symbol table */ + if (x == symtabloc) { + FATAL("cannot delete SYMTAB or its elements"); + } if (!isarr(x)) return True; - if (a[1] == 0) { /* delete the elements, not the table */ + if (a[1] == NULL) { /* delete the elements, not the table */ freesymtab(x); x->tval &= ~STR; x->tval |= ARR; x->sval = (char *) makesymtab(NSYMTAB); } else { - int bufsz = recsize; - char *buf; - if ((buf = (char *) malloc(bufsz)) == NULL) - FATAL("out of memory in adelete"); - buf[0] = 0; - for (np = a[1]; np; np = np->nnext) { - y = execute(np); /* subscript */ - s = getsval(y); - nsub = strlen(getsval(subseploc)); - if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "awkdelete")) - FATAL("out of memory deleting %s[%s...]", x->nval, buf); - 
strcat(buf, s); - if (np->nnext) - strcat(buf, *SUBSEP); - tempfree(y); - } + char *buf = makearraystring(a[1], __func__); freeelem(x, buf); free(buf); } @@ -545,37 +554,19 @@ Cell *awkdelete(Node **a, int n) /* a[0] is symtab, a[1] is list of subscripts * Cell *intest(Node **a, int n) /* a[0] is index (list), a[1] is symtab */ { - Cell *x, *ap, *k; - Node *p; + Cell *ap, *k; char *buf; - char *s; - int bufsz = recsize; - int nsub; ap = execute(a[1]); /* array name */ if (!isarr(ap)) { - dprintf( ("making %s into an array\n", ap->nval) ); + DPRINTF("making %s into an array\n", ap->nval); if (freeable(ap)) xfree(ap->sval); ap->tval &= ~(STR|NUM|DONTFREE); ap->tval |= ARR; ap->sval = (char *) makesymtab(NSYMTAB); } - if ((buf = (char *) malloc(bufsz)) == NULL) { - FATAL("out of memory in intest"); - } - buf[0] = 0; - for (p = a[0]; p; p = p->nnext) { - x = execute(p); /* expr */ - s = getsval(x); - nsub = strlen(getsval(subseploc)); - if (!adjbuf(&buf, &bufsz, strlen(buf)+strlen(s)+nsub+1, recsize, 0, "intest")) - FATAL("out of memory deleting %s[%s...]", x->nval, buf); - strcat(buf, s); - tempfree(x); - if (p->nnext) - strcat(buf, *SUBSEP); - } + buf = makearraystring(a[0], __func__); k = lookup(buf, (Array *) ap->sval); tempfree(ap); free(buf); @@ -600,7 +591,7 @@ Cell *matchop(Node **a, int n) /* ~ and match() */ } x = execute(a[1]); /* a[1] = target text */ s = getsval(x); - if (a[0] == 0) /* a[1] == 0: already-compiled reg expr */ + if (a[0] == NULL) /* a[1] == 0: already-compiled reg expr */ i = (*mf)((fa *) a[2], s); else { y = execute(a[2]); /* a[2] = regular expr */ @@ -697,7 +688,7 @@ Cell *relop(Node **a, int n) /* a[0 < a[1], etc. 
*/ void tfree(Cell *a) /* free a tempcell */ { if (freeable(a)) { - dprintf( ("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval) ); + DPRINTF("freeing %s %s %o\n", NN(a->nval), NN(a->sval), a->tval); xfree(a->sval); } if (a == tmps) @@ -711,12 +702,12 @@ Cell *gettemp(void) /* get a tempcell */ Cell *x; if (!tmps) { - tmps = (Cell *) calloc(100, sizeof(Cell)); + tmps = (Cell *) calloc(100, sizeof(*tmps)); if (!tmps) FATAL("out of space for temporaries"); - for(i = 1; i < 100; i++) + for (i = 1; i < 100; i++) tmps[i-1].cnext = &tmps[i]; - tmps[i-1].cnext = 0; + tmps[i-1].cnext = NULL; } x = tmps; tmps = x->cnext; @@ -736,7 +727,7 @@ Cell *indirect(Node **a, int n) /* $( a[0] ) */ if ((Awkfloat)INT_MAX < val) FATAL("trying to access out of range field %s", x->nval); m = (int) val; - if (m == 0 && !is_number(s = getsval(x))) /* suspicion! */ + if (m == 0 && !is_number(s = getsval(x), NULL)) /* suspicion! */ FATAL("illegal field $(%s), name \"%s\"", s, x->nval); /* BUG: can x->nval ever be null??? 
*/ tempfree(x); @@ -751,18 +742,18 @@ Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */ int k, m, n; char *s; int temp; - Cell *x, *y, *z = 0; + Cell *x, *y, *z = NULL; x = execute(a[0]); y = execute(a[1]); - if (a[2] != 0) + if (a[2] != NULL) z = execute(a[2]); s = getsval(x); k = strlen(s) + 1; if (k <= 1) { tempfree(x); tempfree(y); - if (a[2] != 0) { + if (a[2] != NULL) { tempfree(z); } x = gettemp(); @@ -775,7 +766,7 @@ Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */ else if (m > k) m = k; tempfree(y); - if (a[2] != 0) { + if (a[2] != NULL) { n = (int) getfval(z); tempfree(z); } else @@ -784,7 +775,7 @@ Cell *substr(Node **a, int nnn) /* substr(a[0], a[1], a[2]) */ n = 0; else if (n > k - m) n = k - m; - dprintf( ("substr: m=%d, n=%d, s=%s\n", m, n, s) ); + DPRINTF("substr: m=%d, n=%d, s=%s\n", m, n, s); y = gettemp(); temp = s[n+m-1]; /* with thanks to John Linderman */ s[n+m-1] = '\0'; @@ -807,8 +798,8 @@ Cell *sindex(Node **a, int nnn) /* index(a[0], a[1]) */ z = gettemp(); for (p1 = s1; *p1 != '\0'; p1++) { - for (q=p1, p2=s2; *p2 != '\0' && *q == *p2; q++, p2++) - ; + for (q = p1, p2 = s2; *p2 != '\0' && *q == *p2; q++, p2++) + continue; if (*p2 == '\0') { v = (Awkfloat) (p1 - s1 + 1); /* origin 1 */ break; @@ -833,16 +824,18 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co int fmtsz = recsize; char *buf = *pbuf; int bufsize = *pbufsize; +#define FMTSZ(a) (fmtsz - ((a) - fmt)) +#define BUFSZ(a) (bufsize - ((a) - buf)) - static int first = 1; - static int have_a_format = 0; + static bool first = true; + static bool have_a_format = false; if (first) { - char buf[100]; + char xbuf[100]; - sprintf(buf, "%a", 42.0); - have_a_format = (strcmp(buf, "0x1.5p+5") == 0); - first = 0; + snprintf(xbuf, sizeof(xbuf), "%a", 42.0); + have_a_format = (strcmp(xbuf, "0x1.5p+5") == 0); + first = false; } os = s; @@ -868,8 +861,13 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co 
for (t = fmt; (*t++ = *s) != '\0'; s++) { if (!adjbuf(&fmt, &fmtsz, MAXNUMSIZE+1+t-fmt, recsize, &t, "format3")) FATAL("format item %.30s... ran format() out of memory", os); - if (isalpha((uschar)*s) && *s != 'l' && *s != 'h' && *s != 'L') - break; /* the ansi panoply */ + /* Ignore size specifiers */ + if (strchr("hjLlqtz", *s) != NULL) { /* the ansi panoply */ + t--; + continue; + } + if (isalpha((uschar)*s)) + break; if (*s == '$') { FATAL("'$' not permitted in awk formats"); } @@ -879,7 +877,8 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co } x = execute(a); a = a->nnext; - sprintf(t-1, "%d", fmtwd=(int) getfval(x)); + snprintf(t - 1, FMTSZ(t - 1), + "%d", fmtwd=(int) getfval(x)); if (fmtwd < 0) fmtwd = -fmtwd; adjbuf(&buf, &bufsize, fmtwd+1+p-buf, recsize, &p, "format"); @@ -901,16 +900,12 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co case 'f': case 'e': case 'g': case 'E': case 'G': flag = 'f'; break; - case 'd': case 'i': - flag = 'd'; - if(*(s-1) == 'l') break; - *(t-1) = 'l'; - *t = 'd'; + case 'd': case 'i': case 'o': case 'x': case 'X': case 'u': + flag = (*s == 'd' || *s == 'i') ? 'd' : 'u'; + *(t-1) = 'j'; + *t = *s; *++t = '\0'; break; - case 'o': case 'x': case 'X': case 'u': - flag = *(s-1) == 'l' ? 
'd' : 'u'; - break; case 's': flag = 's'; break; @@ -931,20 +926,20 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co n = fmtwd; adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format5"); switch (flag) { - case '?': sprintf(p, "%s", fmt); /* unknown, so dump it too */ + case '?': snprintf(p, BUFSZ(p), "%s", fmt); /* unknown, so dump it too */ t = getsval(x); n = strlen(t); if (fmtwd > n) n = fmtwd; adjbuf(&buf, &bufsize, 1+strlen(p)+n+p-buf, recsize, &p, "format6"); p += strlen(p); - sprintf(p, "%s", t); + snprintf(p, BUFSZ(p), "%s", t); break; case 'a': case 'A': - case 'f': sprintf(p, fmt, getfval(x)); break; - case 'd': sprintf(p, fmt, (long) getfval(x)); break; - case 'u': sprintf(p, fmt, (int) getfval(x)); break; + case 'f': snprintf(p, BUFSZ(p), fmt, getfval(x)); break; + case 'd': snprintf(p, BUFSZ(p), fmt, (intmax_t) getfval(x)); break; + case 'u': snprintf(p, BUFSZ(p), fmt, (uintmax_t) getfval(x)); break; case 's': t = getsval(x); n = strlen(t); @@ -952,18 +947,18 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co n = fmtwd; if (!adjbuf(&buf, &bufsize, 1+n+p-buf, recsize, &p, "format7")) FATAL("huge string/format (%d chars) in printf %.30s... ran format() out of memory", n, t); - sprintf(p, fmt, t); + snprintf(p, BUFSZ(p), fmt, t); break; case 'c': if (isnum(x)) { - if (getfval(x)) - sprintf(p, fmt, (int) getfval(x)); + if ((int)getfval(x)) + snprintf(p, BUFSZ(p), fmt, (int) getfval(x)); else { *p++ = '\0'; /* explicit null byte */ *p = '\0'; /* next output will start here */ } } else - sprintf(p, fmt, getsval(x)[0]); + snprintf(p, BUFSZ(p), fmt, getsval(x)[0]); break; default: FATAL("can't happen: bad conversion %c in format()", flag); @@ -1074,13 +1069,15 @@ Cell *arith(Node **a, int n) /* a[0] + a[1], etc. 
also -a[0] */ case UMINUS: i = -i; break; - case UPLUS: /* handled by getfval(), above */ + case UPLUS: /* handled by getfval(), above */ break; case POWER: if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */ i = ipow(i, (int) j); - else + else { + errno = 0; i = errcheck(pow(i, j), "pow"); + } break; default: /* can't happen */ FATAL("illegal arithmetic operator %d", n); @@ -1173,8 +1170,10 @@ Cell *assign(Node **a, int n) /* a[0] = a[1], a[0] += a[1], etc. */ case POWEQ: if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */ xf = ipow(xf, (int) yf); - else + else { + errno = 0; xf = errcheck(pow(xf, yf), "pow"); + } break; default: FATAL("illegal assignment operator %d", n); @@ -1194,13 +1193,14 @@ Cell *cat(Node **a, int q) /* a[0] cat a[1] */ x = execute(a[0]); n1 = strlen(getsval(x)); - adjbuf(&s, &ssz, n1 + 1, recsize, 0, "cat1"); - (void) strncpy(s, x->sval, ssz); + adjbuf(&s, &ssz, n1, recsize, 0, "cat1"); + memcpy(s, x->sval, n1); y = execute(a[1]); n2 = strlen(getsval(y)); adjbuf(&s, &ssz, n1 + n2 + 1, recsize, 0, "cat2"); - (void) strncpy(s + n1, y->sval, ssz - n1); + memcpy(s + n1, y->sval, n2); + s[n1 + n2] = '\0'; tempfree(x); tempfree(y); @@ -1216,7 +1216,7 @@ Cell *pastat(Node **a, int n) /* a[0] { a[1] } */ { Cell *x; - if (a[0] == 0) + if (a[0] == NULL) x = execute(a[1]); else { x = execute(a[0]); @@ -1253,21 +1253,23 @@ Cell *dopa2(Node **a, int n) /* a[0], a[1] { a[2] } */ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ { - Cell *x = 0, *y, *ap; - char *s, *origs; - char *fs, *origfs = NULL; + Cell *x = NULL, *y, *ap; + const char *s, *origs, *t; + const char *fs = NULL; + char *origfs = NULL; int sep; - char *t, temp, num[50]; + char temp, num[50]; int n, tempstat, arg3type; + double result; y = execute(a[0]); /* source string */ origs = s = strdup(getsval(y)); arg3type = ptoi(a[3]); - if (a[2] == 0) /* fs string */ + if (a[2] == NULL) /* fs string */ fs = getsval(fsloc); else if (arg3type == STRING) 
{ /* split(str,arr,"string") */ x = execute(a[2]); - origfs = fs = strdup(getsval(x)); + fs = origfs = strdup(getsval(x)); tempfree(x); } else if (arg3type == REGEXPR) fs = "(regexpr)"; /* split(str,arr,/regexpr/) */ @@ -1276,7 +1278,7 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ sep = *fs; ap = execute(a[1]); /* array name */ freesymtab(ap); - dprintf( ("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs) ); + DPRINTF("split: s=|%s|, a=%s, sep=|%s|\n", s, NN(ap->nval), fs); ap->tval &= ~STR; ap->tval |= ARR; ap->sval = (char *) makesymtab(NSYMTAB); @@ -1300,18 +1302,18 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ pfa->initstat = 2; do { n++; - sprintf(num, "%d", n); + snprintf(num, sizeof(num), "%d", n); temp = *patbeg; - *patbeg = '\0'; - if (is_number(s)) - setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval); + setptr(patbeg, '\0'); + if (is_number(s, & result)) + setsymtab(num, s, result, STR|NUM, (Array *) ap->sval); else setsymtab(num, s, 0.0, STR, (Array *) ap->sval); - *patbeg = temp; + setptr(patbeg, temp); s = patbeg + patlen; - if (*(patbeg+patlen-1) == 0 || *s == 0) { + if (*(patbeg+patlen-1) == '\0' || *s == '\0') { n++; - sprintf(num, "%d", n); + snprintf(num, sizeof(num), "%d", n); setsymtab(num, "", 0.0, STR, (Array *) ap->sval); pfa->initstat = tempstat; goto spdone; @@ -1321,69 +1323,70 @@ Cell *split(Node **a, int nnn) /* split(a[0], a[1], a[2]); a[3] is type */ /* cf gsub and refldbld */ } n++; - sprintf(num, "%d", n); - if (is_number(s)) - setsymtab(num, s, atof(s), STR|NUM, (Array *) ap->sval); + snprintf(num, sizeof(num), "%d", n); + if (is_number(s, & result)) + setsymtab(num, s, result, STR|NUM, (Array *) ap->sval); else setsymtab(num, s, 0.0, STR, (Array *) ap->sval); spdone: pfa = NULL; } else if (sep == ' ') { for (n = 0; ; ) { - while (*s == ' ' || *s == '\t' || *s == '\n') +#define ISWS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') + while (ISWS(*s)) s++; - if (*s 
== 0) + if (*s == '\0') break; n++; t = s; do s++; - while (*s!=' ' && *s!='\t' && *s!='\n' && *s!='\0'); + while (*s != '\0' && !ISWS(*s)); temp = *s; - *s = '\0'; - sprintf(num, "%d", n); - if (is_number(t)) - setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval); + setptr(s, '\0'); + snprintf(num, sizeof(num), "%d", n); + if (is_number(t, & result)) + setsymtab(num, t, result, STR|NUM, (Array *) ap->sval); else setsymtab(num, t, 0.0, STR, (Array *) ap->sval); - *s = temp; - if (*s != 0) + setptr(s, temp); + if (*s != '\0') s++; } } else if (sep == 0) { /* new: split(s, a, "") => 1 char/elem */ - for (n = 0; *s != 0; s++) { + for (n = 0; *s != '\0'; s++) { char buf[2]; n++; - sprintf(num, "%d", n); + snprintf(num, sizeof(num), "%d", n); buf[0] = *s; - buf[1] = 0; + buf[1] = '\0'; if (isdigit((uschar)buf[0])) setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval); else setsymtab(num, buf, 0.0, STR, (Array *) ap->sval); } - } else if (*s != 0) { + } else if (*s != '\0') { for (;;) { n++; t = s; while (*s != sep && *s != '\n' && *s != '\0') s++; temp = *s; - *s = '\0'; - sprintf(num, "%d", n); - if (is_number(t)) - setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval); + setptr(s, '\0'); + snprintf(num, sizeof(num), "%d", n); + if (is_number(t, & result)) + setsymtab(num, t, result, STR|NUM, (Array *) ap->sval); else setsymtab(num, t, 0.0, STR, (Array *) ap->sval); - *s = temp; - if (*s++ == 0) + setptr(s, temp); + if (*s++ == '\0') break; } } tempfree(ap); tempfree(y); - free(origs); - free(origfs); + xfree(origs); + xfree(origfs); x = gettemp(); x->tval = NUM; x->fval = n; @@ -1413,7 +1416,7 @@ Cell *ifstat(Node **a, int n) /* if (a[0]) a[1]; else a[2] */ if (istrue(x)) { tempfree(x); x = execute(a[1]); - } else if (a[2] != 0) { + } else if (a[2] != NULL) { tempfree(x); x = execute(a[2]); } @@ -1465,7 +1468,7 @@ Cell *forstat(Node **a, int n) /* for (a[0]; a[1]; a[2]) a[3] */ x = execute(a[0]); tempfree(x); for (;;) { - if (a[1]!=0) { + if (a[1]!=NULL) { 
x = execute(a[1]); if (!istrue(x)) return(x); else tempfree(x); @@ -1513,17 +1516,100 @@ Cell *instat(Node **a, int n) /* for (a[0] in a[1]) a[2] */ return True; } +static char *nawk_convert(const char *s, int (*fun_c)(int), + wint_t (*fun_wc)(wint_t)) +{ + char *buf = NULL; + char *pbuf = NULL; + const char *ps = NULL; + size_t n = 0; + wchar_t wc; + size_t sz = MB_CUR_MAX; + + if (sz == 1) { + buf = tostring(s); + + for (pbuf = buf; *pbuf; pbuf++) + *pbuf = fun_c((uschar)*pbuf); + + return buf; + } else { + /* upper/lower character may be shorter/longer */ + buf = tostringN(s, strlen(s) * sz + 1); + + (void) mbtowc(NULL, NULL, 0); /* reset internal state */ + /* + * Reset internal state here too. + * Assign result to avoid a compiler warning. (Casting to void + * doesn't work.) + * Increment said variable to avoid a different warning. + */ + int unused = wctomb(NULL, L'\0'); + unused++; + + ps = s; + pbuf = buf; + while (n = mbtowc(&wc, ps, sz), + n > 0 && n != (size_t)-1 && n != (size_t)-2) + { + ps += n; + + n = wctomb(pbuf, fun_wc(wc)); + if (n == (size_t)-1) + FATAL("illegal wide character %s", s); + + pbuf += n; + } + + *pbuf = '\0'; + + if (n) + FATAL("illegal byte sequence %s", s); + + return buf; + } +} + +#ifdef __DJGPP__ +static wint_t towupper(wint_t wc) +{ + if (wc >= 0 && wc < 256) + return toupper(wc & 0xFF); + + return wc; +} + +static wint_t towlower(wint_t wc) +{ + if (wc >= 0 && wc < 256) + return tolower(wc & 0xFF); + + return wc; +} +#endif + +static char *nawk_toupper(const char *s) +{ + return nawk_convert(s, toupper, towupper); +} + +static char *nawk_tolower(const char *s) +{ + return nawk_convert(s, tolower, towlower); +} + Cell *bltin(Node **a, int n) /* builtin functions. 
a[0] is type, a[1] is arg list */ { Cell *x, *y; Awkfloat u; - int t; + int t, sz; Awkfloat tmp; - char *p, *buf; + char *buf, *fmt; Node *nextarg; FILE *fp; - void flush_all(void); int status = 0; + time_t tv; + struct tm *tm; t = ptoi(a[0]); x = execute(a[1]); @@ -1536,19 +1622,25 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis u = strlen(getsval(x)); break; case FLOG: - u = errcheck(log(getfval(x)), "log"); break; + errno = 0; + u = errcheck(log(getfval(x)), "log"); + break; case FINT: modf(getfval(x), &u); break; case FEXP: - u = errcheck(exp(getfval(x)), "exp"); break; + errno = 0; + u = errcheck(exp(getfval(x)), "exp"); + break; case FSQRT: - u = errcheck(sqrt(getfval(x)), "sqrt"); break; + errno = 0; + u = errcheck(sqrt(getfval(x)), "sqrt"); + break; case FSIN: u = sin(getfval(x)); break; case FCOS: u = cos(getfval(x)); break; case FATAN: - if (nextarg == 0) { + if (nextarg == NULL) { WARNING("atan2 requires two arguments; returning 1.0"); u = 1.0; } else { @@ -1558,6 +1650,64 @@ Cell *bltin(Node **a, int n) /* builtin functions. 
a[0] is type, a[1] is arg lis nextarg = nextarg->nnext; } break; + case FCOMPL: + u = ~((int)getfval(x)); + break; + case FAND: + if (nextarg == 0) { + WARNING("and requires two arguments; returning 0"); + u = 0; + break; + } + y = execute(a[1]->nnext); + u = ((int)getfval(x)) & ((int)getfval(y)); + tempfree(y); + nextarg = nextarg->nnext; + break; + case FFOR: + if (nextarg == 0) { + WARNING("or requires two arguments; returning 0"); + u = 0; + break; + } + y = execute(a[1]->nnext); + u = ((int)getfval(x)) | ((int)getfval(y)); + tempfree(y); + nextarg = nextarg->nnext; + break; + case FXOR: + if (nextarg == 0) { + WARNING("xor requires two arguments; returning 0"); + u = 0; + break; + } + y = execute(a[1]->nnext); + u = ((int)getfval(x)) ^ ((int)getfval(y)); + tempfree(y); + nextarg = nextarg->nnext; + break; + case FLSHIFT: + if (nextarg == 0) { + WARNING("lshift requires two arguments; returning 0"); + u = 0; + break; + } + y = execute(a[1]->nnext); + u = ((int)getfval(x)) << ((int)getfval(y)); + tempfree(y); + nextarg = nextarg->nnext; + break; + case FRSHIFT: + if (nextarg == 0) { + WARNING("rshift requires two arguments; returning 0"); + u = 0; + break; + } + y = execute(a[1]->nnext); + u = ((int)getfval(x)) >> ((int)getfval(y)); + tempfree(y); + nextarg = nextarg->nnext; + break; case FSYSTEM: fflush(stdout); /* in case something is buffered already */ status = system(getsval(x)); @@ -1576,8 +1726,10 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis } break; case FRAND: - /* in principle, rand() returns something in 0..RAND_MAX */ - u = (Awkfloat) (rand() % RAND_MAX) / RAND_MAX; + /* random() returns numbers in [0..2^31-1] + * in order to get a number in [0, 1), divide it by 2^31 + */ + u = (Awkfloat) random() / (0x7fffffffL + 0x1UL); break; case FSRAND: if (isrec(x)) /* no argument provided */ @@ -1585,22 +1737,16 @@ Cell *bltin(Node **a, int n) /* builtin functions. 
a[0] is type, a[1] is arg lis else u = getfval(x); tmp = u; - srand((unsigned int) u); + srandom((unsigned long) u); u = srand_seed; srand_seed = tmp; break; case FTOUPPER: case FTOLOWER: - buf = tostring(getsval(x)); - if (t == FTOUPPER) { - for (p = buf; *p; p++) - if (islower((uschar) *p)) - *p = toupper((uschar)*p); - } else { - for (p = buf; *p; p++) - if (isupper((uschar) *p)) - *p = tolower((uschar)*p); - } + if (t == FTOUPPER) + buf = nawk_toupper(getsval(x)); + else + buf = nawk_tolower(getsval(x)); tempfree(x); x = gettemp(); setsval(x, buf); @@ -1610,11 +1756,46 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis if (isrec(x) || strlen(getsval(x)) == 0) { flush_all(); /* fflush() or fflush("") -> all */ u = 0; - } else if ((fp = openfile(FFLUSH, getsval(x))) == NULL) + } else if ((fp = openfile(FFLUSH, getsval(x), NULL)) == NULL) u = EOF; else u = fflush(fp); break; + case FSYSTIME: + u = time((time_t *) 0); + break; + case FSTRFTIME: + /* strftime([format [,timestamp]]) */ + if (nextarg) { + y = execute(nextarg); + nextarg = nextarg->nnext; + tv = (time_t) getfval(y); + tempfree(y); + } else + tv = time((time_t *) 0); + tm = localtime(&tv); + if (tm == NULL) + FATAL("bad time %ld", (long)tv); + + if (isrec(x)) { + /* format argument not provided, use default */ + fmt = tostring("%a %b %d %H:%M:%S %Z %Y"); + } else + fmt = tostring(getsval(x)); + + sz = 32; + buf = NULL; + do { + if ((buf = realloc(buf, (sz *= 2))) == NULL) + FATAL("out of memory in strftime"); + } while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0'); + + y = gettemp(); + setsval(y, buf); + free(fmt); + free(buf); + + return y; default: /* can't happen */ FATAL("illegal function type %d", t); break; @@ -1622,7 +1803,7 @@ Cell *bltin(Node **a, int n) /* builtin functions. 
a[0] is type, a[1] is arg lis tempfree(x); x = gettemp(); setfval(x, u); - if (nextarg != 0) { + if (nextarg != NULL) { WARNING("warning: function has too many arguments"); for ( ; nextarg; nextarg = nextarg->nnext) execute(nextarg); @@ -1636,7 +1817,7 @@ Cell *printstat(Node **a, int n) /* print a[0] */ Cell *y; FILE *fp; - if (a[1] == 0) /* a[1] is redirection operator, a[2] is file */ + if (a[1] == NULL) /* a[1] is redirection operator, a[2] is file */ fp = stdout; else fp = redirect(ptoi(a[1]), a[2]); @@ -1649,7 +1830,7 @@ Cell *printstat(Node **a, int n) /* print a[0] */ else fputs(getsval(ofsloc), fp); } - if (a[1] != 0) + if (a[1] != NULL) fflush(fp); if (ferror(fp)) FATAL("write error on %s", filename(fp)); @@ -1670,7 +1851,7 @@ FILE *redirect(int a, Node *b) /* set up all i/o redirections */ x = execute(b); fname = getsval(x); - fp = openfile(a, fname); + fp = openfile(a, fname, NULL); if (fp == NULL) FATAL("can't open file %s", fname); tempfree(x); @@ -1683,14 +1864,14 @@ struct files { int mode; /* '|', 'a', 'w' => LE/LT, GT */ } *files; -int nfiles; +size_t nfiles; -void stdinit(void) /* in case stdin, etc., are not constants */ +static void stdinit(void) /* in case stdin, etc., are not constants */ { nfiles = FOPEN_MAX; - files = calloc(nfiles, sizeof(*files)); + files = (struct files *) calloc(nfiles, sizeof(*files)); if (files == NULL) - FATAL("can't allocate file memory for %u files", nfiles); + FATAL("can't allocate file memory for %zu files", nfiles); files[0].fp = stdin; files[0].fname = "/dev/stdin"; files[0].mode = LT; @@ -1702,33 +1883,35 @@ void stdinit(void) /* in case stdin, etc., are not constants */ files[2].mode = GT; } -FILE *openfile(int a, const char *us) +FILE *openfile(int a, const char *us, bool *pnewflag) { const char *s = us; - int i, m; - FILE *fp = 0; + size_t i; + int m; + FILE *fp = NULL; if (*s == '\0') FATAL("null file name in print or getline"); - for (i=0; i < nfiles; i++) - if (files[i].fname && strcmp(s, files[i].fname) 
== 0) { - if (a == files[i].mode || (a==APPEND && files[i].mode==GT)) - return files[i].fp; - if (a == FFLUSH) - return files[i].fp; + for (i = 0; i < nfiles; i++) + if (files[i].fname && strcmp(s, files[i].fname) == 0 && + (a == files[i].mode || (a==APPEND && files[i].mode==GT) || + a == FFLUSH)) { + if (pnewflag) + *pnewflag = false; + return files[i].fp; } if (a == FFLUSH) /* didn't find it, so don't create it! */ return NULL; - for (i=0; i < nfiles; i++) - if (files[i].fp == 0) + for (i = 0; i < nfiles; i++) + if (files[i].fp == NULL) break; if (i >= nfiles) { struct files *nf; - int nnf = nfiles + FOPEN_MAX; - nf = realloc(files, nnf * sizeof(*nf)); + size_t nnf = nfiles + FOPEN_MAX; + nf = (struct files *) realloc(files, nnf * sizeof(*nf)); if (nf == NULL) - FATAL("cannot grow files for %s and %d files", s, nnf); + FATAL("cannot grow files for %s and %zu files", s, nnf); memset(&nf[nfiles], 0, FOPEN_MAX * sizeof(*nf)); nfiles = nnf; files = nf; @@ -1752,13 +1935,17 @@ FILE *openfile(int a, const char *us) files[i].fname = tostring(s); files[i].fp = fp; files[i].mode = m; + if (pnewflag) + *pnewflag = true; + if (fp != stdin && fp != stdout && fp != stderr) + (void) fcntl(fileno(fp), F_SETFD, FD_CLOEXEC); } return fp; } const char *filename(FILE *fp) { - int i; + size_t i; for (i = 0; i < nfiles; i++) if (fp == files[i].fp) @@ -1766,70 +1953,80 @@ const char *filename(FILE *fp) return "???"; } -Cell *closefile(Node **a, int n) -{ - Cell *x; - int i, stat; - - x = execute(a[0]); - getsval(x); - stat = -1; - for (i = 0; i < nfiles; i++) { - if (files[i].fname && strcmp(x->sval, files[i].fname) == 0) { - if (ferror(files[i].fp)) - WARNING( "i/o error occurred on %s", files[i].fname ); - if (files[i].mode == '|' || files[i].mode == LE) - stat = pclose(files[i].fp); - else - stat = fclose(files[i].fp); - if (stat == EOF) - WARNING( "i/o error occurred closing %s", files[i].fname ); - if (i > 2) /* don't do /dev/std... 
*/ - xfree(files[i].fname); - files[i].fname = NULL; /* watch out for ref thru this */ - files[i].fp = NULL; - } - } - tempfree(x); - x = gettemp(); - setfval(x, (Awkfloat) stat); - return(x); -} + Cell *closefile(Node **a, int n) + { + Cell *x; + size_t i; + bool stat; + + x = execute(a[0]); + getsval(x); + stat = true; + for (i = 0; i < nfiles; i++) { + if (!files[i].fname || strcmp(x->sval, files[i].fname) != 0) + continue; + if (ferror(files[i].fp)) + FATAL("i/o error occurred on %s", files[i].fname); + if (files[i].fp == stdin || files[i].fp == stdout || + files[i].fp == stderr) + stat = freopen("/dev/null", "r+", files[i].fp) == NULL; + else if (files[i].mode == '|' || files[i].mode == LE) + stat = pclose(files[i].fp) == -1; + else + stat = fclose(files[i].fp) == EOF; + if (stat) + FATAL("i/o error occurred closing %s", files[i].fname); + if (i > 2) /* don't do /dev/std... */ + xfree(files[i].fname); + files[i].fname = NULL; /* watch out for ref thru this */ + files[i].fp = NULL; + break; + } + tempfree(x); + x = gettemp(); + setfval(x, (Awkfloat) (stat ? -1 : 0)); + return(x); + } void closeall(void) { - int i, stat; - - for (i = 0; i < FOPEN_MAX; i++) { - if (files[i].fp) { - if (ferror(files[i].fp)) - WARNING( "i/o error occurred on %s", files[i].fname ); - if (files[i].mode == '|' || files[i].mode == LE) - stat = pclose(files[i].fp); - else - stat = fclose(files[i].fp); - if (stat == EOF) - WARNING( "i/o error occurred while closing %s", files[i].fname ); - } + size_t i; + bool stat = false; + + for (i = 0; i < nfiles; i++) { + if (! 
files[i].fp) + continue; + if (ferror(files[i].fp)) + FATAL( "i/o error occurred on %s", files[i].fname ); + if (files[i].fp == stdin) + continue; + if (files[i].mode == '|' || files[i].mode == LE) + stat = pclose(files[i].fp) == -1; + else if (files[i].fp == stdout || files[i].fp == stderr) + stat = fflush(files[i].fp) == EOF; + else + stat = fclose(files[i].fp) == EOF; + if (stat) + FATAL( "i/o error occurred while closing %s", files[i].fname ); } } -void flush_all(void) +static void flush_all(void) { - int i; + size_t i; for (i = 0; i < nfiles; i++) if (files[i].fp) fflush(files[i].fp); } -void backsub(char **pb_ptr, char **sptr_ptr); +void backsub(char **pb_ptr, const char **sptr_ptr); Cell *sub(Node **a, int nnn) /* substitute command */ { - char *sptr, *pb, *q; + const char *sptr, *q; Cell *x, *y, *result; - char *t, *buf; + char *t, *buf, *pb; fa *pfa; int bufsz = recsize; @@ -1837,7 +2034,7 @@ Cell *sub(Node **a, int nnn) /* substitute command */ FATAL("out of memory in sub"); x = execute(a[3]); /* target string */ t = getsval(x); - if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */ + if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */ pfa = (fa *) a[1]; /* regular expression */ else { y = execute(a[1]); @@ -1853,7 +2050,7 @@ Cell *sub(Node **a, int nnn) /* substitute command */ while (sptr < patbeg) *pb++ = *sptr++; sptr = getsval(y); - while (*sptr != 0) { + while (*sptr != '\0') { adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "sub"); if (*sptr == '\\') { backsub(&pb, &sptr); @@ -1871,13 +2068,13 @@ Cell *sub(Node **a, int nnn) /* substitute command */ sptr = patbeg + patlen; if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) { adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "sub"); - while ((*pb++ = *sptr++) != 0) - ; + while ((*pb++ = *sptr++) != '\0') + continue; } if (pb > buf + bufsz) FATAL("sub result2 %.30s too big; can't happen", buf); setsval(x, buf); /* BUG: should be able to avoid copy */ - result = True;; + result = 
True; } tempfree(x); tempfree(y); @@ -1888,7 +2085,8 @@ Cell *sub(Node **a, int nnn) /* substitute command */ Cell *gsub(Node **a, int nnn) /* global substitute */ { Cell *x, *y; - char *rptr, *sptr, *t, *pb, *q; + char *rptr, *pb; + const char *q, *t, *sptr; char *buf; fa *pfa; int mflag, tempstat, num; @@ -1900,7 +2098,7 @@ Cell *gsub(Node **a, int nnn) /* global substitute */ num = 0; x = execute(a[3]); /* target string */ t = getsval(x); - if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */ + if (a[0] == NULL) /* 0 => a[1] is already-compiled regexpr */ pfa = (fa *) a[1]; /* regular expression */ else { y = execute(a[1]); @@ -1914,11 +2112,11 @@ Cell *gsub(Node **a, int nnn) /* global substitute */ pb = buf; rptr = getsval(y); do { - if (patlen == 0 && *patbeg != 0) { /* matched empty string */ + if (patlen == 0 && *patbeg != '\0') { /* matched empty string */ if (mflag == 0) { /* can replace empty */ num++; sptr = rptr; - while (*sptr != 0) { + while (*sptr != '\0') { adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub"); if (*sptr == '\\') { backsub(&pb, &sptr); @@ -1931,7 +2129,7 @@ Cell *gsub(Node **a, int nnn) /* global substitute */ *pb++ = *sptr++; } } - if (*t == 0) /* at end */ + if (*t == '\0') /* at end */ goto done; adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gsub"); *pb++ = *t++; @@ -1946,7 +2144,7 @@ Cell *gsub(Node **a, int nnn) /* global substitute */ while (sptr < patbeg) *pb++ = *sptr++; sptr = rptr; - while (*sptr != 0) { + while (*sptr != '\0') { adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gsub"); if (*sptr == '\\') { backsub(&pb, &sptr); @@ -1959,7 +2157,7 @@ Cell *gsub(Node **a, int nnn) /* global substitute */ *pb++ = *sptr++; } t = patbeg + patlen; - if (patlen == 0 || *t == 0 || *(t-1) == 0) + if (patlen == 0 || *t == '\0' || *(t-1) == '\0') goto done; if (pb > buf + bufsz) FATAL("gsub result1 %.30s too big; can't happen", buf); @@ -1968,8 +2166,8 @@ Cell *gsub(Node **a, int nnn) /* global substitute */ } while 
(pmatch(pfa,t)); sptr = t; adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gsub"); - while ((*pb++ = *sptr++) != 0) - ; + while ((*pb++ = *sptr++) != '\0') + continue; done: if (pb < buf + bufsz) *pb = '\0'; else if (*(pb-1) != '\0') @@ -1986,9 +2184,158 @@ Cell *gsub(Node **a, int nnn) /* global substitute */ return(x); } -void backsub(char **pb_ptr, char **sptr_ptr) /* handle \\& variations */ +Cell *gensub(Node **a, int nnn) /* global selective substitute */ + /* XXX incomplete - doesn't support backreferences \0 ... \9 */ +{ + Cell *x, *y, *res, *h; + char *rptr; + const char *sptr; + char *buf, *pb; + const char *t, *q; + fa *pfa; + int mflag, tempstat, num, whichm; + int bufsz = recsize; + + if ((buf = malloc(bufsz)) == NULL) + FATAL("out of memory in gensub"); + mflag = 0; /* if mflag == 0, can replace empty string */ + num = 0; + x = execute(a[4]); /* source string */ + t = getsval(x); + res = copycell(x); /* target string - initially copy of source */ + res->csub = CTEMP; /* result values are temporary */ + if (a[0] == 0) /* 0 => a[1] is already-compiled regexpr */ + pfa = (fa *) a[1]; /* regular expression */ + else { + y = execute(a[1]); + pfa = makedfa(getsval(y), 1); + tempfree(y); + } + y = execute(a[2]); /* replacement string */ + h = execute(a[3]); /* which matches should be replaced */ + sptr = getsval(h); + if (sptr[0] == 'g' || sptr[0] == 'G') + whichm = -1; + else { + /* + * The specified number is index of replacement, starting + * from 1. GNU awk treats index lower than 0 same as + * 1, we do same for compatibility. + */ + whichm = (int) getfval(h) - 1; + if (whichm < 0) + whichm = 0; + } + tempfree(h); + + if (pmatch(pfa, t)) { + char *sl; + + tempstat = pfa->initstat; + pfa->initstat = 2; + pb = buf; + rptr = getsval(y); + /* + * XXX if there are any backreferences in subst string, + * complain now. 
+ */ + for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) { + if (strchr("0123456789", sl[1])) { + FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr); + } + } + + do { + if (whichm >= 0 && whichm != num) { + num++; + adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub"); + + /* copy the part of string up to and including + * match to output buffer */ + while (t < patbeg + patlen) + *pb++ = *t++; + continue; + } + + if (patlen == 0 && *patbeg != 0) { /* matched empty string */ + if (mflag == 0) { /* can replace empty */ + num++; + sptr = rptr; + while (*sptr != 0) { + adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); + if (*sptr == '\\') { + backsub(&pb, &sptr); + } else if (*sptr == '&') { + sptr++; + adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); + for (q = patbeg; q < patbeg+patlen; ) + *pb++ = *q++; + } else + *pb++ = *sptr++; + } + } + if (*t == 0) /* at end */ + goto done; + adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub"); + *pb++ = *t++; + if (pb > buf + bufsz) /* BUG: not sure of this test */ + FATAL("gensub result0 %.30s too big; can't happen", buf); + mflag = 0; + } + else { /* matched nonempty string */ + num++; + sptr = t; + adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub"); + while (sptr < patbeg) + *pb++ = *sptr++; + sptr = rptr; + while (*sptr != 0) { + adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub"); + if (*sptr == '\\') { + backsub(&pb, &sptr); + } else if (*sptr == '&') { + sptr++; + adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub"); + for (q = patbeg; q < patbeg+patlen; ) + *pb++ = *q++; + } else + *pb++ = *sptr++; + } + t = patbeg + patlen; + if (patlen == 0 || *t == 0 || *(t-1) == 0) + goto done; + if (pb > buf + bufsz) + FATAL("gensub result1 %.30s too big; can't happen", buf); + mflag = 1; + } + } while (pmatch(pfa,t)); + sptr = t; + adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub"); + while ((*pb++ = *sptr++) != 0) + 
; + done: if (pb > buf + bufsz) + FATAL("gensub result2 %.30s too big; can't happen", buf); + *pb = '\0'; + setsval(res, buf); + pfa->initstat = tempstat; + } + tempfree(x); + tempfree(y); + free(buf); + return(res); +} + +void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */ { /* sptr[0] == '\\' */ - char *pb = *pb_ptr, *sptr = *sptr_ptr; + char *pb = *pb_ptr; + const char *sptr = *sptr_ptr; + static bool first = true; + static bool do_posix = false; + + if (first) { + first = false; + do_posix = (getenv("POSIXLY_CORRECT") != NULL); + } if (sptr[1] == '\\') { if (sptr[2] == '\\' && sptr[3] == '&') { /* \\\& -> \& */ @@ -1998,6 +2345,9 @@ void backsub(char **pb_ptr, char **sptr_ptr) /* handle \\& variations */ } else if (sptr[2] == '&') { /* \\& -> \ + matched */ *pb++ = '\\'; sptr += 2; + } else if (do_posix) { /* \\x -> \x */ + sptr++; + *pb++ = *sptr++; } else { /* \\x -> \\x */ *pb++ = *sptr++; *pb++ = *sptr++; @@ -29,7 +29,6 @@ THIS SOFTWARE. #include <string.h> #include <stdlib.h> #include "awk.h" -#include "ytab.h" #define FULLTAB 2 /* rehash when table gets this x full */ #define GROWTAB 4 /* grow table by this factor */ @@ -114,6 +113,7 @@ void syminit(void) /* initialize symbol table with builtin vars */ rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab); RLENGTH = &rlengthloc->fval; symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab); + free(symtabloc->sval); symtabloc->sval = (char *) symtab; } @@ -126,11 +126,14 @@ void arginit(int ac, char **av) /* set up ARGV and ARGC */ ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval; cp = setsymtab("ARGV", "", 0.0, ARR, symtab); ARGVtab = makesymtab(NSYMTAB); /* could be (int) ARGC as well */ + free(cp->sval); cp->sval = (char *) ARGVtab; for (i = 0; i < ac; i++) { + double result; + sprintf(temp, "%d", i); - if (is_number(*av)) - setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab); + if (is_number(*av, & result)) + setsymtab(temp, *av, result, STR|NUM, ARGVtab); else 
setsymtab(temp, *av, 0.0, STR, ARGVtab); av++; @@ -144,15 +147,18 @@ void envinit(char **envp) /* set up ENVIRON variable */ cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab); ENVtab = makesymtab(NSYMTAB); + free(cp->sval); cp->sval = (char *) ENVtab; for ( ; *envp; envp++) { + double result; + if ((p = strchr(*envp, '=')) == NULL) continue; if( p == *envp ) /* no left hand side name in env string */ continue; *p++ = 0; /* split into two strings at = */ - if (is_number(p)) - setsymtab(*envp, p, atof(p), STR|NUM, ENVtab); + if (is_number(p, & result)) + setsymtab(*envp, p, result, STR|NUM, ENVtab); else setsymtab(*envp, p, 0.0, STR, ENVtab); p[-1] = '='; /* restore in case env is passed down to a shell */ @@ -164,8 +170,8 @@ Array *makesymtab(int n) /* make a new symbol table */ Array *ap; Cell **tp; - ap = (Array *) malloc(sizeof(Array)); - tp = (Cell **) calloc(n, sizeof(Cell *)); + ap = (Array *) malloc(sizeof(*ap)); + tp = (Cell **) calloc(n, sizeof(*tp)); if (ap == NULL || tp == NULL) FATAL("out of space in makesymtab"); ap->nelem = 0; @@ -191,10 +197,10 @@ void freesymtab(Cell *ap) /* free a symbol table */ if (freeable(cp)) xfree(cp->sval); temp = cp->cnext; /* avoids freeing then using */ - free(cp); + free(cp); tp->nelem--; } - tp->tab[i] = 0; + tp->tab[i] = NULL; } if (tp->nelem != 0) WARNING("can't happen: inconsistent element count freeing %s", ap->nval); @@ -207,7 +213,7 @@ void freeelem(Cell *ap, const char *s) /* free elem s from ap (i.e., ap["s"] */ Array *tp; Cell *p, *prev = NULL; int h; - + tp = (Array *) ap->sval; h = hash(s, tp->size); for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext) @@ -231,11 +237,11 @@ Cell *setsymtab(const char *n, const char *s, Awkfloat f, unsigned t, Array *tp) Cell *p; if (n != NULL && (p = lookup(n, tp)) != NULL) { - dprintf( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n", - (void*)p, NN(p->nval), NN(p->sval), p->fval, p->tval) ); + DPRINTF("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n", + (void*)p, NN(p->nval), 
NN(p->sval), p->fval, p->tval); return(p); } - p = (Cell *) malloc(sizeof(Cell)); + p = (Cell *) malloc(sizeof(*p)); if (p == NULL) FATAL("out of space for symbol table at %s", n); p->nval = tostring(n); @@ -250,8 +256,8 @@ Cell *setsymtab(const char *n, const char *s, Awkfloat f, unsigned t, Array *tp) h = hash(n, tp->size); p->cnext = tp->tab[h]; tp->tab[h] = p; - dprintf( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n", - (void*)p, p->nval, p->sval, p->fval, p->tval) ); + DPRINTF("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n", + (void*)p, p->nval, p->sval, p->fval, p->tval); return(p); } @@ -270,7 +276,7 @@ void rehash(Array *tp) /* rehash items in small table into big one */ Cell *cp, *op, **np; nsz = GROWTAB * tp->size; - np = (Cell **) calloc(nsz, sizeof(Cell *)); + np = (Cell **) calloc(nsz, sizeof(*np)); if (np == NULL) /* can't do it, but can keep running. */ return; /* someone else will run out later. */ for (i = 0; i < tp->size; i++) { @@ -303,23 +309,24 @@ Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */ int fldno; f += 0.0; /* normalise negative zero to positive zero */ - if ((vp->tval & (NUM | STR)) == 0) + if ((vp->tval & (NUM | STR)) == 0) funnyvar(vp, "assign to"); if (isfld(vp)) { - donerec = 0; /* mark $0 invalid */ + donerec = false; /* mark $0 invalid */ fldno = atoi(vp->nval); if (fldno > *NF) newfld(fldno); - dprintf( ("setting field %d to %g\n", fldno, f) ); + DPRINTF("setting field %d to %g\n", fldno, f); } else if (&vp->fval == NF) { - donerec = 0; /* mark $0 invalid */ + donerec = false; /* mark $0 invalid */ setlastfld(f); - dprintf( ("setting NF to %g\n", f) ); + DPRINTF("setting NF to %g\n", f); } else if (isrec(vp)) { - donefld = 0; /* mark $1... invalid */ - donerec = 1; + donefld = false; /* mark $1... 
invalid */ + donerec = true; + savefs(); } else if (vp == ofsloc) { - if (donerec == 0) + if (!donerec) recbld(); } if (freeable(vp)) @@ -329,7 +336,7 @@ Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */ vp->tval |= NUM; /* mark number ok */ if (f == -0) /* who would have thought this possible? */ f = 0; - dprintf( ("setfval %p: %s = %g, t=%o\n", (void*)vp, NN(vp->nval), f, vp->tval) ); + DPRINTF("setfval %p: %s = %g, t=%o\n", (void*)vp, NN(vp->nval), f, vp->tval); return vp->fval = f; } @@ -340,7 +347,7 @@ void funnyvar(Cell *vp, const char *rw) if (vp->tval & FCN) FATAL("can't %s %s; it's a function.", rw, vp->nval); WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o", - vp, vp->nval, vp->sval, vp->fval, vp->tval); + (void *)vp, vp->nval, vp->sval, vp->fval, vp->tval); } char *setsval(Cell *vp, const char *s) /* set string val of a Cell */ @@ -349,21 +356,22 @@ char *setsval(Cell *vp, const char *s) /* set string val of a Cell */ int fldno; Awkfloat f; - dprintf( ("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n", - (void*)vp, NN(vp->nval), s, vp->tval, donerec, donefld) ); + DPRINTF("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n", + (void*)vp, NN(vp->nval), s, vp->tval, donerec, donefld); if ((vp->tval & (NUM | STR)) == 0) funnyvar(vp, "assign to"); if (isfld(vp)) { - donerec = 0; /* mark $0 invalid */ + donerec = false; /* mark $0 invalid */ fldno = atoi(vp->nval); if (fldno > *NF) newfld(fldno); - dprintf( ("setting field %d to %s (%p)\n", fldno, s, (void *) s) ); + DPRINTF("setting field %d to %s (%p)\n", fldno, s, (const void*)s); } else if (isrec(vp)) { - donefld = 0; /* mark $1... invalid */ - donerec = 1; + donefld = false; /* mark $1... invalid */ + donerec = true; + savefs(); } else if (vp == ofsloc) { - if (donerec == 0) + if (!donerec) recbld(); } t = s ? 
tostring(s) : tostring(""); /* in case it's self-assign */ @@ -373,14 +381,14 @@ char *setsval(Cell *vp, const char *s) /* set string val of a Cell */ vp->tval |= STR; vp->fmt = NULL; setfree(vp); - dprintf( ("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n", - (void*)vp, NN(vp->nval), t, (void *) t, vp->tval, donerec, donefld) ); + DPRINTF("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n", + (void*)vp, NN(vp->nval), t, (void*)t, vp->tval, donerec, donefld); vp->sval = t; if (&vp->fval == NF) { - donerec = 0; /* mark $0 invalid */ + donerec = false; /* mark $0 invalid */ f = getfval(vp); setlastfld(f); - dprintf( ("setting NF to %g\n", f) ); + DPRINTF("setting NF to %g\n", f); } return(vp->sval); @@ -390,30 +398,47 @@ Awkfloat getfval(Cell *vp) /* get float val of a Cell */ { if ((vp->tval & (NUM | STR)) == 0) funnyvar(vp, "read value of"); - if (isfld(vp) && donefld == 0) + if (isfld(vp) && !donefld) fldbld(); - else if (isrec(vp) && donerec == 0) + else if (isrec(vp) && !donerec) recbld(); if (!isnum(vp)) { /* not a number */ - vp->fval = atof(vp->sval); /* best guess */ - if (is_number(vp->sval) && !(vp->tval&CON)) - vp->tval |= NUM; /* make NUM only sparingly */ + double fval; + bool no_trailing; + + if (is_valid_number(vp->sval, true, & no_trailing, & fval)) { + vp->fval = fval; + if (no_trailing && !(vp->tval&CON)) + vp->tval |= NUM; /* make NUM only sparingly */ + } else + vp->fval = 0.0; } - dprintf( ("getfval %p: %s = %g, t=%o\n", - (void*)vp, NN(vp->nval), vp->fval, vp->tval) ); + DPRINTF("getfval %p: %s = %g, t=%o\n", + (void*)vp, NN(vp->nval), vp->fval, vp->tval); return(vp->fval); } +static const char *get_inf_nan(double d) +{ + if (isinf(d)) { + return (d < 0 ? "-inf" : "+inf"); + } else if (isnan(d)) { + return (signbit(d) != 0 ? 
"-nan" : "+nan"); + } else + return NULL; +} + static char *get_str_val(Cell *vp, char **fmt) /* get string val of a Cell */ { char s[256]; double dtemp; + const char *p; if ((vp->tval & (NUM | STR)) == 0) funnyvar(vp, "read value of"); - if (isfld(vp) && donefld == 0) + if (isfld(vp) && ! donefld) fldbld(); - else if (isrec(vp) && donerec == 0) + else if (isrec(vp) && ! donerec) recbld(); /* @@ -444,7 +469,9 @@ static char *get_str_val(Cell *vp, char **fmt) /* get string val of a Cel { \ if (freeable(vp)) \ xfree(vp->sval); \ - if (modf(vp->fval, &dtemp) == 0) /* it's integral */ \ + if ((p = get_inf_nan(vp->fval)) != NULL) \ + strcpy(s, p); \ + else if (modf(vp->fval, &dtemp) == 0) /* it's integral */ \ snprintf(s, sizeof (s), "%.30g", vp->fval); \ else \ snprintf(s, sizeof (s), *fmt, vp->fval); \ @@ -487,8 +514,8 @@ static char *get_str_val(Cell *vp, char **fmt) /* get string val of a Cel } } done: - dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n", - (void*)vp, NN(vp->nval), vp->sval, (void *) vp->sval, vp->tval) ); + DPRINTF("getsval %p: %s = \"%s (%p)\", t=%o\n", + (void*)vp, NN(vp->nval), vp->sval, (void*)vp->sval, vp->tval); return(vp->sval); } @@ -505,20 +532,53 @@ char *getpssval(Cell *vp) /* get string val of a Cell for print */ char *tostring(const char *s) /* make a copy of string s */ { + char *p = strdup(s); + if (p == NULL) + FATAL("out of space in tostring on %s", s); + return(p); +} + +char *tostringN(const char *s, size_t n) /* make a copy of string s */ +{ char *p; - p = (char *) malloc(strlen(s)+1); + p = (char *) malloc(n); if (p == NULL) FATAL("out of space in tostring on %s", s); strcpy(p, s); return(p); } +Cell *catstr(Cell *a, Cell *b) /* concatenate a and b */ +{ + Cell *c; + char *p; + char *sa = getsval(a); + char *sb = getsval(b); + size_t l = strlen(sa) + strlen(sb) + 1; + p = (char *) malloc(l); + if (p == NULL) + FATAL("out of space concatenating %s and %s", sa, sb); + snprintf(p, l, "%s%s", sa, sb); + + l++; // add room for ' ' + 
char *newbuf = (char *) malloc(l); + if (newbuf == NULL) + FATAL("out of space concatenating %s and %s", sa, sb); + // See string() in lex.c; a string "xx" is stored in the symbol + // table as "xx ". + snprintf(newbuf, l, "%s ", p); + c = setsymtab(newbuf, p, 0.0, CON|STR|DONTFREE, symtab); + free(p); + free(newbuf); + return c; +} + char *qstring(const char *is, int delim) /* collect string up to next delim */ { const char *os = is; int c, n; - uschar *s = (uschar *) is; + const uschar *s = (const uschar *) is; uschar *buf, *bp; if ((buf = (uschar *) malloc(strlen(is)+3)) == NULL) @@ -533,7 +593,7 @@ char *qstring(const char *is, int delim) /* collect string up to next delim */ if (c == 0) { /* \ at end */ *bp++ = '\\'; break; /* for loop */ - } + } switch (c) { case '\\': *bp++ = '\\'; break; case 'n': *bp++ = '\n'; break; @@ -541,6 +601,8 @@ char *qstring(const char *is, int delim) /* collect string up to next delim */ case 'b': *bp++ = '\b'; break; case 'f': *bp++ = '\f'; break; case 'r': *bp++ = '\r'; break; + case 'v': *bp++ = '\v'; break; + case 'a': *bp++ = '\a'; break; default: if (!isdigit(c)) { *bp++ = c; |