diff options
Diffstat (limited to 'contrib/awk/array.c')
-rw-r--r-- | contrib/awk/array.c | 858 |
1 files changed, 0 insertions, 858 deletions
diff --git a/contrib/awk/array.c b/contrib/awk/array.c deleted file mode 100644 index 905f3badd0a1..000000000000 --- a/contrib/awk/array.c +++ /dev/null @@ -1,858 +0,0 @@ -/* - * array.c - routines for associative arrays. - */ - -/* - * Copyright (C) 1986, 1988, 1989, 1991-2001 the Free Software Foundation, Inc. - * - * This file is part of GAWK, the GNU implementation of the - * AWK Programming Language. - * - * GAWK is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * GAWK is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - */ - -/* - * Tree walks (``for (iggy in foo)'') and array deletions use expensive - * linear searching. So what we do is start out with small arrays and - * grow them as needed, so that our arrays are hopefully small enough, - * most of the time, that they're pretty full and we're not looking at - * wasted space. - * - * The decision is made to grow the array if the average chain length is - * ``too big''. This is defined as the total number of entries in the table - * divided by the size of the array being greater than some constant. - */ - -#define AVG_CHAIN_MAX 10 /* don't want to linear search more than this */ - -#include "awk.h" - -static NODE *assoc_find P((NODE *symbol, NODE *subs, int hash1)); -static void grow_table P((NODE *symbol)); - -/* concat_exp --- concatenate expression list into a single string */ - -NODE * -concat_exp(register NODE *tree) -{ - register NODE *r; - char *str; - char *s; - size_t len; - int offset; - size_t subseplen; - char *subsep; - - if (tree->type != Node_expression_list) - return force_string(tree_eval(tree)); - r = force_string(tree_eval(tree->lnode)); - if (tree->rnode == NULL) - return r; - subseplen = SUBSEP_node->var_value->stlen; - subsep = SUBSEP_node->var_value->stptr; - len = r->stlen + subseplen + 2; - emalloc(str, char *, len, "concat_exp"); - memcpy(str, r->stptr, r->stlen+1); - s = str + r->stlen; - free_temp(r); - for (tree = tree->rnode; tree != NULL; tree = tree->rnode) { - if (subseplen == 1) - *s++ = *subsep; - else { - memcpy(s, subsep, subseplen+1); - s += subseplen; - } - r = force_string(tree_eval(tree->lnode)); - len += r->stlen + subseplen; - offset = s - str; - erealloc(str, char *, len, "concat_exp"); - s = str + offset; - memcpy(s, r->stptr, r->stlen+1); - s += r->stlen; - free_temp(r); - } - r = make_str_node(str, s - str, ALREADY_MALLOCED); - r->flags |= TEMP; - return r; -} - -/* assoc_clear --- flush all the values in symbol[] before doing a split() */ - -void -assoc_clear(NODE *symbol) -{ - int i; - NODE *bucket, *next; - - if (symbol->var_array == NULL) - return; - for (i = 0; i < symbol->array_size; i++) { - for (bucket = symbol->var_array[i]; bucket != NULL; bucket = next) { - next = bucket->ahnext; - unref(bucket->ahname); - unref(bucket->ahvalue); - freenode(bucket); - } - symbol->var_array[i] = NULL; - } - free(symbol->var_array); - symbol->var_array = NULL; - symbol->array_size = symbol->table_size = 0; - symbol->flags &= ~ARRAYMAXED; -} - -/* hash --- calculate the hash function of the string in subs */ - -unsigned int -hash(register const char *s, register size_t len, unsigned long hsize) -{ - register unsigned long h = 0; - - /* - * This is INCREDIBLY ugly, but fast. We break the string up into - * 8 byte units. On the first time through the loop we get the - * "leftover bytes" (strlen % 8). On every other iteration, we - * perform 8 HASHC's so we handle all 8 bytes. Essentially, this - * saves us 7 cmp & branch instructions. If this routine is - * heavily used enough, it's worth the ugly coding. - * - * OZ's original sdbm hash, copied from Margo Seltzers db package. - */ - - /* - * Even more speed: - * #define HASHC h = *s++ + 65599 * h - * Because 65599 = pow(2, 6) + pow(2, 16) - 1 we multiply by shifts - */ -#define HASHC htmp = (h << 6); \ - h = *s++ + htmp + (htmp << 10) - h - - unsigned long htmp; - - h = 0; - -#if defined(VAXC) - /* - * This was an implementation of "Duff's Device", but it has been - * redone, separating the switch for extra iterations from the - * loop. This is necessary because the DEC VAX-C compiler is - * STOOPID. - */ - switch (len & (8 - 1)) { - case 7: HASHC; - case 6: HASHC; - case 5: HASHC; - case 4: HASHC; - case 3: HASHC; - case 2: HASHC; - case 1: HASHC; - default: break; - } - - if (len > (8 - 1)) { - register size_t loop = len >> 3; - do { - HASHC; - HASHC; - HASHC; - HASHC; - HASHC; - HASHC; - HASHC; - HASHC; - } while (--loop); - } -#else /* ! VAXC */ - /* "Duff's Device" for those who can handle it */ - if (len > 0) { - register size_t loop = (len + 8 - 1) >> 3; - - switch (len & (8 - 1)) { - case 0: - do { /* All fall throughs */ - HASHC; - case 7: HASHC; - case 6: HASHC; - case 5: HASHC; - case 4: HASHC; - case 3: HASHC; - case 2: HASHC; - case 1: HASHC; - } while (--loop); - } - } -#endif /* ! VAXC */ - - if (h >= hsize) - h %= hsize; - return h; -} - -/* assoc_find --- locate symbol[subs] */ - -static NODE * /* NULL if not found */ -assoc_find(NODE *symbol, register NODE *subs, int hash1) -{ - register NODE *bucket; - NODE *s1, *s2; - - for (bucket = symbol->var_array[hash1]; bucket != NULL; - bucket = bucket->ahnext) { - /* - * This used to use cmp_nodes() here. That's wrong. - * Array indexes are strings; compare as such, always! - */ - s1 = bucket->ahname; - s1 = force_string(s1); - s2 = subs; - - if (s1->stlen == s2->stlen) { - if (s1->stlen == 0 /* "" is a valid index */ - || STREQN(s1->stptr, s2->stptr, s1->stlen)) - return bucket; - } - } - return NULL; -} - -/* in_array --- test whether the array element symbol[subs] exists or not */ - -int -in_array(NODE *symbol, NODE *subs) -{ - register int hash1; - int ret; - - if (symbol->type == Node_param_list) - symbol = stack_ptr[symbol->param_cnt]; - if (symbol->type == Node_array_ref) - symbol = symbol->orig_array; - if ((symbol->flags & SCALAR) != 0) - fatal(_("attempt to use scalar `%s' as array"), symbol->vname); - /* - * evaluate subscript first, it could have side effects - */ - subs = concat_exp(subs); /* concat_exp returns a string node */ - if (symbol->var_array == NULL) { - free_temp(subs); - return 0; - } - hash1 = hash(subs->stptr, subs->stlen, (unsigned long) symbol->array_size); - ret = (assoc_find(symbol, subs, hash1) != NULL); - free_temp(subs); - return ret; -} - -/* - * assoc_lookup: - * Find SYMBOL[SUBS] in the assoc array. Install it with value "" if it - * isn't there. Returns a pointer ala get_lhs to where its value is stored. - * - * SYMBOL is the address of the node (or other pointer) being dereferenced. - * SUBS is a number or string used as the subscript. - */ - -NODE ** -assoc_lookup(NODE *symbol, NODE *subs, int reference) -{ - register int hash1; - register NODE *bucket; - - assert(symbol->type == Node_var_array || symbol->type == Node_var); - - (void) force_string(subs); - - if ((symbol->flags & SCALAR) != 0) - fatal(_("attempt to use scalar `%s' as array"), symbol->vname); - - if (symbol->var_array == NULL) { - if (symbol->type != Node_var_array) { - unref(symbol->var_value); - symbol->type = Node_var_array; - } - symbol->array_size = symbol->table_size = 0; /* sanity */ - symbol->flags &= ~ARRAYMAXED; - grow_table(symbol); - hash1 = hash(subs->stptr, subs->stlen, - (unsigned long) symbol->array_size); - } else { - hash1 = hash(subs->stptr, subs->stlen, - (unsigned long) symbol->array_size); - bucket = assoc_find(symbol, subs, hash1); - if (bucket != NULL) { - free_temp(subs); - return &(bucket->ahvalue); - } - } - - if (do_lint && reference) { - subs->stptr[subs->stlen] = '\0'; - lintwarn(_("reference to uninitialized element `%s[\"%s\"]'"), - symbol->vname, subs->stptr); - } - - /* It's not there, install it. */ - if (do_lint && subs->stlen == 0) - lintwarn(_("subscript of array `%s' is null string"), - symbol->vname); - - /* first see if we would need to grow the array, before installing */ - symbol->table_size++; - if ((symbol->flags & ARRAYMAXED) == 0 - && (symbol->table_size / symbol->array_size) > AVG_CHAIN_MAX) { - grow_table(symbol); - /* have to recompute hash value for new size */ - hash1 = hash(subs->stptr, subs->stlen, - (unsigned long) symbol->array_size); - } - - getnode(bucket); - bucket->type = Node_ahash; - - /* - * Freeze this string value --- it must never - * change, no matter what happens to the value - * that created it or to CONVFMT, etc. - * - * One day: Use an atom table to track array indices, - * and avoid the extra memory overhead. - */ - if (subs->flags & TEMP) - bucket->ahname = dupnode(subs); - else - bucket->ahname = copynode(subs); - - free_temp(subs); - - /* array subscripts are strings */ - bucket->ahname->flags &= ~(NUMBER|NUM); - bucket->ahname->flags |= (STRING|STR); - /* ensure that this string value never changes */ - bucket->ahname->stfmt = -1; - - bucket->ahvalue = Nnull_string; - bucket->ahnext = symbol->var_array[hash1]; - symbol->var_array[hash1] = bucket; - return &(bucket->ahvalue); -} - -/* do_delete --- perform `delete array[s]' */ - -void -do_delete(NODE *symbol, NODE *tree) -{ - register int hash1; - register NODE *bucket, *last; - NODE *subs; - - if (symbol->type == Node_param_list) { - symbol = stack_ptr[symbol->param_cnt]; - if (symbol->type == Node_var) - return; - } - if (symbol->type == Node_array_ref) - symbol = symbol->orig_array; - if (symbol->type == Node_var_array) { - if (symbol->var_array == NULL) - return; - } else - fatal(_("delete: illegal use of variable `%s' as array"), - symbol->vname); - - if (tree == NULL) { /* delete array */ - assoc_clear(symbol); - return; - } - - subs = concat_exp(tree); /* concat_exp returns string node */ - hash1 = hash(subs->stptr, subs->stlen, (unsigned long) symbol->array_size); - - last = NULL; - for (bucket = symbol->var_array[hash1]; bucket != NULL; - last = bucket, bucket = bucket->ahnext) { - /* - * This used to use cmp_nodes() here. That's wrong. - * Array indexes are strings; compare as such, always! - */ - NODE *s1, *s2; - - s1 = bucket->ahname; - s1 = force_string(s1); - s2 = subs; - - if (s1->stlen == s2->stlen) { - if (s1->stlen == 0 /* "" is a valid index */ - || STREQN(s1->stptr, s2->stptr, s1->stlen)) - break; - } - } - - if (bucket == NULL) { - if (do_lint) - lintwarn(_("delete: index `%s' not in array `%s'"), - subs->stptr, symbol->vname); - free_temp(subs); - return; - } - free_temp(subs); - if (last != NULL) - last->ahnext = bucket->ahnext; - else - symbol->var_array[hash1] = bucket->ahnext; - unref(bucket->ahname); - unref(bucket->ahvalue); - freenode(bucket); - symbol->table_size--; - if (symbol->table_size <= 0) { - memset(symbol->var_array, '\0', - sizeof(NODE *) * symbol->array_size); - symbol->table_size = symbol->array_size = 0; - symbol->flags &= ~ARRAYMAXED; - free((char *) symbol->var_array); - symbol->var_array = NULL; - } -} - -/* do_delete_loop --- simulate ``for (iggy in foo) delete foo[iggy]'' */ - -/* - * The primary hassle here is that `iggy' needs to have some arbitrary - * array index put in it before we can clear the array, we can't - * just replace the loop with `delete foo'. - */ - -void -do_delete_loop(NODE *symbol, NODE *tree) -{ - size_t i; - NODE **lhs; - Func_ptr after_assign = NULL; - - if (symbol->type == Node_param_list) { - symbol = stack_ptr[symbol->param_cnt]; - if (symbol->type == Node_var) - return; - } - if (symbol->type == Node_array_ref) - symbol = symbol->orig_array; - if (symbol->type == Node_var_array) { - if (symbol->var_array == NULL) - return; - } else - fatal(_("delete: illegal use of variable `%s' as array"), - symbol->vname); - - /* get first index value */ - for (i = 0; i < symbol->array_size; i++) { - if (symbol->var_array[i] != NULL) { - lhs = get_lhs(tree->lnode, & after_assign, FALSE); - unref(*lhs); - *lhs = dupnode(symbol->var_array[i]->ahname); - break; - } - } - - /* blast the array in one shot */ - assoc_clear(symbol); -} - -/* grow_table --- grow a hash table */ - -static void -grow_table(NODE *symbol) -{ - NODE **old, **new, *chain, *next; - int i, j; - unsigned long hash1; - unsigned long oldsize, newsize; - /* - * This is an array of primes. We grow the table by an order of - * magnitude each time (not just doubling) so that growing is a - * rare operation. We expect, on average, that it won't happen - * more than twice. The final size is also chosen to be small - * enough so that MS-DOG mallocs can handle it. When things are - * very large (> 8K), we just double more or less, instead of - * just jumping from 8K to 64K. - */ - static long sizes[] = { 13, 127, 1021, 8191, 16381, 32749, 65497, -#if ! defined(MSDOS) && ! defined(OS2) && ! defined(atarist) - 131101, 262147, 524309, 1048583, 2097169, - 4194319, 8388617, 16777259, 33554467, - 67108879, 134217757, 268435459, 536870923, - 1073741827 -#endif - }; - - /* find next biggest hash size */ - newsize = oldsize = symbol->array_size; - for (i = 0, j = sizeof(sizes)/sizeof(sizes[0]); i < j; i++) { - if (oldsize < sizes[i]) { - newsize = sizes[i]; - break; - } - } - - if (newsize == oldsize) { /* table already at max (!) */ - symbol->flags |= ARRAYMAXED; - return; - } - - /* allocate new table */ - emalloc(new, NODE **, newsize * sizeof(NODE *), "grow_table"); - memset(new, '\0', newsize * sizeof(NODE *)); - - /* brand new hash table, set things up and return */ - if (symbol->var_array == NULL) { - symbol->table_size = 0; - goto done; - } - - /* old hash table there, move stuff to new, free old */ - old = symbol->var_array; - for (i = 0; i < oldsize; i++) { - if (old[i] == NULL) - continue; - - for (chain = old[i]; chain != NULL; chain = next) { - next = chain->ahnext; - hash1 = hash(chain->ahname->stptr, - chain->ahname->stlen, newsize); - - /* remove from old list, add to new */ - chain->ahnext = new[hash1]; - new[hash1] = chain; - } - } - free(old); - -done: - /* - * note that symbol->table_size does not change if an old array, - * and is explicitly set to 0 if a new one. - */ - symbol->var_array = new; - symbol->array_size = newsize; -} - -/* pr_node --- print simple node info */ - -static void -pr_node(NODE *n) -{ - if ((n->flags & (NUM|NUMBER)) != 0) - printf("%g", n->numbr); - else - printf("%.*s", (int) n->stlen, n->stptr); -} - -/* assoc_dump --- dump the contents of an array */ - -NODE * -assoc_dump(NODE *symbol) -{ - int i; - NODE *bucket; - - if (symbol->var_array == NULL) { - printf(_("%s: empty (null)\n"), symbol->vname); - return tmp_number((AWKNUM) 0); - } - - if (symbol->table_size == 0) { - printf(_("%s: empty (zero)\n"), symbol->vname); - return tmp_number((AWKNUM) 0); - } - - printf(_("%s: table_size = %d, array_size = %d\n"), symbol->vname, - (int) symbol->table_size, (int) symbol->array_size); - - for (i = 0; i < symbol->array_size; i++) { - for (bucket = symbol->var_array[i]; bucket != NULL; - bucket = bucket->ahnext) { - printf("%s: I: [(%p, %ld, %s) len %d <%.*s>] V: [", - symbol->vname, - bucket->ahname, - bucket->ahname->stref, - flags2str(bucket->ahname->flags), - (int) bucket->ahname->stlen, - (int) bucket->ahname->stlen, - bucket->ahname->stptr); - pr_node(bucket->ahvalue); - printf("]\n"); - } - } - - return tmp_number((AWKNUM) 0); -} - -/* do_adump --- dump an array: interface to assoc_dump */ - -NODE * -do_adump(NODE *tree) -{ - NODE *r, *a; - - a = tree->lnode; - - if (a->type == Node_param_list) { - printf(_("%s: is paramater\n"), a->vname); - a = stack_ptr[a->param_cnt]; - } - - if (a->type == Node_array_ref) { - printf(_("%s: array_ref to %s\n"), a->vname, - a->orig_array->vname); - a = a->orig_array; - } - - r = assoc_dump(a); - - return r; -} - -/* - * The following functions implement the builtin - * asort function. Initial work by Alan J. Broder, - * ajb@woti.com. - */ - -/* dup_table --- duplicate input symbol table "symbol" */ - -static void -dup_table(NODE *symbol, NODE *newsymb) -{ - NODE **old, **new, *chain, *bucket; - int i; - unsigned long cursize; - - /* find the current hash size */ - cursize = symbol->array_size; - - new = NULL; - - /* input is a brand new hash table, so there's nothing to copy */ - if (symbol->var_array == NULL) - newsymb->table_size = 0; - else { - /* old hash table there, dupnode stuff into a new table */ - - /* allocate new table */ - emalloc(new, NODE **, cursize * sizeof(NODE *), "dup_table"); - memset(new, '\0', cursize * sizeof(NODE *)); - - /* do the copying/dupnode'ing */ - old = symbol->var_array; - for (i = 0; i < cursize; i++) { - if (old[i] != NULL) { - for (chain = old[i]; chain != NULL; - chain = chain->ahnext) { - /* get a node for the linked list */ - getnode(bucket); - bucket->type = Node_ahash; - - /* - * copy the corresponding name and - * value from the original input list - */ - bucket->ahname = dupnode(chain->ahname); - bucket->ahvalue = dupnode(chain->ahvalue); - - /* - * put the node on the corresponding - * linked list in the new table - */ - bucket->ahnext = new[i]; - new[i] = bucket; - } - } - } - newsymb->table_size = symbol->table_size; - } - - newsymb->var_array = new; - newsymb->array_size = cursize; -} - -/* merge --- do a merge of two sorted lists */ - -static NODE * -merge(NODE *left, NODE *right) -{ - NODE *ans, *cur; - - if (cmp_nodes(left->ahvalue, right->ahvalue) <= 0) { - ans = cur = left; - left = left->ahnext; - } else { - ans = cur = right; - right = right->ahnext; - } - - while (left != NULL && right != NULL) { - if (cmp_nodes(left->ahvalue, right->ahvalue) <= 0) { - cur->ahnext = left; - cur = left; - left = left->ahnext; - } else { - cur->ahnext = right; - cur = right; - right = right->ahnext; - } - } - - cur->ahnext = (left != NULL ? left : right); - - return ans; -} - -/* merge_sort --- recursively sort the left and right sides of a list */ - -static NODE * -merge_sort(NODE *left, int size) -{ - NODE *right, *tmp; - int i, half; - - if (size <= 1) - return left; - - /* walk down the list, till just one before the midpoint */ - tmp = left; - half = size / 2; - for (i = 0; i < half-1; i++) - tmp = tmp->ahnext; - - /* split the list into two parts */ - right = tmp->ahnext; - tmp->ahnext = NULL; - - /* sort the left and right parts of the list */ - left = merge_sort(left, half); - right = merge_sort(right, size-half); - - /* merge the two sorted parts of the list */ - return merge(left, right); -} - - -/* - * assoc_from_list -- Populate an array with the contents of a list of NODEs, - * using increasing integers as the key. - */ - -static void -assoc_from_list(NODE *symbol, NODE *list) -{ - NODE *next; - int i = 0; - register int hash1; - - for (; list != NULL; list = next) { - next = list->ahnext; - - /* make an int out of i++ */ - i++; - list->ahname = make_number((AWKNUM) i); - (void) force_string(list->ahname); - - /* find the bucket where it belongs */ - hash1 = hash(list->ahname->stptr, list->ahname->stlen, - symbol->array_size); - - /* link the node into the chain at that bucket */ - list->ahnext = symbol->var_array[hash1]; - symbol->var_array[hash1] = list; - } -} - -/* - * assoc_sort_inplace --- sort all the values in symbol[], replacing - * the sorted values back into symbol[], indexed by integers starting with 1. - */ - -static NODE * -assoc_sort_inplace(NODE *symbol) -{ - int i, num; - NODE *bucket, *next, *list; - - if (symbol->var_array == NULL - || symbol->array_size <= 0 - || symbol->table_size <= 0) - return tmp_number((AWKNUM) 0); - - /* build a linked list out of all the entries in the table */ - list = NULL; - num = 0; - for (i = 0; i < symbol->array_size; i++) { - for (bucket = symbol->var_array[i]; bucket != NULL; bucket = next) { - next = bucket->ahnext; - unref(bucket->ahname); - bucket->ahnext = list; - list = bucket; - num++; - } - symbol->var_array[i] = NULL; - } - - /* - * Sort the linked list of NODEs. - * (The especially nice thing about using a merge sort here is that - * we require absolutely no additional storage. This is handy if the - * array has grown to be very large.) - */ - list = merge_sort(list, num); - - /* - * now repopulate the original array, using increasing - * integers as the key - */ - assoc_from_list(symbol, list); - - return tmp_number((AWKNUM) num); -} - -/* do_asort --- do the actual work to sort the input array */ - -NODE * -do_asort(NODE *tree) -{ - NODE *src, *dest; - - src = tree->lnode; - dest = NULL; - - if (src->type == Node_param_list) - src = stack_ptr[src->param_cnt]; - if (src->type == Node_array_ref) - src = src->orig_array; - if (src->type != Node_var_array) - fatal(_("asort: first argument is not an array")); - - if (tree->rnode != NULL) { /* 2nd optional arg */ - dest = tree->rnode->lnode; - if (dest->type == Node_param_list) - dest = stack_ptr[dest->param_cnt]; - if (dest->type == Node_array_ref) - dest = dest->orig_array; - if (dest->type != Node_var && dest->type != Node_var_array) - fatal(_("asort: second argument is not an array")); - dest->type = Node_var_array; - assoc_clear(dest); - dup_table(src, dest); - } - - return dest != NULL ? assoc_sort_inplace(dest) : assoc_sort_inplace(src); -} |