diff options
Diffstat (limited to 'contrib/groff/libbib/index.cc')
-rw-r--r-- | contrib/groff/libbib/index.cc | 641 |
1 files changed, 0 insertions, 641 deletions
diff --git a/contrib/groff/libbib/index.cc b/contrib/groff/libbib/index.cc deleted file mode 100644 index 55737717dcbe..000000000000 --- a/contrib/groff/libbib/index.cc +++ /dev/null @@ -1,641 +0,0 @@ -// -*- C++ -*- -/* Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc. - Written by James Clark (jjc@jclark.com) - -This file is part of groff. - -groff is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free -Software Foundation; either version 2, or (at your option) any later -version. - -groff is distributed in the hope that it will be useful, but WITHOUT ANY -WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received a copy of the GNU General Public License along -with groff; see the file COPYING. If not, write to the Free Software -Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - -#include <stdio.h> -#include <string.h> -#include <stdlib.h> -#include <errno.h> - -#include "posix.h" -#include "lib.h" -#include "cset.h" -#include "cmap.h" -#include "errarg.h" -#include "error.h" - -#include "refid.h" -#include "search.h" -#include "index.h" -#include "defs.h" - -#include "nonposix.h" - -// Interface to mmap. -extern "C" { - void *mapread(int fd, int len); - int unmap(void *, int len); -} - -#if 0 -const -#endif -int minus_one = -1; - -int verify_flag = 0; - -struct word_list; - -class index_search_item : public search_item { - search_item *out_of_date_files; - index_header header; - char *buffer; - void *map_addr; - int map_len; - tag *tags; - int *table; - int *lists; - char *pool; - char *key_buffer; - char *filename_buffer; - int filename_buflen; - char **common_words_table; - int common_words_table_size; - const char *ignore_fields; - time_t mtime; - - const char *do_verify(); - const int *search1(const char **pp, const char *end); - const int *search(const char *ptr, int length, int **temp_listp); - const char *munge_filename(const char *); - void read_common_words_file(); - void add_out_of_date_file(int fd, const char *filename, int fid); -public: - index_search_item(const char *, int); - ~index_search_item(); - int load(int fd); - search_item_iterator *make_search_item_iterator(const char *); - int verify(); - void check_files(); - int next_filename_id() const; - friend class index_search_item_iterator; -}; - -class index_search_item_iterator : public search_item_iterator { - index_search_item *indx; - search_item_iterator *out_of_date_files_iter; - search_item *next_out_of_date_file; - const int *found_list; - int *temp_list; - char *buf; - int buflen; - linear_searcher searcher; - char *query; - int get_tag(int tagno, const linear_searcher &, const char **, int *, - reference_id *); -public: - index_search_item_iterator(index_search_item *, const char *); - ~index_search_item_iterator(); - int next(const linear_searcher &, const char **, int *, reference_id *); -}; - - -index_search_item::index_search_item(const char *filename, int fid) -: search_item(filename, fid), out_of_date_files(0), buffer(0), map_addr(0), - map_len(0), key_buffer(0), filename_buffer(0), filename_buflen(0), - common_words_table(0) -{ -} - -index_search_item::~index_search_item() -{ - if (buffer) - free(buffer); - if (map_addr) { - if (unmap(map_addr, map_len) < 0) - error("unmap: %1", strerror(errno)); - } - while (out_of_date_files) { - search_item *tem = out_of_date_files; - out_of_date_files = out_of_date_files->next; - delete tem; - } - a_delete filename_buffer; - a_delete key_buffer; - if (common_words_table) { - for (int i = 0; i < common_words_table_size; i++) - a_delete common_words_table[i]; - a_delete common_words_table; - } -} - -class file_closer { - int *fdp; -public: - file_closer(int &fd) : fdp(&fd) { } - ~file_closer() { close(*fdp); } -}; - -// Tell the compiler that a variable is intentionally unused. -inline void unused(void *) { } - -int index_search_item::load(int fd) -{ - file_closer fd_closer(fd); // close fd on return - unused(&fd_closer); - struct stat sb; - if (fstat(fd, &sb) < 0) { - error("can't fstat `%1': %2", name, strerror(errno)); - return 0; - } - if (!S_ISREG(sb.st_mode)) { - error("`%1' is not a regular file", name); - return 0; - } - mtime = sb.st_mtime; - int size = int(sb.st_size); - char *addr; - map_addr = mapread(fd, size); - if (map_addr) { - addr = (char *)map_addr; - map_len = size; - } - else { - addr = buffer = (char *)malloc(size); - if (buffer == 0) { - error("can't allocate buffer for `%1'", name); - return 0; - } - char *ptr = buffer; - int bytes_to_read = size; - while (bytes_to_read > 0) { - int nread = read(fd, ptr, bytes_to_read); - if (nread == 0) { - error("unexpected EOF on `%1'", name); - return 0; - } - if (nread < 0) { - error("read error on `%1': %2", name, strerror(errno)); - return 0; - } - bytes_to_read -= nread; - ptr += nread; - } - } - header = *(index_header *)addr; - if (header.magic != INDEX_MAGIC) { - error("`%1' is not an index file: wrong magic number", name); - return 0; - } - if (header.version != INDEX_VERSION) { - error("version number in `%1' is wrong: was %2, should be %3", - name, header.version, INDEX_VERSION); - return 0; - } - int sz = (header.tags_size * sizeof(tag) - + header.lists_size * sizeof(int) - + header.table_size * sizeof(int) - + header.strings_size - + sizeof(header)); - if (sz != size) { - error("size of `%1' is wrong: was %2, should be %3", - name, size, sz); - return 0; - } - tags = (tag *)(addr + sizeof(header)); - lists = (int *)(tags + header.tags_size); - table = (int *)(lists + header.lists_size); - pool = (char *)(table + header.table_size); - ignore_fields = strchr(strchr(pool, '\0') + 1, '\0') + 1; - key_buffer = new char[header.truncate]; - read_common_words_file(); - return 1; -} - -const char *index_search_item::do_verify() -{ - if (tags == 0) - return "not loaded"; - if (lists[header.lists_size - 1] >= 0) - return "last list element not negative"; - int i; - for (i = 0; i < header.table_size; i++) { - int li = table[i]; - if (li >= header.lists_size) - return "bad list index"; - if (li >= 0) { - for (int *ptr = lists + li; *ptr >= 0; ptr++) { - if (*ptr >= header.tags_size) - return "bad tag index"; - if (*ptr >= ptr[1] && ptr[1] >= 0) - return "list not ordered"; - } - } - } - for (i = 0; i < header.tags_size; i++) { - if (tags[i].filename_index >= header.strings_size) - return "bad index in tags"; - if (tags[i].length < 0) - return "bad length in tags"; - if (tags[i].start < 0) - return "bad start in tags"; - } - if (pool[header.strings_size - 1] != '\0') - return "last character in pool not nul"; - return 0; -} - -int index_search_item::verify() -{ - const char *reason = do_verify(); - if (!reason) - return 1; - error("`%1' is bad: %2", name, reason); - return 0; -} - -int index_search_item::next_filename_id() const -{ - return filename_id + header.strings_size + 1; -} - -search_item_iterator *index_search_item::make_search_item_iterator( - const char *query) -{ - return new index_search_item_iterator(this, query); -} - -search_item *make_index_search_item(const char *filename, int fid) -{ - char *index_filename = new char[strlen(filename) + sizeof(INDEX_SUFFIX)]; - strcpy(index_filename, filename); - strcat(index_filename, INDEX_SUFFIX); - int fd = open(index_filename, O_RDONLY | O_BINARY); - if (fd < 0) - return 0; - index_search_item *item = new index_search_item(index_filename, fid); - a_delete index_filename; - if (!item->load(fd)) { - close(fd); - delete item; - return 0; - } - else if (verify_flag && !item->verify()) { - delete item; - return 0; - } - else { - item->check_files(); - return item; - } -} - - -index_search_item_iterator::index_search_item_iterator(index_search_item *ind, - const char *q) -: indx(ind), out_of_date_files_iter(0), next_out_of_date_file(0), temp_list(0), - buf(0), buflen(0), - searcher(q, strlen(q), ind->ignore_fields, ind->header.truncate), - query(strsave(q)) -{ - found_list = indx->search(q, strlen(q), &temp_list); - if (!found_list) { - found_list = &minus_one; - warning("all keys would have been discarded in constructing index `%1'", - indx->name); - } -} - -index_search_item_iterator::~index_search_item_iterator() -{ - a_delete temp_list; - a_delete buf; - a_delete query; - delete out_of_date_files_iter; -} - -int index_search_item_iterator::next(const linear_searcher &, - const char **pp, int *lenp, - reference_id *ridp) -{ - if (found_list) { - for (;;) { - int tagno = *found_list; - if (tagno == -1) - break; - found_list++; - if (get_tag(tagno, searcher, pp, lenp, ridp)) - return 1; - } - found_list = 0; - next_out_of_date_file = indx->out_of_date_files; - } - while (next_out_of_date_file) { - if (out_of_date_files_iter == 0) - out_of_date_files_iter - = next_out_of_date_file->make_search_item_iterator(query); - if (out_of_date_files_iter->next(searcher, pp, lenp, ridp)) - return 1; - delete out_of_date_files_iter; - out_of_date_files_iter = 0; - next_out_of_date_file = next_out_of_date_file->next; - } - return 0; -} - -int index_search_item_iterator::get_tag(int tagno, - const linear_searcher &searcher, - const char **pp, int *lenp, - reference_id *ridp) -{ - if (tagno < 0 || tagno >= indx->header.tags_size) { - error("bad tag number"); - return 0; - } - tag *tp = indx->tags + tagno; - const char *filename = indx->munge_filename(indx->pool + tp->filename_index); - int fd = open(filename, O_RDONLY | O_BINARY); - if (fd < 0) { - error("can't open `%1': %2", filename, strerror(errno)); - return 0; - } - struct stat sb; - if (fstat(fd, &sb) < 0) { - error("can't fstat: %1", strerror(errno)); - close(fd); - return 0; - } - time_t mtime = sb.st_mtime; - if (mtime > indx->mtime) { - indx->add_out_of_date_file(fd, filename, - indx->filename_id + tp->filename_index); - return 0; - } - int res = 0; - FILE *fp = fdopen(fd, FOPEN_RB); - if (!fp) { - error("fdopen failed"); - close(fd); - return 0; - } - if (tp->start != 0 && fseek(fp, long(tp->start), 0) < 0) - error("can't seek on `%1': %2", filename, strerror(errno)); - else { - int length = tp->length; - int err = 0; - if (length == 0) { - struct stat sb; - if (fstat(fileno(fp), &sb) < 0) { - error("can't stat `%1': %2", filename, strerror(errno)); - err = 1; - } - else if (!S_ISREG(sb.st_mode)) { - error("`%1' is not a regular file", filename); - err = 1; - } - else - length = int(sb.st_size); - } - if (!err) { - if (length + 2 > buflen) { - a_delete buf; - buflen = length + 2; - buf = new char[buflen]; - } - if (fread(buf + 1, 1, length, fp) != length) - error("fread on `%1' failed: %2", filename, strerror(errno)); - else { - buf[0] = '\n'; - // Remove the CR characters from CRLF pairs. - int sidx = 1, didx = 1; - for ( ; sidx < length + 1; sidx++, didx++) - { - if (buf[sidx] == '\r') - { - if (buf[++sidx] != '\n') - buf[didx++] = '\r'; - else - length--; - } - if (sidx != didx) - buf[didx] = buf[sidx]; - } - buf[length + 1] = '\n'; - res = searcher.search(buf + 1, buf + 2 + length, pp, lenp); - if (res && ridp) - *ridp = reference_id(indx->filename_id + tp->filename_index, - tp->start); - } - } - } - fclose(fp); - return res; -} - -const char *index_search_item::munge_filename(const char *filename) -{ - if (IS_ABSOLUTE(filename)) - return filename; - const char *cwd = pool; - int need_slash = (cwd[0] != 0 - && strchr(DIR_SEPS, strchr(cwd, '\0')[-1]) == 0); - int len = strlen(cwd) + strlen(filename) + need_slash + 1; - if (len > filename_buflen) { - a_delete filename_buffer; - filename_buflen = len; - filename_buffer = new char[len]; - } - strcpy(filename_buffer, cwd); - if (need_slash) - strcat(filename_buffer, "/"); - strcat(filename_buffer, filename); - return filename_buffer; -} - -const int *index_search_item::search1(const char **pp, const char *end) -{ - while (*pp < end && !csalnum(**pp)) - *pp += 1; - if (*pp >= end) - return 0; - const char *start = *pp; - while (*pp < end && csalnum(**pp)) - *pp += 1; - int len = *pp - start; - if (len < header.shortest) - return 0; - if (len > header.truncate) - len = header.truncate; - int is_number = 1; - for (int i = 0; i < len; i++) - if (csdigit(start[i])) - key_buffer[i] = start[i]; - else { - key_buffer[i] = cmlower(start[i]); - is_number = 0; - } - if (is_number && !(len == 4 && start[0] == '1' && start[1] == '9')) - return 0; - unsigned hc = hash(key_buffer, len); - if (common_words_table) { - for (int h = hc % common_words_table_size; - common_words_table[h]; - --h) { - if (strlen(common_words_table[h]) == len - && memcmp(common_words_table[h], key_buffer, len) == 0) - return 0; - if (h == 0) - h = common_words_table_size; - } - } - int li = table[int(hc % header.table_size)]; - return li < 0 ? &minus_one : lists + li; -} - -static void merge(int *result, const int *s1, const int *s2) -{ - for (; *s1 >= 0; s1++) { - while (*s2 >= 0 && *s2 < *s1) - s2++; - if (*s2 == *s1) - *result++ = *s2; - } - *result++ = -1; -} - -const int *index_search_item::search(const char *ptr, int length, - int **temp_listp) -{ - const char *end = ptr + length; - if (*temp_listp) { - a_delete *temp_listp; - *temp_listp = 0; - } - const int *first_list = 0; - while (ptr < end && (first_list = search1(&ptr, end)) == 0) - ; - if (!first_list) - return 0; - if (*first_list < 0) - return first_list; - const int *second_list = 0; - while (ptr < end && (second_list = search1(&ptr, end)) == 0) - ; - if (!second_list) - return first_list; - if (*second_list < 0) - return second_list; - const int *p; - for (p = first_list; *p >= 0; p++) - ; - int len = p - first_list; - for (p = second_list; *p >= 0; p++) - ; - if (p - second_list < len) - len = p - second_list; - int *matches = new int[len + 1]; - merge(matches, first_list, second_list); - while (ptr < end) { - const int *list = search1(&ptr, end); - if (list != 0) { - if (*list < 0) { - a_delete matches; - return list; - } - merge(matches, matches, list); - if (*matches < 0) { - a_delete matches; - return &minus_one; - } - } - } - *temp_listp = matches; - return matches; -} - -void index_search_item::read_common_words_file() -{ - if (header.common <= 0) - return; - const char *common_words_file = munge_filename(strchr(pool, '\0') + 1); - errno = 0; - FILE *fp = fopen(common_words_file, "r"); - if (!fp) { - error("can't open `%1': %2", common_words_file, strerror(errno)); - return; - } - common_words_table_size = 2*header.common + 1; - while (!is_prime(common_words_table_size)) - common_words_table_size++; - common_words_table = new char *[common_words_table_size]; - for (int i = 0; i < common_words_table_size; i++) - common_words_table[i] = 0; - int count = 0; - int key_len = 0; - for (;;) { - int c = getc(fp); - while (c != EOF && !csalnum(c)) - c = getc(fp); - if (c == EOF) - break; - do { - if (key_len < header.truncate) - key_buffer[key_len++] = cmlower(c); - c = getc(fp); - } while (c != EOF && csalnum(c)); - if (key_len >= header.shortest) { - int h = hash(key_buffer, key_len) % common_words_table_size; - while (common_words_table[h]) { - if (h == 0) - h = common_words_table_size; - --h; - } - common_words_table[h] = new char[key_len + 1]; - memcpy(common_words_table[h], key_buffer, key_len); - common_words_table[h][key_len] = '\0'; - } - if (++count >= header.common) - break; - key_len = 0; - if (c == EOF) - break; - } - fclose(fp); -} - -void index_search_item::add_out_of_date_file(int fd, const char *filename, - int fid) -{ - search_item **pp; - for (pp = &out_of_date_files; *pp; pp = &(*pp)->next) - if ((*pp)->is_named(filename)) - return; - *pp = make_linear_search_item(fd, filename, fid); - warning("`%1' modified since `%2' created", filename, name); -} - -void index_search_item::check_files() -{ - const char *pool_end = pool + header.strings_size; - for (const char *ptr = strchr(ignore_fields, '\0') + 1; - ptr < pool_end; - ptr = strchr(ptr, '\0') + 1) { - const char *path = munge_filename(ptr); - struct stat sb; - if (stat(path, &sb) < 0) - error("can't stat `%1': %2", path, strerror(errno)); - else if (sb.st_mtime > mtime) { - int fd = open(path, O_RDONLY | O_BINARY); - if (fd < 0) - error("can't open `%1': %2", path, strerror(errno)); - else - add_out_of_date_file(fd, path, filename_id + (ptr - pool)); - } - } -} |