aboutsummaryrefslogtreecommitdiff
path: root/contrib/groff/refer
diff options
context:
space:
mode:
author: Ruslan Ermilov <ru@FreeBSD.org>, 2001-04-17 12:23:50 +0000
committer: Ruslan Ermilov <ru@FreeBSD.org>, 2001-04-17 12:23:50 +0000
commit: d0bf30505c50ee6b6f217d5e2bd1a67938ce81e4 (patch)
tree: d0ecc32b66c84ba03b40de7da66350fc02b90d02 /contrib/groff/refer
parent: af2786323e797d59708a2ca7929031c965bc2dde (diff)
This commit was generated by cvs2svn to compensate for changes in r75587,
which included commits to RCS files with non-trunk default branches.
Notes
Notes: svn path=/head/; revision=75588
Diffstat (limited to 'contrib/groff/refer')
-rw-r--r-- contrib/groff/refer/Makefile.sub 23
-rw-r--r-- contrib/groff/refer/TODO 124
-rw-r--r-- contrib/groff/refer/command.cc 807
-rw-r--r-- contrib/groff/refer/command.h 36
-rw-r--r-- contrib/groff/refer/label.y 1177
-rw-r--r-- contrib/groff/refer/ref.cc 1160
-rw-r--r-- contrib/groff/refer/ref.h 120
-rw-r--r-- contrib/groff/refer/refer.cc 1228
-rw-r--r-- contrib/groff/refer/refer.h 78
-rw-r--r-- contrib/groff/refer/refer.man 1302
-rw-r--r-- contrib/groff/refer/token.cc 378
-rw-r--r-- contrib/groff/refer/token.h 88
12 files changed, 0 insertions, 6521 deletions
diff --git a/contrib/groff/refer/Makefile.sub b/contrib/groff/refer/Makefile.sub
deleted file mode 100644
index 1631b5e36e05..000000000000
--- a/contrib/groff/refer/Makefile.sub
+++ /dev/null
@@ -1,23 +0,0 @@
-PROG=refer
-MAN1=refer.n
-XLIBS=$(LIBBIB) $(LIBGROFF)
-MLIB=$(LIBM)
-OBJS=\
- command.o \
- label.o \
- ref.o \
- refer.o \
- token.o
-CCSRCS=\
- $(srcdir)/command.cc \
- $(srcdir)/ref.cc \
- $(srcdir)/refer.cc \
- $(srcdir)/token.cc
-HDRS=\
- $(srcdir)/refer.h \
- $(srcdir)/token.h \
- $(srcdir)/command.h \
- $(srcdir)/ref.h
-GRAM=$(srcdir)/label.y
-YTABC=$(srcdir)/label.cc
-NAMEPREFIX=$(g)
diff --git a/contrib/groff/refer/TODO b/contrib/groff/refer/TODO
deleted file mode 100644
index 5bbd9bff1e8c..000000000000
--- a/contrib/groff/refer/TODO
+++ /dev/null
@@ -1,124 +0,0 @@
-inline references
-
-Some sort of macro/subroutine that can cover several references.
-
-move-punctuation should ignore multiple punctuation characters.
-
-Make the index files machine independent.
-
-Allow search keys to be negated (with !) to indicate that the
-reference should not contain the key. Ignore negated keys during
-indexed searching.
-
-Provide an option with lkbib and lookbib that prints the location
-(filename, position) of each reference. Need to map filename_id's
-back to filenames.
-
-Rename join-authors to join-fields. Have a separate label-join-fields
-command used by @ and #.
-
-Have some sort of quantifier: eg $.n#A means execute `$.n' for each
-instance of an A field, setting $ to that field, and then join the
-results using the join-authors command.
-
-no-text-in-bracket command which says not to allow post_text and
-pre_text when the [] flag has been given. Useful for superscripted
-footnotes.
-
-Make it possible to translate - to \(en in page ranges.
-
-Trim eign a bit.
-
-In indexed searching discard all numeric keys except dates.
-
-Allow `\ ' to separate article from first word.
-
-%also
-
-Option automatically to supply [] flags in every reference.
-
-See if we can avoid requiring a comma before jr. and so on
-in find_last_name().
-
-Cache sortified authors in authors string during tentative evaluation of
-label specification.
-
-Possibly don't allow * and % expressions in the first part of ?:, | or
-& expressions.
-
-Handle better the case where <> occurs inside functions and in the
-first operand of ~. Or perhaps implement <> using some magic character
-in the string.
-
-Should special treatment be given to lines beginning with . in
-references? (Unix refer seems to treat them like `%').
-
-Add global flag to control whether all files should be stat-ed after
-loading, and whether they should be stat-ed before each search.
-Perhaps make this dependent on the number of files there are.
-
-Option to truncate keys to truncate_len in linear searching.
-
-Allow multiple -f options in indxbib.
-
-In indxbib, possibly store common words rather than common words
-filename. In this case store only words that are actually present in
-the file.
-
-Perhaps we should put out an obnoxious copyright message when lookbib
-starts up.
-
-Provide an option that writes a file containing just the references
-actually used. Useful if you want to distribute a document.
-
-Have a magic token such that
-%A <sort stuff><magic token><print stuff>
-will print as though it were
-%A <print stuff>
-but sort as though it were
-%A <sort stuff>
-Do we need this if we can specify author alternatives for sorting?
-No, provided we have separate alternatives for @.
-
-In consider_authors when last names are ambiguous we might be able to
-use just the first name and not the Jr. bit. Or we might be able to
-abbreviate the author.
-
-It ought to be possible to specify an alternative field to sort on
-instead of date. (ie if there's a field giving the type of document --
-these references should sort after any years)
-
-Provide a way to execute a command using a command-line option.
-
-Option to set the label-spec as a command-line option (-L).
-
-Command to specify which fields can occur multiple times:
-multiple AE
-
-Command to specify how various fields sort:
-sort-as-name A
-sort-as-date D
-sort-as-title T
-sort-as-other O
-
-Command to specify which fields are author fields:
-# if we don't have A use field Q
-author-fields AQ
-
-Commands to set properties of tokens.
-sortify-token \(ae ae
-uppercase-token \[ae] \[AE]
-
-Command to set the names of months:
-months january february march april may ...
-
-Perhaps provide some sort of macro capability:
-# perhaps a macro capability
-defmacro foo
-annotation-field $1
-endef
-
-Command to control strings used in capitalization
-capitalize-start \s+2
-capitalize-end \s-2
-(perhaps make these arguments to the capitalize command.)
diff --git a/contrib/groff/refer/command.cc b/contrib/groff/refer/command.cc
deleted file mode 100644
index 004189eeb03a..000000000000
--- a/contrib/groff/refer/command.cc
+++ /dev/null
@@ -1,807 +0,0 @@
-// -*- C++ -*-
-/* Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc.
- Written by James Clark (jjc@jclark.com)
-
-This file is part of groff.
-
-groff is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 2, or (at your option) any later
-version.
-
-groff is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received a copy of the GNU General Public License along
-with groff; see the file COPYING. If not, write to the Free Software
-Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
-
-#include "refer.h"
-#include "refid.h"
-#include "search.h"
-#include "command.h"
-
-cset cs_field_name = csalpha;
-
-// One buffered source of command text on the input stack.  The text is
-// held in `buffer' and handed out one character at a time.
-class input_item {
- input_item *next; // item below this one on the stack; assigned by input_stack
- char *filename; // copy made with strsave(); freed in the destructor
- int first_lineno; // line number reported for the start of buffer
- string buffer;
- const char *ptr; // next unread character in buffer
- const char *end; // one past the last character of buffer
-public:
- input_item(string &, const char *, int = 1);
- ~input_item();
- int get_char();
- int peek_char();
- void skip_char();
- int get_location(const char **, int *);
-
- friend class input_stack;
-};
-
-// Build an item that reads the text of S.  FN is copied with strsave();
-// LN is the line number reported for the first character.
-// NOTE(review): buffer.move(s) presumably transfers S's contents rather
-// than copying them, so callers should not rely on S afterwards --
-// confirm against the string class.
-input_item::input_item(string &s, const char *fn, int ln)
-: next(0), filename(strsave(fn)), first_lineno(ln)
-{
-  // `next' starts out null so that an item which has not yet been linked
-  // into the stack never carries an indeterminate pointer.
-  buffer.move(s);
-  ptr = buffer.contents();
-  end = ptr + buffer.length();
-}
-
-// Free the filename copy that the constructor made with strsave().
-input_item::~input_item()
-{
- a_delete filename;
-}
-
-// Return the next character as a non-negative value without consuming
-// it, or EOF when the buffer has been used up.
-inline int input_item::peek_char()
-{
-  return ptr < end ? (unsigned char)*ptr : EOF;
-}
-
-// Consume and return the next character as a non-negative value, or
-// return EOF (consuming nothing) when the buffer has been used up.
-inline int input_item::get_char()
-{
-  if (ptr < end)
-    return (unsigned char)*ptr++;
-  return EOF;
-}
-
-// Advance past the next character.  There is no bounds check here; the
-// callers in this file only skip a character that peek_char() has just
-// returned.
-inline void input_item::skip_char()
-{
- ptr++;
-}
-
-// Store this item's filename in *FILENAMEP and the current line number
-// in *LINENOP; always returns 1.  Newlines are counted only up to, and
-// not including, the character most recently read, so a newline that has
-// just been consumed does not yet advance the reported line.
-int input_item::get_location(const char **filenamep, int *linenop)
-{
-  const char *start = buffer.contents();
-  int line = first_lineno;
-  if (ptr != start) {
-    const char *stop = ptr - 1;
-    for (const char *scan = start; scan < stop; scan++)
-      if (*scan == '\n')
-        line++;
-  }
-  *filenamep = filename;
-  *linenop = line;
-  return 1;
-}
-
-// Stack of input_items, linked through input_item::next, from which the
-// command reader takes its characters.  All members are static, so there
-// is a single stack for the whole program.
-class input_stack {
- static input_item *top; // item currently being read; 0 when the stack is empty
-public:
- static void init();
- static int get_char();
- static int peek_char();
- // Dereferences top without checking it for null.
- static void skip_char() { top->skip_char(); }
- static void push_file(const char *);
- static void push_string(string &, const char *, int);
- static void error(const char *format,
- const errarg &arg1 = empty_errarg,
- const errarg &arg2 = empty_errarg,
- const errarg &arg3 = empty_errarg);
-};
-
-input_item *input_stack::top = 0;
-
-// Empty the stack, deleting every item still on it.
-void input_stack::init()
-{
-  while (top != 0) {
-    input_item *below = top->next;
-    delete top;
-    top = below;
-  }
-}
-
-// Consume and return the next character from the topmost item that
-// still has input, deleting exhausted items on the way.  Returns -1 once
-// the stack is empty.
-int input_stack::get_char()
-{
-  for (;;) {
-    if (top == 0)
-      return -1;
-    int ch = top->get_char();
-    if (ch >= 0)
-      return ch;
-    // The top item is used up: drop it and try the one below.
-    input_item *spent = top;
-    top = spent->next;
-    delete spent;
-  }
-}
-
-// Return, without consuming it, the next character from the topmost item
-// that still has input.  Exhausted items are deleted on the way, so after
-// this returns -1 the stack is empty and top is 0.
-int input_stack::peek_char()
-{
-  for (;;) {
-    if (top == 0)
-      return -1;
-    int ch = top->peek_char();
-    if (ch >= 0)
-      return ch;
-    // The top item is used up: drop it and try the one below.
-    input_item *spent = top;
-    top = spent->next;
-    delete spent;
-  }
-}
-
-// Read the whole of file FN ("-" means standard input) into memory and
-// push it on the stack as a new item.  Lines starting with .R1 or .R2
-// are replaced by blank lines, characters rejected by
-// illegal_input_char() are reported and dropped, and a final newline is
-// added if the text does not already end with one.  If the file cannot
-// be opened an error is reported and nothing is pushed.
-void input_stack::push_file(const char *fn)
-{
- FILE *fp;
- if (strcmp(fn, "-") == 0) {
- fp = stdin;
- fn = "<standard input>";
- }
- else {
- errno = 0;
- fp = fopen(fn, "r");
- if (fp == 0) {
- error("can't open `%1': %2", fn, strerror(errno));
- return;
- }
- }
- string buf;
- int bol = 1; // non-zero while at the beginning of a line
- int lineno = 1;
- for (;;) {
- int c = getc(fp);
- if (bol && c == '.') {
- // replace lines beginning with .R1 or .R2 with a blank line
- c = getc(fp);
- if (c == 'R') {
- c = getc(fp);
- if (c == '1' || c == '2') {
- int cc = c; // remember the digit in case the line has to be kept
- c = getc(fp);
- if (compatible_flag || c == ' ' || c == '\n' || c == EOF) {
- // discard the rest of the line; the newline itself is added below
- while (c != '\n' && c != EOF)
- c = getc(fp);
- }
- else {
- // the request name merely starts with .R1/.R2: keep what was read
- buf += '.';
- buf += 'R';
- buf += cc;
- }
- }
- else {
- buf += '.';
- buf += 'R';
- }
- }
- else
- buf += '.';
- }
- // here c is the first character not yet stored in buf
- if (c == EOF)
- break;
- if (illegal_input_char(c))
- error_with_file_and_line(fn, lineno,
- "illegal input character code %1", int(c));
- else {
- buf += c;
- if (c == '\n') {
- bol = 1;
- lineno++;
- }
- else
- bol = 0;
- }
- }
- if (fp != stdin)
- fclose(fp);
- if (buf.length() > 0 && buf[buf.length() - 1] != '\n')
- buf += '\n';
- input_item *it = new input_item(buf, fn);
- it->next = top;
- top = it;
-}
-
-// Push the text of S on the stack as a new item whose location is
-// reported as FILENAME starting at line LINENO.  Unlike push_file(),
-// this adds no trailing newline to the text.
-void input_stack::push_string(string &s, const char *filename, int lineno)
-{
- input_item *it = new input_item(s, filename, lineno);
- it->next = top;
- top = it;
-}
-
-void input_stack::error(const char *format, const errarg &arg1,
- const errarg &arg2, const errarg &arg3)
-{
- const char *filename;
- int lineno;
- for (input_item *it = top; it; it = it->next)
- if (it->get_location(&filename, &lineno)) {
- error_with_file_and_line(filename, lineno, format, arg1, arg2, arg3);
- return;
- }
- ::error(format, arg1, arg2, arg3);
-}
-
-// Non-member entry point that forwards to input_stack::error(), so the
-// message is reported against the current command input position.
-void command_error(const char *format, const errarg &arg1,
- const errarg &arg2, const errarg &arg3)
-{
- input_stack::error(format, arg1, arg2, arg3);
-}
-
-// # not recognized in ""
-// \<newline> is recognized in ""
-// # does not conceal newline
-// if missing closing quote, word extends to end of line
-// no special treatment of \ other than before newline
-// \<newline> not recognized after #
-// ; allowed as alternative to newline
-// ; not recognized in ""
-// don't clear word_buffer; just append on
-// return -1 for EOF, 0 for newline, 1 for word
-
-// Read the next word from the input stack and append it to word_buffer,
-// which is deliberately not cleared first.
-// Returns -1 at end of input, 0 at end of line (a newline or `;'), and
-// 1 when a word was read.
-int get_word(string &word_buffer)
-{
-  int c = input_stack::get_char();
-  // Skip leading blanks, backslash-newline continuations and comments.
-  for (;;) {
-    if (c == '#') {
-      // A comment runs up to the newline but does not hide it.
-      do {
-        c = input_stack::get_char();
-      } while (c != '\n' && c != EOF);
-      break;
-    }
-    if (c == '\\' && input_stack::peek_char() == '\n')
-      input_stack::skip_char();
-    else if (c != ' ' && c != '\t')
-      break;
-    c = input_stack::get_char();
-  }
-  if (c == EOF)
-    return -1;
-  if (c == '\n' || c == ';')
-    return 0;
-  if (c == '"') {
-    // Quoted word: ends at the closing quote, or at end of line or end
-    // of input if the quote is missing.  `#' and `;' are ordinary here.
-    for (;;) {
-      c = input_stack::peek_char();
-      if (c == EOF || c == '\n')
-        break;
-      input_stack::skip_char();
-      if (c == '"') {
-        // A doubled quote is skipped without ending the word.
-        int d = input_stack::peek_char();
-        if (d == '"')
-          input_stack::skip_char();
-        else
-          break;
-      }
-      else if (c == '\\') {
-        // Backslash-newline is dropped; any other backslash is literal.
-        int d = input_stack::peek_char();
-        if (d == '\n')
-          input_stack::skip_char();
-        else
-          word_buffer += '\\';
-      }
-      else
-        word_buffer += c;
-    }
-    return 1;
-  }
-  word_buffer += c;
-  for (;;) {
-    c = input_stack::peek_char();
-    // EOF must end the word as well: peek_char() deletes every exhausted
-    // item, so at end of input there is no item left for skip_char() to
-    // act on.  This matters for text pushed with push_string(), which
-    // need not end in a newline.
-    if (c == EOF || c == ' ' || c == '\t' || c == '\n' || c == '#'
-        || c == ';')
-      break;
-    input_stack::skip_char();
-    if (c == '\\') {
-      // Backslash-newline is dropped; any other backslash is literal.
-      int d = input_stack::peek_char();
-      if (d == '\n')
-        input_stack::skip_char();
-      else
-        word_buffer += '\\';
-    }
-    else
-      word_buffer += c;
-  }
-  return 1;
-}
-
-// One command argument.  command_loop() fills in `s'; check_args()
-// overwrites it with `n' for arguments whose type is `i'.
-union argument {
- const char *s;
- int n;
-};
-
-// This is for debugging.
-// Prints each argument on a line of its own on stderr.
-
-static void echo_command(int argc, argument *argv)
-{
- for (int i = 0; i < argc; i++)
- fprintf(stderr, "%s\n", argv[i].s);
-}
-
-// Push the named file on the input stack so that its commands are read
-// next.  The command table requires exactly one argument.
-static void include_command(int argc, argument *argv)
-{
- assert(argc == 1);
- input_stack::push_file(argv[0].s);
-}
-
-// Set capitalize_fields to the given field list, or clear it when no
-// argument is given.
-static void capitalize_command(int argc, argument *argv)
-{
- if (argc > 0)
- capitalize_fields = argv[0].s;
- else
- capitalize_fields.clear();
-}
-
-// The handlers below just switch the flag of the same name on or off.
-static void accumulate_command(int, argument *)
-{
- accumulate = 1;
-}
-
-static void no_accumulate_command(int, argument *)
-{
- accumulate = 0;
-}
-
-static void move_punctuation_command(int, argument *)
-{
- move_punctuation = 1;
-}
-
-static void no_move_punctuation_command(int, argument *)
-{
- move_punctuation = 0;
-}
-
-// Set sort_fields to the argument, or to "AD" when none is given.
-// Sorting also switches accumulate on.
-static void sort_command(int argc, argument *argv)
-{
- if (argc == 0)
- sort_fields = "AD";
- else
- sort_fields = argv[0].s;
- accumulate = 1;
-}
-
-// Clear sort_fields.  accumulate is left as it is.
-static void no_sort_command(int, argument *)
-{
- sort_fields.clear();
-}
-
-// Replace the articles list with the arguments, each stored followed by
-// a '\0' separator, and then lower-case the whole list with cmlower().
-static void articles_command(int argc, argument *argv)
-{
- articles.clear();
- int i;
- for (i = 0; i < argc; i++) {
- articles += argv[i].s;
- articles += '\0';
- }
- int len = articles.length();
- for (i = 0; i < len; i++)
- articles[i] = cmlower(articles[i]);
-}
-
-// Add every named file to database_list.
-static void database_command(int argc, argument *argv)
-{
- for (int i = 0; i < argc; i++)
- database_list.add_file(argv[i].s);
-}
-
-// Switch the search_default flag on or off.
-static void default_database_command(int, argument *)
-{
- search_default = 1;
-}
-
-static void no_default_database_command(int, argument *)
-{
- search_default = 0;
-}
-
-// Run do_bib() on each named file.  When not accumulating, the output is
-// bracketed by ".]<" and ".]>" lines on stdout; when accumulating,
-// output_references() is called afterwards instead.
-// current_filename, current_lineno and label_in_text are saved before
-// and restored after, and label_in_text is forced to 0 in between.
-static void bibliography_command(int argc, argument *argv)
-{
- const char *saved_filename = current_filename;
- int saved_lineno = current_lineno;
- int saved_label_in_text = label_in_text;
- label_in_text = 0;
- if (!accumulate)
- fputs(".]<\n", stdout);
- for (int i = 0; i < argc; i++)
- do_bib(argv[i].s);
- if (accumulate)
- output_references();
- else
- fputs(".]>\n", stdout);
- current_filename = saved_filename;
- current_lineno = saved_lineno;
- label_in_text = saved_label_in_text;
-}
-
-// Set annotation_field to the first argument's single character
-// (default 'X') and annotation_macro to the second argument
-// (default "AP").
-static void annotate_command(int argc, argument *argv)
-{
- if (argc > 0)
- annotation_field = argv[0].s[0];
- else
- annotation_field = 'X';
- if (argc == 2)
- annotation_macro = argv[1].s;
- else
- annotation_macro = "AP";
-}
-
-// Clear annotation_macro and set annotation_field to -1.
-static void no_annotate_command(int, argument *)
-{
- annotation_macro.clear();
- annotation_field = -1;
-}
-
-// Set reverse_fields; the command table makes the argument mandatory.
-static void reverse_command(int, argument *argv)
-{
- reverse_fields = argv[0].s;
-}
-
-static void no_reverse_command(int, argument *)
-{
- reverse_fields.clear();
-}
-
-// Set abbreviate_fields from the mandatory first argument.  The four
-// optional arguments set the period_before_* strings, which default to
-// ". ", ". ", ". " and "." in that order.
-static void abbreviate_command(int argc, argument *argv)
-{
- abbreviate_fields = argv[0].s;
- period_before_initial = argc > 1 ? argv[1].s : ". ";
- period_before_last_name = argc > 2 ? argv[2].s : ". ";
- period_before_other = argc > 3 ? argv[3].s : ". ";
- period_before_hyphen = argc > 4 ? argv[4].s : ".";
-}
-
-static void no_abbreviate_command(int, argument *)
-{
- abbreviate_fields.clear();
-}
-
-// Backing storage for linear_ignore_fields, which is set to point into
-// this string by search_ignore_command().
-string search_ignore_fields;
-
-// Set the ignored fields to the argument, or to "XYZ" when none is
-// given.  A '\0' is appended so that contents() can be used as a C
-// string.
-static void search_ignore_command(int argc, argument *argv)
-{
- if (argc > 0)
- search_ignore_fields = argv[0].s;
- else
- search_ignore_fields = "XYZ";
- search_ignore_fields += '\0';
- linear_ignore_fields = search_ignore_fields.contents();
-}
-
-// Point linear_ignore_fields at an empty string.
-static void no_search_ignore_command(int, argument *)
-{
- linear_ignore_fields = "";
-}
-
-// Set linear_truncate_len to the integer argument, or to 6 when none is
-// given.
-static void search_truncate_command(int argc, argument *argv)
-{
- if (argc > 0)
- linear_truncate_len = argv[0].n;
- else
- linear_truncate_len = 6;
-}
-
-// Set linear_truncate_len to -1.
-static void no_search_truncate_command(int, argument *)
-{
- linear_truncate_len = -1;
-}
-
-// Set discard_fields to the argument, or to "XYZ" when none is given.
-// This also switches accumulate on.
-static void discard_command(int argc, argument *argv)
-{
- if (argc == 0)
- discard_fields = "XYZ";
- else
- discard_fields = argv[0].s;
- accumulate = 1;
-}
-
-// Clear discard_fields.  accumulate is left as it is.
-static void no_discard_command(int, argument *)
-{
- discard_fields.clear();
-}
-
-// Pass the mandatory argument to set_label_spec().
-static void label_command(int, argument *argv)
-{
- set_label_spec(argv[0].s);
-}
-
-// Switch abbreviate_label_ranges on and set label_range_indicator to
-// the argument, or to "-" when none is given.
-static void abbreviate_label_ranges_command(int argc, argument *argv)
-{
- abbreviate_label_ranges = 1;
- label_range_indicator = argc > 0 ? argv[0].s : "-";
-}
-
-static void no_abbreviate_label_ranges_command(int, argument *)
-{
- abbreviate_label_ranges = 0;
-}
-
-// The handlers below just switch the flag of the same name on or off.
-static void label_in_reference_command(int, argument *)
-{
- label_in_reference = 1;
-}
-
-static void no_label_in_reference_command(int, argument *)
-{
- label_in_reference = 0;
-}
-
-static void label_in_text_command(int, argument *)
-{
- label_in_text = 1;
-}
-
-static void no_label_in_text_command(int, argument *)
-{
- label_in_text = 0;
-}
-
-static void sort_adjacent_labels_command(int, argument *)
-{
- sort_adjacent_labels = 1;
-}
-
-static void no_sort_adjacent_labels_command(int, argument *)
-{
- sort_adjacent_labels = 0;
-}
-
-// Pass the argument (default "D%a*") to set_date_label_spec(), and
-// switch date_as_label on only if that call returns non-zero.
-static void date_as_label_command(int argc, argument *argv)
-{
- if (set_date_label_spec(argc > 0 ? argv[0].s : "D%a*"))
- date_as_label = 1;
-}
-
-static void no_date_as_label_command(int, argument *)
-{
- date_as_label = 0;
-}
-
-// Pass the mandatory argument to set_short_label_spec(), and switch
-// short_label_flag on only if that call returns non-zero.
-static void short_label_command(int, argument *argv)
-{
- if (set_short_label_spec(argv[0].s))
- short_label_flag = 1;
-}
-
-static void no_short_label_command(int, argument *)
-{
- short_label_flag = 0;
-}
-
-// Switch compatible_flag on or off.  push_file() consults this flag when
-// deciding whether a line starting with .R1 or .R2 is blanked out.
-static void compatible_command(int, argument *)
-{
- compatible_flag = 1;
-}
-
-static void no_compatible_command(int, argument *)
-{
- compatible_flag = 0;
-}
-
-// Set the three join_authors_* strings.  The first argument is used for
-// exactly two authors; the second and third, when present, set the
-// default and last-two strings, each falling back to the first argument.
-// The command table requires at least two arguments.
-static void join_authors_command(int argc, argument *argv)
-{
- join_authors_exactly_two = argv[0].s;
- join_authors_default = argc > 1 ? argv[1].s : argv[0].s;
- join_authors_last_two = argc == 3 ? argv[2].s : argv[0].s;
-}
-
-// Set pre_label, post_label and sep_label from the three mandatory
-// arguments.
-static void bracket_label_command(int, argument *argv)
-{
- pre_label = argv[0].s;
- post_label = argv[1].s;
- sep_label = argv[2].s;
-}
-
-static void separate_label_second_parts_command(int, argument *argv)
-{
- separate_label_second_parts = argv[0].s;
-}
-
-// Set et_al to the first argument and et_al_min_elide to the second,
-// raised to at least 1.  et_al_min_total is the optional third argument,
-// or 0 when it is absent.
-static void et_al_command(int argc, argument *argv)
-{
- et_al = argv[0].s;
- et_al_min_elide = argv[1].n;
- if (et_al_min_elide < 1)
- et_al_min_elide = 1;
- et_al_min_total = argc >= 3 ? argv[2].n : 0;
-}
-
-// Clear et_al and set et_al_min_elide to 0.
-static void no_et_al_command(int, argument *)
-{
- et_al.clear();
- et_al_min_elide = 0;
-}
-
-// Signature shared by every command handler: argument count and vector.
-typedef void (*command_t)(int, argument *);
-
-/* Table of the commands understood by execute_command(), which looks
-entries up by name with a linear search.
-
-arg_types is a string describing the numbers and types of arguments.
-s means a string, i means an integer, f is a list of fields, F is
-a single field,
-? means that the previous argument is optional, * means that the
-previous argument can occur any number of times.
-check_args() enforces this description before the handler is called. */
-
-struct {
- const char *name;
- command_t func;
- const char *arg_types;
-} command_table[] = {
- { "include", include_command, "s" },
- { "echo", echo_command, "s*" },
- { "capitalize", capitalize_command, "f?" },
- { "accumulate", accumulate_command, "" },
- { "no-accumulate", no_accumulate_command, "" },
- { "move-punctuation", move_punctuation_command, "" },
- { "no-move-punctuation", no_move_punctuation_command, "" },
- { "sort", sort_command, "s?" },
- { "no-sort", no_sort_command, "" },
- { "articles", articles_command, "s*" },
- { "database", database_command, "ss*" },
- { "default-database", default_database_command, "" },
- { "no-default-database", no_default_database_command, "" },
- { "bibliography", bibliography_command, "ss*" },
- { "annotate", annotate_command, "F?s?" },
- { "no-annotate", no_annotate_command, "" },
- { "reverse", reverse_command, "s" },
- { "no-reverse", no_reverse_command, "" },
- { "abbreviate", abbreviate_command, "ss?s?s?s?" },
- { "no-abbreviate", no_abbreviate_command, "" },
- { "search-ignore", search_ignore_command, "f?" },
- { "no-search-ignore", no_search_ignore_command, "" },
- { "search-truncate", search_truncate_command, "i?" },
- { "no-search-truncate", no_search_truncate_command, "" },
- { "discard", discard_command, "f?" },
- { "no-discard", no_discard_command, "" },
- { "label", label_command, "s" },
- { "abbreviate-label-ranges", abbreviate_label_ranges_command, "s?" },
- { "no-abbreviate-label-ranges", no_abbreviate_label_ranges_command, "" },
- { "label-in-reference", label_in_reference_command, "" },
- { "no-label-in-reference", no_label_in_reference_command, "" },
- { "label-in-text", label_in_text_command, "" },
- { "no-label-in-text", no_label_in_text_command, "" },
- { "sort-adjacent-labels", sort_adjacent_labels_command, "" },
- { "no-sort-adjacent-labels", no_sort_adjacent_labels_command, "" },
- { "date-as-label", date_as_label_command, "s?" },
- { "no-date-as-label", no_date_as_label_command, "" },
- { "short-label", short_label_command, "s" },
- { "no-short-label", no_short_label_command, "" },
- { "compatible", compatible_command, "" },
- { "no-compatible", no_compatible_command, "" },
- { "join-authors", join_authors_command, "sss?" },
- { "bracket-label", bracket_label_command, "sss" },
- { "separate-label-second-parts", separate_label_second_parts_command, "s" },
- { "et-al", et_al_command, "sii?" },
- { "no-et-al", no_et_al_command, "" },
-};
-
-/* Check the ARGC arguments in ARGV for command NAME against the
-arg_types string TYPES.  Each `i' argument is converted in place from
-its string form (argv->s) to an integer (argv->n).
-Returns 1 if the arguments are acceptable.  Otherwise reports the
-problem with input_stack::error() and returns 0; this happens when an
-argument is missing, malformed or surplus, or when an integer does not
-fit in an int. */
-
-static int check_args(const char *types, const char *name,
-                      int argc, argument *argv)
-{
-  int argno = 0;
-  while (*types) {
-    if (argc == 0) {
-      // No arguments left: acceptable only if the current type is
-      // optional (`?') or repeatable (`*').
-      if (types[1] == '?')
-        break;
-      else if (types[1] == '*') {
-        assert(types[2] == '\0');
-        break;
-      }
-      else {
-        input_stack::error("missing argument for command `%1'", name);
-        return 0;
-      }
-    }
-    switch (*types) {
-    case 's':
-      break;
-    case 'i':
-      {
-        char *ptr;
-        errno = 0;
-        long n = strtol(argv->s, &ptr, 10);
-        // Reject an empty or non-numeric string, trailing junk, and any
-        // value that overflows long or would be altered by the
-        // narrowing to int below.
-        if (ptr == argv->s || *ptr != '\0'
-            || errno == ERANGE || (long)(int)n != n) {
-          input_stack::error("argument %1 for command `%2' must be an integer",
-                             argno + 1, name);
-          return 0;
-        }
-        argv->n = (int)n;
-        break;
-      }
-    case 'f':
-      {
-        for (const char *ptr = argv->s; *ptr != '\0'; ptr++)
-          if (!cs_field_name(*ptr)) {
-            input_stack::error("argument %1 for command `%2' must be a list of fields",
-                               argno + 1, name);
-            return 0;
-          }
-        break;
-      }
-    case 'F':
-      // Exactly one character, and it must be a field name.
-      if (argv->s[0] == '\0' || argv->s[1] != '\0'
-          || !cs_field_name(argv->s[0])) {
-        input_stack::error("argument %1 for command `%2' must be a field name",
-                           argno + 1, name);
-        return 0;
-      }
-      break;
-    default:
-      assert(0);
-    }
-    // Move to the next type, except that a `*' type is reused for every
-    // remaining argument.
-    if (types[1] == '?')
-      types += 2;
-    else if (types[1] != '*')
-      types += 1;
-    --argc;
-    ++argv;
-    ++argno;
-  }
-  if (argc > 0) {
-    input_stack::error("too many arguments for command `%1'", name);
-    return 0;
-  }
-  return 1;
-}
-
-// Find NAME in command_table and, if its arguments pass check_args(),
-// call the handler.  An unknown name is reported as an error.
-static void execute_command(const char *name, int argc, argument *argv)
-{
-  const int ncommands = sizeof(command_table)/sizeof(command_table[0]);
-  int idx = 0;
-  while (idx < ncommands && strcmp(name, command_table[idx].name) != 0)
-    idx++;
-  if (idx == ncommands) {
-    input_stack::error("unknown command `%1'", name);
-    return;
-  }
-  if (check_args(command_table[idx].arg_types, name, argc, argv))
-    (*command_table[idx].func)(argc, argv);
-}
-
-// Read commands from the input stack until it is exhausted.  The words
-// of one command are gathered in a single string, each followed by a
-// '\0', so the argument vector can simply point into that string.
-static void command_loop()
-{
-  string words;
-  for (;;) {
-    words.clear();
-    int status = get_word(words);
-    if (status == 0)
-      continue;                 // empty line
-    if (status != 1)
-      break;                    // end of input
-    int nargs = 0;
-    words += '\0';
-    for (status = get_word(words); status == 1; status = get_word(words)) {
-      nargs++;
-      words += '\0';
-    }
-    argument *args = new argument[nargs];
-    const char *cursor = words.contents();
-    for (int k = 0; k < nargs; k++) {
-      // Step over the previous word and its terminator.
-      cursor = strchr(cursor, '\0') + 1;
-      args[k].s = cursor;
-    }
-    execute_command(words.contents(), nargs, args);
-    a_delete args;
-    if (status == -1)
-      break;
-  }
-}
-
-// Discard any pending command input, then read and execute the commands
-// in FILE.
-void process_commands(const char *file)
-{
- input_stack::init();
- input_stack::push_file(file);
- command_loop();
-}
-
-// Discard any pending command input, then execute the commands held in
-// S.  FILE and LINENO are used only to locate error messages.
-void process_commands(string &s, const char *file, int lineno)
-{
- input_stack::init();
- input_stack::push_string(s, file, lineno);
- command_loop();
-}
diff --git a/contrib/groff/refer/command.h b/contrib/groff/refer/command.h
deleted file mode 100644
index c7085db6927b..000000000000
--- a/contrib/groff/refer/command.h
+++ /dev/null
@@ -1,36 +0,0 @@
-// -*- C++ -*-
-/* Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc.
- Written by James Clark (jjc@jclark.com)
-
-This file is part of groff.
-
-groff is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 2, or (at your option) any later
-version.
-
-groff is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received a copy of the GNU General Public License along
-with groff; see the file COPYING. If not, write to the Free Software
-Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
-
-// Execute the commands in a file, or in a string whose origin is
-// reported as the given file and line number.
-void process_commands(const char *file);
-void process_commands(string &s, const char *file, int lineno);
-
-// Settings that the command handlers in command.cc read or modify.
-extern int accumulate;
-extern int move_punctuation;
-extern int search_default;
-extern search_list database_list;
-extern int label_in_text;
-extern int label_in_reference;
-extern int sort_adjacent_labels;
-extern string pre_label;
-extern string post_label;
-extern string sep_label;
-
-// Called by the bibliography command in command.cc.
-extern void do_bib(const char *);
-extern void output_references();
diff --git a/contrib/groff/refer/label.y b/contrib/groff/refer/label.y
deleted file mode 100644
index 2c7c90951ab0..000000000000
--- a/contrib/groff/refer/label.y
+++ /dev/null
@@ -1,1177 +0,0 @@
-/* -*- C++ -*-
- Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc.
- Written by James Clark (jjc@jclark.com)
-
-This file is part of groff.
-
-groff is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 2, or (at your option) any later
-version.
-
-groff is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received a copy of the GNU General Public License along
-with groff; see the file COPYING. If not, write to the Free Software
-Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
-
-%{
-
-#include "refer.h"
-#include "refid.h"
-#include "ref.h"
-#include "token.h"
-
-int yylex();
-void yyerror(const char *);
-int yyparse();
-
-static const char *format_serial(char c, int n);
-
-struct label_info {
- int start;
- int length;
- int count;
- int total;
- label_info(const string &);
-};
-
-label_info *lookup_label(const string &label);
-
-struct expression {
- enum {
- // Does the tentative label depend on the reference?
- CONTAINS_VARIABLE = 01,
- CONTAINS_STAR = 02,
- CONTAINS_FORMAT = 04,
- CONTAINS_AT = 010
- };
- virtual ~expression() { }
- virtual void evaluate(int, const reference &, string &,
- substring_position &) = 0;
- virtual unsigned analyze() { return 0; }
-};
-
-class at_expr : public expression {
-public:
- at_expr() { }
- void evaluate(int, const reference &, string &, substring_position &);
- unsigned analyze() { return CONTAINS_VARIABLE|CONTAINS_AT; }
-};
-
-class format_expr : public expression {
- char type;
- int width;
- int first_number;
-public:
- format_expr(char c, int w = 0, int f = 1)
- : type(c), width(w), first_number(f) { }
- void evaluate(int, const reference &, string &, substring_position &);
- unsigned analyze() { return CONTAINS_FORMAT; }
-};
-
-class field_expr : public expression {
- int number;
- char name;
-public:
- field_expr(char nm, int num) : number(num), name(nm) { }
- void evaluate(int, const reference &, string &, substring_position &);
- unsigned analyze() { return CONTAINS_VARIABLE; }
-};
-
-class literal_expr : public expression {
- string s;
-public:
- literal_expr(const char *ptr, int len) : s(ptr, len) { }
- void evaluate(int, const reference &, string &, substring_position &);
-};
-
-class unary_expr : public expression {
-protected:
- expression *expr;
-public:
- unary_expr(expression *e) : expr(e) { }
- ~unary_expr() { delete expr; }
- void evaluate(int, const reference &, string &, substring_position &) = 0;
- unsigned analyze() { return expr ? expr->analyze() : 0; }
-};
-
-// This caches the analysis of an expression.
-
-class analyzed_expr : public unary_expr {
- unsigned flags;
-public:
- analyzed_expr(expression *);
- void evaluate(int, const reference &, string &, substring_position &);
- unsigned analyze() { return flags; }
-};
-
-class star_expr : public unary_expr {
-public:
- star_expr(expression *e) : unary_expr(e) { }
- void evaluate(int, const reference &, string &, substring_position &);
- unsigned analyze() {
- return ((expr ? (expr->analyze() & ~CONTAINS_VARIABLE) : 0)
- | CONTAINS_STAR);
- }
-};
-
-typedef void map_func(const char *, const char *, string &);
-
-class map_expr : public unary_expr {
- map_func *func;
-public:
- map_expr(expression *e, map_func *f) : unary_expr(e), func(f) { }
- void evaluate(int, const reference &, string &, substring_position &);
-};
-
-typedef const char *extractor_func(const char *, const char *, const char **);
-
-class extractor_expr : public unary_expr {
- int part;
- extractor_func *func;
-public:
- enum { BEFORE = +1, MATCH = 0, AFTER = -1 };
- extractor_expr(expression *e, extractor_func *f, int pt)
- : unary_expr(e), part(pt), func(f) { }
- void evaluate(int, const reference &, string &, substring_position &);
-};
-
-class truncate_expr : public unary_expr {
- int n;
-public:
- truncate_expr(expression *e, int i) : unary_expr(e), n(i) { }
- void evaluate(int, const reference &, string &, substring_position &);
-};
-
-class separator_expr : public unary_expr {
-public:
- separator_expr(expression *e) : unary_expr(e) { }
- void evaluate(int, const reference &, string &, substring_position &);
-};
-
-class binary_expr : public expression {
-protected:
- expression *expr1;
- expression *expr2;
-public:
- binary_expr(expression *e1, expression *e2) : expr1(e1), expr2(e2) { }
- ~binary_expr() { delete expr1; delete expr2; }
- void evaluate(int, const reference &, string &, substring_position &) = 0;
- unsigned analyze() {
- return (expr1 ? expr1->analyze() : 0) | (expr2 ? expr2->analyze() : 0);
- }
-};
-
-class alternative_expr : public binary_expr {
-public:
- alternative_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { }
- void evaluate(int, const reference &, string &, substring_position &);
-};
-
-class list_expr : public binary_expr {
-public:
- list_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { }
- void evaluate(int, const reference &, string &, substring_position &);
-};
-
-class substitute_expr : public binary_expr {
-public:
- substitute_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { }
- void evaluate(int, const reference &, string &, substring_position &);
-};
-
-class ternary_expr : public expression {
-protected:
- expression *expr1;
- expression *expr2;
- expression *expr3;
-public:
- ternary_expr(expression *e1, expression *e2, expression *e3)
- : expr1(e1), expr2(e2), expr3(e3) { }
- ~ternary_expr() { delete expr1; delete expr2; delete expr3; }
- void evaluate(int, const reference &, string &, substring_position &) = 0;
- unsigned analyze() {
- return ((expr1 ? expr1->analyze() : 0)
- | (expr2 ? expr2->analyze() : 0)
- | (expr3 ? expr3->analyze() : 0));
- }
-};
-
-class conditional_expr : public ternary_expr {
-public:
- conditional_expr(expression *e1, expression *e2, expression *e3)
- : ternary_expr(e1, e2, e3) { }
- void evaluate(int, const reference &, string &, substring_position &);
-};
-
-static expression *parsed_label = 0;
-static expression *parsed_date_label = 0;
-static expression *parsed_short_label = 0;
-
-static expression *parse_result;
-
-string literals;
-
-%}
-
-%union {
- int num;
- expression *expr;
- struct { int ndigits; int val; } dig;
- struct { int start; int len; } str;
-}
-
-/* uppercase or lowercase letter */
-%token <num> TOKEN_LETTER
-/* literal characters */
-%token <str> TOKEN_LITERAL
-/* digit */
-%token <num> TOKEN_DIGIT
-
-%type <expr> conditional
-%type <expr> alternative
-%type <expr> list
-%type <expr> string
-%type <expr> substitute
-%type <expr> optional_conditional
-%type <num> number
-%type <dig> digits
-%type <num> optional_number
-%type <num> flag
-
-%%
-
-expr:
- optional_conditional
- { parse_result = ($1 ? new analyzed_expr($1) : 0); }
- ;
-
-conditional:
- alternative
- { $$ = $1; }
- | alternative '?' optional_conditional ':' conditional
- { $$ = new conditional_expr($1, $3, $5); }
- ;
-
-optional_conditional:
- /* empty */
- { $$ = 0; }
- | conditional
- { $$ = $1; }
- ;
-
-alternative:
- list
- { $$ = $1; }
- | alternative '|' list
- { $$ = new alternative_expr($1, $3); }
- | alternative '&' list
- { $$ = new conditional_expr($1, $3, 0); }
- ;
-
-list:
- substitute
- { $$ = $1; }
- | list substitute
- { $$ = new list_expr($1, $2); }
- ;
-
-substitute:
- string
- { $$ = $1; }
- | substitute '~' string
- { $$ = new substitute_expr($1, $3); }
- ;
-
-string:
- '@'
- { $$ = new at_expr; }
- | TOKEN_LITERAL
- {
- $$ = new literal_expr(literals.contents() + $1.start,
- $1.len);
- }
- | TOKEN_LETTER
- { $$ = new field_expr($1, 0); }
- | TOKEN_LETTER number
- { $$ = new field_expr($1, $2 - 1); }
- | '%' TOKEN_LETTER
- {
- switch ($2) {
- case 'I':
- case 'i':
- case 'A':
- case 'a':
- $$ = new format_expr($2);
- break;
- default:
- command_error("unrecognized format `%1'", char($2));
- $$ = new format_expr('a');
- break;
- }
- }
-
- | '%' digits
- {
- $$ = new format_expr('0', $2.ndigits, $2.val);
- }
- | string '.' flag TOKEN_LETTER optional_number
- {
- switch ($4) {
- case 'l':
- $$ = new map_expr($1, lowercase);
- break;
- case 'u':
- $$ = new map_expr($1, uppercase);
- break;
- case 'c':
- $$ = new map_expr($1, capitalize);
- break;
- case 'r':
- $$ = new map_expr($1, reverse_name);
- break;
- case 'a':
- $$ = new map_expr($1, abbreviate_name);
- break;
- case 'y':
- $$ = new extractor_expr($1, find_year, $3);
- break;
- case 'n':
- $$ = new extractor_expr($1, find_last_name, $3);
- break;
- default:
- $$ = $1;
- command_error("unknown function `%1'", char($4));
- break;
- }
- }
-
- | string '+' number
- { $$ = new truncate_expr($1, $3); }
- | string '-' number
- { $$ = new truncate_expr($1, -$3); }
- | string '*'
- { $$ = new star_expr($1); }
- | '(' optional_conditional ')'
- { $$ = $2; }
- | '<' optional_conditional '>'
- { $$ = new separator_expr($2); }
- ;
-
-optional_number:
- /* empty */
- { $$ = -1; }
- | number
- { $$ = $1; }
- ;
-
-number:
- TOKEN_DIGIT
- { $$ = $1; }
- | number TOKEN_DIGIT
- { $$ = $1*10 + $2; }
- ;
-
-digits:
- TOKEN_DIGIT
- { $$.ndigits = 1; $$.val = $1; }
- | digits TOKEN_DIGIT
- { $$.ndigits = $1.ndigits + 1; $$.val = $1.val*10 + $2; }
- ;
-
-
-flag:
- /* empty */
- { $$ = 0; }
- | '+'
- { $$ = 1; }
- | '-'
- { $$ = -1; }
- ;
-
-%%
-
-/* bison defines const to be empty unless __STDC__ is defined, which it
-isn't under cfront */
-
-#ifdef const
-#undef const
-#endif
-
-const char *spec_ptr;
-const char *spec_end;
-const char *spec_cur;
-
-int yylex()
-{
- while (spec_ptr < spec_end && csspace(*spec_ptr))
- spec_ptr++;
- spec_cur = spec_ptr;
- if (spec_ptr >= spec_end)
- return 0;
- unsigned char c = *spec_ptr++;
- if (csalpha(c)) {
- yylval.num = c;
- return TOKEN_LETTER;
- }
- if (csdigit(c)) {
- yylval.num = c - '0';
- return TOKEN_DIGIT;
- }
- if (c == '\'') {
- yylval.str.start = literals.length();
- for (; spec_ptr < spec_end; spec_ptr++) {
- if (*spec_ptr == '\'') {
- if (++spec_ptr < spec_end && *spec_ptr == '\'')
- literals += '\'';
- else {
- yylval.str.len = literals.length() - yylval.str.start;
- return TOKEN_LITERAL;
- }
- }
- else
- literals += *spec_ptr;
- }
- yylval.str.len = literals.length() - yylval.str.start;
- return TOKEN_LITERAL;
- }
- return c;
-}
-
-int set_label_spec(const char *label_spec)
-{
- spec_cur = spec_ptr = label_spec;
- spec_end = strchr(label_spec, '\0');
- literals.clear();
- if (yyparse())
- return 0;
- delete parsed_label;
- parsed_label = parse_result;
- return 1;
-}
-
-int set_date_label_spec(const char *label_spec)
-{
- spec_cur = spec_ptr = label_spec;
- spec_end = strchr(label_spec, '\0');
- literals.clear();
- if (yyparse())
- return 0;
- delete parsed_date_label;
- parsed_date_label = parse_result;
- return 1;
-}
-
-int set_short_label_spec(const char *label_spec)
-{
- spec_cur = spec_ptr = label_spec;
- spec_end = strchr(label_spec, '\0');
- literals.clear();
- if (yyparse())
- return 0;
- delete parsed_short_label;
- parsed_short_label = parse_result;
- return 1;
-}
-
-void yyerror(const char *message)
-{
- if (spec_cur < spec_end)
- command_error("label specification %1 before `%2'", message, spec_cur);
- else
- command_error("label specification %1 at end of string",
- message, spec_cur);
-}
-
-void at_expr::evaluate(int tentative, const reference &ref,
- string &result, substring_position &)
-{
- if (tentative)
- ref.canonicalize_authors(result);
- else {
- const char *end, *start = ref.get_authors(&end);
- if (start)
- result.append(start, end - start);
- }
-}
-
-void format_expr::evaluate(int tentative, const reference &ref,
- string &result, substring_position &)
-{
- if (tentative)
- return;
- const label_info *lp = ref.get_label_ptr();
- int num = lp == 0 ? ref.get_number() : lp->count;
- if (type != '0')
- result += format_serial(type, num + 1);
- else {
- const char *ptr = i_to_a(num + first_number);
- int pad = width - strlen(ptr);
- while (--pad >= 0)
- result += '0';
- result += ptr;
- }
-}
-
-static const char *format_serial(char c, int n)
-{
- assert(n > 0);
- static char buf[128]; // more than enough.
- switch (c) {
- case 'i':
- case 'I':
- {
- char *p = buf;
- // troff uses z and w to represent 10000 and 5000 in Roman
- // numerals; I can find no historical basis for this usage
- const char *s = c == 'i' ? "zwmdclxvi" : "ZWMDCLXVI";
- if (n >= 40000)
- return i_to_a(n);
- while (n >= 10000) {
- *p++ = s[0];
- n -= 10000;
- }
- for (int i = 1000; i > 0; i /= 10, s += 2) {
- int m = n/i;
- n -= m*i;
- switch (m) {
- case 3:
- *p++ = s[2];
- /* falls through */
- case 2:
- *p++ = s[2];
- /* falls through */
- case 1:
- *p++ = s[2];
- break;
- case 4:
- *p++ = s[2];
- *p++ = s[1];
- break;
- case 8:
- *p++ = s[1];
- *p++ = s[2];
- *p++ = s[2];
- *p++ = s[2];
- break;
- case 7:
- *p++ = s[1];
- *p++ = s[2];
- *p++ = s[2];
- break;
- case 6:
- *p++ = s[1];
- *p++ = s[2];
- break;
- case 5:
- *p++ = s[1];
- break;
- case 9:
- *p++ = s[2];
- *p++ = s[0];
- }
- }
- *p = 0;
- break;
- }
- case 'a':
- case 'A':
- {
- char *p = buf;
- // this is derived from troff/reg.c
- while (n > 0) {
- int d = n % 26;
- if (d == 0)
- d = 26;
- n -= d;
- n /= 26;
- *p++ = c + d - 1; // ASCII dependent
- }
- *p-- = 0;
- // Reverse it.
- char *q = buf;
- while (q < p) {
- char temp = *q;
- *q = *p;
- *p = temp;
- --p;
- ++q;
- }
- break;
- }
- default:
- assert(0);
- }
- return buf;
-}
-
-void field_expr::evaluate(int, const reference &ref,
- string &result, substring_position &)
-{
- const char *end;
- const char *start = ref.get_field(name, &end);
- if (start) {
- start = nth_field(number, start, &end);
- if (start)
- result.append(start, end - start);
- }
-}
-
-void literal_expr::evaluate(int, const reference &,
- string &result, substring_position &)
-{
- result += s;
-}
-
-analyzed_expr::analyzed_expr(expression *e)
-: unary_expr(e), flags(e ? e->analyze() : 0)
-{
-}
-
-void analyzed_expr::evaluate(int tentative, const reference &ref,
- string &result, substring_position &pos)
-{
- if (expr)
- expr->evaluate(tentative, ref, result, pos);
-}
-
-void star_expr::evaluate(int tentative, const reference &ref,
- string &result, substring_position &pos)
-{
- const label_info *lp = ref.get_label_ptr();
- if (!tentative
- && (lp == 0 || lp->total > 1)
- && expr)
- expr->evaluate(tentative, ref, result, pos);
-}
-
-void separator_expr::evaluate(int tentative, const reference &ref,
- string &result, substring_position &pos)
-{
- int start_length = result.length();
- int is_first = pos.start < 0;
- if (expr)
- expr->evaluate(tentative, ref, result, pos);
- if (is_first) {
- pos.start = start_length;
- pos.length = result.length() - start_length;
- }
-}
-
-void map_expr::evaluate(int tentative, const reference &ref,
- string &result, substring_position &)
-{
- if (expr) {
- string temp;
- substring_position temp_pos;
- expr->evaluate(tentative, ref, temp, temp_pos);
- (*func)(temp.contents(), temp.contents() + temp.length(), result);
- }
-}
-
-void extractor_expr::evaluate(int tentative, const reference &ref,
- string &result, substring_position &)
-{
- if (expr) {
- string temp;
- substring_position temp_pos;
- expr->evaluate(tentative, ref, temp, temp_pos);
- const char *end, *start = (*func)(temp.contents(),
- temp.contents() + temp.length(),
- &end);
- switch (part) {
- case BEFORE:
- if (start)
- result.append(temp.contents(), start - temp.contents());
- else
- result += temp;
- break;
- case MATCH:
- if (start)
- result.append(start, end - start);
- break;
- case AFTER:
- if (start)
- result.append(end, temp.contents() + temp.length() - end);
- break;
- default:
- assert(0);
- }
- }
-}
-
-static void first_part(int len, const char *ptr, const char *end,
- string &result)
-{
- for (;;) {
- const char *token_start = ptr;
- if (!get_token(&ptr, end))
- break;
- const token_info *ti = lookup_token(token_start, ptr);
- int counts = ti->sortify_non_empty(token_start, ptr);
- if (counts && --len < 0)
- break;
- if (counts || ti->is_accent())
- result.append(token_start, ptr - token_start);
- }
-}
-
-static void last_part(int len, const char *ptr, const char *end,
- string &result)
-{
- const char *start = ptr;
- int count = 0;
- for (;;) {
- const char *token_start = ptr;
- if (!get_token(&ptr, end))
- break;
- const token_info *ti = lookup_token(token_start, ptr);
- if (ti->sortify_non_empty(token_start, ptr))
- count++;
- }
- ptr = start;
- int skip = count - len;
- if (skip > 0) {
- for (;;) {
- const char *token_start = ptr;
- if (!get_token(&ptr, end))
- assert(0);
- const token_info *ti = lookup_token(token_start, ptr);
- if (ti->sortify_non_empty(token_start, ptr) && --skip < 0) {
- ptr = token_start;
- break;
- }
- }
- }
- first_part(len, ptr, end, result);
-}
-
-void truncate_expr::evaluate(int tentative, const reference &ref,
- string &result, substring_position &)
-{
- if (expr) {
- string temp;
- substring_position temp_pos;
- expr->evaluate(tentative, ref, temp, temp_pos);
- const char *start = temp.contents();
- const char *end = start + temp.length();
- if (n > 0)
- first_part(n, start, end, result);
- else if (n < 0)
- last_part(-n, start, end, result);
- }
-}
-
-void alternative_expr::evaluate(int tentative, const reference &ref,
- string &result, substring_position &pos)
-{
- int start_length = result.length();
- if (expr1)
- expr1->evaluate(tentative, ref, result, pos);
- if (result.length() == start_length && expr2)
- expr2->evaluate(tentative, ref, result, pos);
-}
-
-void list_expr::evaluate(int tentative, const reference &ref,
- string &result, substring_position &pos)
-{
- if (expr1)
- expr1->evaluate(tentative, ref, result, pos);
- if (expr2)
- expr2->evaluate(tentative, ref, result, pos);
-}
-
-void substitute_expr::evaluate(int tentative, const reference &ref,
- string &result, substring_position &pos)
-{
- int start_length = result.length();
- if (expr1)
- expr1->evaluate(tentative, ref, result, pos);
- if (result.length() > start_length && result[result.length() - 1] == '-') {
- // ought to see if pos covers the -
- result.set_length(result.length() - 1);
- if (expr2)
- expr2->evaluate(tentative, ref, result, pos);
- }
-}
-
-void conditional_expr::evaluate(int tentative, const reference &ref,
- string &result, substring_position &pos)
-{
- string temp;
- substring_position temp_pos;
- if (expr1)
- expr1->evaluate(tentative, ref, temp, temp_pos);
- if (temp.length() > 0) {
- if (expr2)
- expr2->evaluate(tentative, ref, result, pos);
- }
- else {
- if (expr3)
- expr3->evaluate(tentative, ref, result, pos);
- }
-}
-
-void reference::pre_compute_label()
-{
- if (parsed_label != 0
- && (parsed_label->analyze() & expression::CONTAINS_VARIABLE)) {
- label.clear();
- substring_position temp_pos;
- parsed_label->evaluate(1, *this, label, temp_pos);
- label_ptr = lookup_label(label);
- }
-}
-
-void reference::compute_label()
-{
- label.clear();
- if (parsed_label)
- parsed_label->evaluate(0, *this, label, separator_pos);
- if (short_label_flag && parsed_short_label)
- parsed_short_label->evaluate(0, *this, short_label, short_separator_pos);
- if (date_as_label) {
- string new_date;
- if (parsed_date_label) {
- substring_position temp_pos;
- parsed_date_label->evaluate(0, *this, new_date, temp_pos);
- }
- set_date(new_date);
- }
- if (label_ptr)
- label_ptr->count += 1;
-}
-
-void reference::immediate_compute_label()
-{
- if (label_ptr)
- label_ptr->total = 2; // force use of disambiguator
- compute_label();
-}
-
-int reference::merge_labels(reference **v, int n, label_type type,
- string &result)
-{
- if (abbreviate_label_ranges)
- return merge_labels_by_number(v, n, type, result);
- else
- return merge_labels_by_parts(v, n, type, result);
-}
-
-int reference::merge_labels_by_number(reference **v, int n, label_type type,
- string &result)
-{
- if (n <= 1)
- return 0;
- int num = get_number();
- // Only merge three or more labels.
- if (v[0]->get_number() != num + 1
- || v[1]->get_number() != num + 2)
- return 0;
- int i;
- for (i = 2; i < n; i++)
- if (v[i]->get_number() != num + i + 1)
- break;
- result = get_label(type);
- result += label_range_indicator;
- result += v[i - 1]->get_label(type);
- return i;
-}
-
-const substring_position &reference::get_separator_pos(label_type type) const
-{
- if (type == SHORT_LABEL && short_label_flag)
- return short_separator_pos;
- else
- return separator_pos;
-}
-
-const string &reference::get_label(label_type type) const
-{
- if (type == SHORT_LABEL && short_label_flag)
- return short_label;
- else
- return label;
-}
-
-int reference::merge_labels_by_parts(reference **v, int n, label_type type,
- string &result)
-{
- if (n <= 0)
- return 0;
- const string &lb = get_label(type);
- const substring_position &sp = get_separator_pos(type);
- if (sp.start < 0
- || sp.start != v[0]->get_separator_pos(type).start
- || memcmp(lb.contents(), v[0]->get_label(type).contents(),
- sp.start) != 0)
- return 0;
- result = lb;
- int i = 0;
- do {
- result += separate_label_second_parts;
- const substring_position &s = v[i]->get_separator_pos(type);
- int sep_end_pos = s.start + s.length;
- result.append(v[i]->get_label(type).contents() + sep_end_pos,
- v[i]->get_label(type).length() - sep_end_pos);
- } while (++i < n
- && sp.start == v[i]->get_separator_pos(type).start
- && memcmp(lb.contents(), v[i]->get_label(type).contents(),
- sp.start) == 0);
- return i;
-}
-
-string label_pool;
-
-label_info::label_info(const string &s)
-: start(label_pool.length()), length(s.length()), count(0), total(1)
-{
- label_pool += s;
-}
-
-static label_info **label_table = 0;
-static int label_table_size = 0;
-static int label_table_used = 0;
-
-label_info *lookup_label(const string &label)
-{
- if (label_table == 0) {
- label_table = new label_info *[17];
- label_table_size = 17;
- for (int i = 0; i < 17; i++)
- label_table[i] = 0;
- }
- unsigned h = hash_string(label.contents(), label.length()) % label_table_size;
- label_info **ptr;
- for (ptr = label_table + h;
- *ptr != 0;
- (ptr == label_table)
- ? (ptr = label_table + label_table_size - 1)
- : ptr--)
- if ((*ptr)->length == label.length()
- && memcmp(label_pool.contents() + (*ptr)->start, label.contents(),
- label.length()) == 0) {
- (*ptr)->total += 1;
- return *ptr;
- }
- label_info *result = *ptr = new label_info(label);
- if (++label_table_used * 2 > label_table_size) {
- // Rehash the table.
- label_info **old_table = label_table;
- int old_size = label_table_size;
- label_table_size = next_size(label_table_size);
- label_table = new label_info *[label_table_size];
- int i;
- for (i = 0; i < label_table_size; i++)
- label_table[i] = 0;
- for (i = 0; i < old_size; i++)
- if (old_table[i]) {
- unsigned h = hash_string(label_pool.contents() + old_table[i]->start,
- old_table[i]->length);
- label_info **p;
- for (p = label_table + (h % label_table_size);
- *p != 0;
- (p == label_table)
- ? (p = label_table + label_table_size - 1)
- : --p)
- ;
- *p = old_table[i];
- }
- a_delete old_table;
- }
- return result;
-}
-
-void clear_labels()
-{
- for (int i = 0; i < label_table_size; i++) {
- delete label_table[i];
- label_table[i] = 0;
- }
- label_table_used = 0;
- label_pool.clear();
-}
-
-static void consider_authors(reference **start, reference **end, int i);
-
-void compute_labels(reference **v, int n)
-{
- if (parsed_label
- && (parsed_label->analyze() & expression::CONTAINS_AT)
- && sort_fields.length() >= 2
- && sort_fields[0] == 'A'
- && sort_fields[1] == '+')
- consider_authors(v, v + n, 0);
- for (int i = 0; i < n; i++)
- v[i]->compute_label();
-}
-
-
-/* A reference with a list of authors <A0,A1,...,AN> _needs_ author i
-where 0 <= i <= N if there exists a reference with a list of authors
-<B0,B1,...,BM> such that <A0,A1,...,AN> != <B0,B1,...,BM> and M >= i
-and Aj = Bj for 0 <= j < i. In this case if we can't say ``A0,
-A1,...,A(i-1) et al'' because this would match both <A0,A1,...,AN> and
-<B0,B1,...,BM>. If a reference needs author i we only have to call
-need_author(j) for some j >= i such that the reference also needs
-author j. */
-
-/* This function handles 2 tasks:
-determine which authors are needed (cannot be elided with et al.);
-determine which authors can have only last names in the labels.
-
-References >= start and < end have the same first i author names.
-Also they're sorted by A+. */
-
-static void consider_authors(reference **start, reference **end, int i)
-{
- if (start >= end)
- return;
- reference **p = start;
- if (i >= (*p)->get_nauthors()) {
- for (++p; p < end && i >= (*p)->get_nauthors(); p++)
- ;
- if (p < end && i > 0) {
- // If we have an author list <A B C> and an author list <A B C D>,
- // then both lists need C.
- for (reference **q = start; q < end; q++)
- (*q)->need_author(i - 1);
- }
- start = p;
- }
- while (p < end) {
- reference **last_name_start = p;
- reference **name_start = p;
- for (++p;
- p < end && i < (*p)->get_nauthors()
- && same_author_last_name(**last_name_start, **p, i);
- p++) {
- if (!same_author_name(**name_start, **p, i)) {
- consider_authors(name_start, p, i + 1);
- name_start = p;
- }
- }
- consider_authors(name_start, p, i + 1);
- if (last_name_start == name_start) {
- for (reference **q = last_name_start; q < p; q++)
- (*q)->set_last_name_unambiguous(i);
- }
- // If we have an author list <A B C D> and <A B C E>, then the lists
- // need author D and E respectively.
- if (name_start > start || p < end) {
- for (reference **q = last_name_start; q < p; q++)
- (*q)->need_author(i);
- }
- }
-}
-
-int same_author_last_name(const reference &r1, const reference &r2, int n)
-{
- const char *ae1;
- const char *as1 = r1.get_sort_field(0, n, 0, &ae1);
- assert(as1 != 0);
- const char *ae2;
- const char *as2 = r2.get_sort_field(0, n, 0, &ae2);
- assert(as2 != 0);
- return ae1 - as1 == ae2 - as2 && memcmp(as1, as2, ae1 - as1) == 0;
-}
-
-int same_author_name(const reference &r1, const reference &r2, int n)
-{
- const char *ae1;
- const char *as1 = r1.get_sort_field(0, n, -1, &ae1);
- assert(as1 != 0);
- const char *ae2;
- const char *as2 = r2.get_sort_field(0, n, -1, &ae2);
- assert(as2 != 0);
- return ae1 - as1 == ae2 - as2 && memcmp(as1, as2, ae1 - as1) == 0;
-}
-
-
-void int_set::set(int i)
-{
- assert(i >= 0);
- int bytei = i >> 3;
- if (bytei >= v.length()) {
- int old_length = v.length();
- v.set_length(bytei + 1);
- for (int j = old_length; j <= bytei; j++)
- v[j] = 0;
- }
- v[bytei] |= 1 << (i & 7);
-}
-
-int int_set::get(int i) const
-{
- assert(i >= 0);
- int bytei = i >> 3;
- return bytei >= v.length() ? 0 : (v[bytei] & (1 << (i & 7))) != 0;
-}
-
-void reference::set_last_name_unambiguous(int i)
-{
- last_name_unambiguous.set(i);
-}
-
-void reference::need_author(int n)
-{
- if (n > last_needed_author)
- last_needed_author = n;
-}
-
-const char *reference::get_authors(const char **end) const
-{
- if (!computed_authors) {
- ((reference *)this)->computed_authors = 1;
- string &result = ((reference *)this)->authors;
- int na = get_nauthors();
- result.clear();
- for (int i = 0; i < na; i++) {
- if (last_name_unambiguous.get(i)) {
- const char *e, *start = get_author_last_name(i, &e);
- assert(start != 0);
- result.append(start, e - start);
- }
- else {
- const char *e, *start = get_author(i, &e);
- assert(start != 0);
- result.append(start, e - start);
- }
- if (i == last_needed_author
- && et_al.length() > 0
- && et_al_min_elide > 0
- && last_needed_author + et_al_min_elide < na
- && na >= et_al_min_total) {
- result += et_al;
- break;
- }
- if (i < na - 1) {
- if (na == 2)
- result += join_authors_exactly_two;
- else if (i < na - 2)
- result += join_authors_default;
- else
- result += join_authors_last_two;
- }
- }
- }
- const char *start = authors.contents();
- *end = start + authors.length();
- return start;
-}
-
-int reference::get_nauthors() const
-{
- if (nauthors < 0) {
- const char *dummy;
- int na;
- for (na = 0; get_author(na, &dummy) != 0; na++)
- ;
- ((reference *)this)->nauthors = na;
- }
- return nauthors;
-}
diff --git a/contrib/groff/refer/ref.cc b/contrib/groff/refer/ref.cc
deleted file mode 100644
index c3517b194598..000000000000
--- a/contrib/groff/refer/ref.cc
+++ /dev/null
@@ -1,1160 +0,0 @@
-// -*- C++ -*-
-/* Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc.
-Written by James Clark (jjc@jclark.com)
-
-This file is part of groff.
-
-groff is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 2, or (at your option) any later
-version.
-
-groff is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received a copy of the GNU General Public License along
-with groff; see the file COPYING. If not, write to the Free Software
-Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
-
-#include "refer.h"
-#include "refid.h"
-#include "ref.h"
-#include "token.h"
-
-static const char *find_day(const char *, const char *, const char **);
-static int find_month(const char *start, const char *end);
-static void abbreviate_names(string &);
-
-#define DEFAULT_ARTICLES "the\000a\000an"
-
-string articles(DEFAULT_ARTICLES, sizeof(DEFAULT_ARTICLES));
-
-// Multiple occurrences of fields are separated by FIELD_SEPARATOR.
-const char FIELD_SEPARATOR = '\0';
-
-const char MULTI_FIELD_NAMES[] = "AE";
-const char *AUTHOR_FIELDS = "AQ";
-
-enum { OTHER, JOURNAL_ARTICLE, BOOK, ARTICLE_IN_BOOK, TECH_REPORT, BELL_TM };
-
-const char *reference_types[] = {
- "other",
- "journal-article",
- "book",
- "article-in-book",
- "tech-report",
- "bell-tm",
-};
-
-static string temp_fields[256];
-
-reference::reference(const char *start, int len, reference_id *ridp)
-: h(0), merged(0), no(-1), field(0), nfields(0), label_ptr(0),
- computed_authors(0), last_needed_author(-1), nauthors(-1)
-{
- int i;
- for (i = 0; i < 256; i++)
- field_index[i] = NULL_FIELD_INDEX;
- if (ridp)
- rid = *ridp;
- if (start == 0)
- return;
- if (len <= 0)
- return;
- const char *end = start + len;
- const char *ptr = start;
- assert(*ptr == '%');
- while (ptr < end) {
- if (ptr + 1 < end && ptr[1] != '\0'
- && ((ptr[1] != '%' && ptr[1] == annotation_field)
- || (ptr + 2 < end && ptr[1] == '%' && ptr[2] != '\0'
- && discard_fields.search(ptr[2]) < 0))) {
- if (ptr[1] == '%')
- ptr++;
- string &f = temp_fields[(unsigned char)ptr[1]];
- ptr += 2;
- while (ptr < end && csspace(*ptr))
- ptr++;
- for (;;) {
- for (;;) {
- if (ptr >= end) {
- f += '\n';
- break;
- }
- f += *ptr;
- if (*ptr++ == '\n')
- break;
- }
- if (ptr >= end || *ptr == '%')
- break;
- }
- }
- else if (ptr + 1 < end && ptr[1] != '\0' && ptr[1] != '%'
- && discard_fields.search(ptr[1]) < 0) {
- string &f = temp_fields[(unsigned char)ptr[1]];
- if (f.length() > 0) {
- if (strchr(MULTI_FIELD_NAMES, ptr[1]) != 0)
- f += FIELD_SEPARATOR;
- else
- f.clear();
- }
- ptr += 2;
- if (ptr < end) {
- if (*ptr == ' ')
- ptr++;
- for (;;) {
- const char *p = ptr;
- while (ptr < end && *ptr != '\n')
- ptr++;
- // strip trailing white space
- const char *q = ptr;
- while (q > p && q[-1] != '\n' && csspace(q[-1]))
- q--;
- while (p < q)
- f += *p++;
- if (ptr >= end)
- break;
- ptr++;
- if (ptr >= end)
- break;
- if (*ptr == '%')
- break;
- f += ' ';
- }
- }
- }
- else {
- // skip this field
- for (;;) {
- while (ptr < end && *ptr++ != '\n')
- ;
- if (ptr >= end || *ptr == '%')
- break;
- }
- }
- }
- for (i = 0; i < 256; i++)
- if (temp_fields[i].length() > 0)
- nfields++;
- field = new string[nfields];
- int j = 0;
- for (i = 0; i < 256; i++)
- if (temp_fields[i].length() > 0) {
- field[j].move(temp_fields[i]);
- if (abbreviate_fields.search(i) >= 0)
- abbreviate_names(field[j]);
- field_index[i] = j;
- j++;
- }
-}
-
-reference::~reference()
-{
- if (nfields > 0)
- ad_delete(nfields) field;
-}
-
-// ref is the inline, this is the database ref
-
-void reference::merge(reference &ref)
-{
- int i;
- for (i = 0; i < 256; i++)
- if (field_index[i] != NULL_FIELD_INDEX)
- temp_fields[i].move(field[field_index[i]]);
- for (i = 0; i < 256; i++)
- if (ref.field_index[i] != NULL_FIELD_INDEX)
- temp_fields[i].move(ref.field[ref.field_index[i]]);
- for (i = 0; i < 256; i++)
- field_index[i] = NULL_FIELD_INDEX;
- int old_nfields = nfields;
- nfields = 0;
- for (i = 0; i < 256; i++)
- if (temp_fields[i].length() > 0)
- nfields++;
- if (nfields != old_nfields) {
- if (old_nfields > 0)
- ad_delete(old_nfields) field;
- field = new string[nfields];
- }
- int j = 0;
- for (i = 0; i < 256; i++)
- if (temp_fields[i].length() > 0) {
- field[j].move(temp_fields[i]);
- field_index[i] = j;
- j++;
- }
- merged = 1;
-}
-
-void reference::insert_field(unsigned char c, string &s)
-{
- assert(s.length() > 0);
- if (field_index[c] != NULL_FIELD_INDEX) {
- field[field_index[c]].move(s);
- return;
- }
- assert(field_index[c] == NULL_FIELD_INDEX);
- string *old_field = field;
- field = new string[nfields + 1];
- int pos = 0;
- int i;
- for (i = 0; i < int(c); i++)
- if (field_index[i] != NULL_FIELD_INDEX)
- pos++;
- for (i = 0; i < pos; i++)
- field[i].move(old_field[i]);
- field[pos].move(s);
- for (i = pos; i < nfields; i++)
- field[i + 1].move(old_field[i]);
- if (nfields > 0)
- ad_delete(nfields) old_field;
- nfields++;
- field_index[c] = pos;
- for (i = c + 1; i < 256; i++)
- if (field_index[i] != NULL_FIELD_INDEX)
- field_index[i] += 1;
-}
-
-void reference::delete_field(unsigned char c)
-{
- if (field_index[c] == NULL_FIELD_INDEX)
- return;
- string *old_field = field;
- field = new string[nfields - 1];
- int i;
- for (i = 0; i < int(field_index[c]); i++)
- field[i].move(old_field[i]);
- for (i = field_index[c]; i < nfields - 1; i++)
- field[i].move(old_field[i + 1]);
- if (nfields > 0)
- ad_delete(nfields) old_field;
- nfields--;
- field_index[c] = NULL_FIELD_INDEX;
- for (i = c + 1; i < 256; i++)
- if (field_index[i] != NULL_FIELD_INDEX)
- field_index[i] -= 1;
-}
-
-void reference::compute_hash_code()
-{
- if (!rid.is_null())
- h = rid.hash();
- else {
- h = 0;
- for (int i = 0; i < nfields; i++)
- if (field[i].length() > 0) {
- h <<= 4;
- h ^= hash_string(field[i].contents(), field[i].length());
- }
- }
-}
-
-void reference::set_number(int n)
-{
- no = n;
-}
-
-const char SORT_SEP = '\001';
-const char SORT_SUB_SEP = '\002';
-const char SORT_SUB_SUB_SEP = '\003';
-
-// sep specifies additional word separators
-
-void sortify_words(const char *s, const char *end, const char *sep,
- string &result)
-{
- int non_empty = 0;
- int need_separator = 0;
- for (;;) {
- const char *token_start = s;
- if (!get_token(&s, end))
- break;
- if ((s - token_start == 1
- && (*token_start == ' '
- || *token_start == '\n'
- || (sep && *token_start != '\0'
- && strchr(sep, *token_start) != 0)))
- || (s - token_start == 2
- && token_start[0] == '\\' && token_start[1] == ' ')) {
- if (non_empty)
- need_separator = 1;
- }
- else {
- const token_info *ti = lookup_token(token_start, s);
- if (ti->sortify_non_empty(token_start, s)) {
- if (need_separator) {
- result += ' ';
- need_separator = 0;
- }
- ti->sortify(token_start, s, result);
- non_empty = 1;
- }
- }
- }
-}
-
-void sortify_word(const char *s, const char *end, string &result)
-{
- for (;;) {
- const char *token_start = s;
- if (!get_token(&s, end))
- break;
- const token_info *ti = lookup_token(token_start, s);
- ti->sortify(token_start, s, result);
- }
-}
-
-void sortify_other(const char *s, int len, string &key)
-{
- sortify_words(s, s + len, 0, key);
-}
-
-void sortify_title(const char *s, int len, string &key)
-{
- const char *end = s + len;
- for (; s < end && (*s == ' ' || *s == '\n'); s++)
- ;
- const char *ptr = s;
- for (;;) {
- const char *token_start = ptr;
- if (!get_token(&ptr, end))
- break;
- if (ptr - token_start == 1
- && (*token_start == ' ' || *token_start == '\n'))
- break;
- }
- if (ptr < end) {
- int first_word_len = ptr - s - 1;
- const char *ae = articles.contents() + articles.length();
- for (const char *a = articles.contents();
- a < ae;
- a = strchr(a, '\0') + 1)
- if (first_word_len == strlen(a)) {
- int j;
- for (j = 0; j < first_word_len; j++)
- if (a[j] != cmlower(s[j]))
- break;
- if (j >= first_word_len) {
- s = ptr;
- for (; s < end && (*s == ' ' || *s == '\n'); s++)
- ;
- break;
- }
- }
- }
- sortify_words(s, end, 0, key);
-}
-
-void sortify_name(const char *s, int len, string &key)
-{
- const char *last_name_end;
- const char *last_name = find_last_name(s, s + len, &last_name_end);
- sortify_word(last_name, last_name_end, key);
- key += SORT_SUB_SUB_SEP;
- if (last_name > s)
- sortify_words(s, last_name, ".", key);
- key += SORT_SUB_SUB_SEP;
- if (last_name_end < s + len)
- sortify_words(last_name_end, s + len, ".,", key);
-}
-
-void sortify_date(const char *s, int len, string &key)
-{
- const char *year_end;
- const char *year_start = find_year(s, s + len, &year_end);
- if (!year_start) {
- // Things without years are often `forthcoming', so it makes sense
- // that they sort after things with explicit years.
- key += 'A';
- sortify_words(s, s + len, 0, key);
- return;
- }
- int n = year_end - year_start;
- while (n < 4) {
- key += '0';
- n++;
- }
- while (year_start < year_end)
- key += *year_start++;
- int m = find_month(s, s + len);
- if (m < 0)
- return;
- key += 'A' + m;
- const char *day_end;
- const char *day_start = find_day(s, s + len, &day_end);
- if (!day_start)
- return;
- if (day_end - day_start == 1)
- key += '0';
- while (day_start < day_end)
- key += *day_start++;
-}
-
-// SORT_{SUB,SUB_SUB}_SEP can creep in from use of @ in label specification.
-
-void sortify_label(const char *s, int len, string &key)
-{
- const char *end = s + len;
- for (;;) {
- const char *ptr;
- for (ptr = s;
- ptr < end && *ptr != SORT_SUB_SEP && *ptr != SORT_SUB_SUB_SEP;
- ptr++)
- ;
- if (ptr > s)
- sortify_words(s, ptr, 0, key);
- s = ptr;
- if (s >= end)
- break;
- key += *s++;
- }
-}
-
-void reference::compute_sort_key()
-{
- if (sort_fields.length() == 0)
- return;
- sort_fields += '\0';
- const char *sf = sort_fields.contents();
- while (*sf != '\0') {
- if (sf > sort_fields)
- sort_key += SORT_SEP;
- char f = *sf++;
- int n = 1;
- if (*sf == '+') {
- n = INT_MAX;
- sf++;
- }
- else if (csdigit(*sf)) {
- char *ptr;
- long l = strtol(sf, &ptr, 10);
- if (l == 0 && ptr == sf)
- ;
- else {
- sf = ptr;
- if (l < 0) {
- n = 1;
- }
- else {
- n = int(l);
- }
- }
- }
- if (f == '.')
- sortify_label(label.contents(), label.length(), sort_key);
- else if (f == AUTHOR_FIELDS[0])
- sortify_authors(n, sort_key);
- else
- sortify_field(f, n, sort_key);
- }
- sort_fields.set_length(sort_fields.length() - 1);
-}
-
-void reference::sortify_authors(int n, string &result) const
-{
- for (const char *p = AUTHOR_FIELDS; *p != '\0'; p++)
- if (contains_field(*p)) {
- sortify_field(*p, n, result);
- return;
- }
- sortify_field(AUTHOR_FIELDS[0], n, result);
-}
-
-void reference::canonicalize_authors(string &result) const
-{
- int len = result.length();
- sortify_authors(INT_MAX, result);
- if (result.length() > len)
- result += SORT_SUB_SEP;
-}
-
-void reference::sortify_field(unsigned char f, int n, string &result) const
-{
- typedef void (*sortify_t)(const char *, int, string &);
- sortify_t sortifier = sortify_other;
- switch (f) {
- case 'A':
- case 'E':
- sortifier = sortify_name;
- break;
- case 'D':
- sortifier = sortify_date;
- break;
- case 'B':
- case 'J':
- case 'T':
- sortifier = sortify_title;
- break;
- }
- int fi = field_index[(unsigned char)f];
- if (fi != NULL_FIELD_INDEX) {
- string &str = field[fi];
- const char *start = str.contents();
- const char *end = start + str.length();
- for (int i = 0; i < n && start < end; i++) {
- const char *p = start;
- while (start < end && *start != FIELD_SEPARATOR)
- start++;
- if (i > 0)
- result += SORT_SUB_SEP;
- (*sortifier)(p, start - p, result);
- if (start < end)
- start++;
- }
- }
-}
-
-int compare_reference(const reference &r1, const reference &r2)
-{
- assert(r1.no >= 0);
- assert(r2.no >= 0);
- const char *s1 = r1.sort_key.contents();
- int n1 = r1.sort_key.length();
- const char *s2 = r2.sort_key.contents();
- int n2 = r2.sort_key.length();
- for (; n1 > 0 && n2 > 0; --n1, --n2, ++s1, ++s2)
- if (*s1 != *s2)
- return (int)(unsigned char)*s1 - (int)(unsigned char)*s2;
- if (n2 > 0)
- return -1;
- if (n1 > 0)
- return 1;
- return r1.no - r2.no;
-}
-
-int same_reference(const reference &r1, const reference &r2)
-{
- if (!r1.rid.is_null() && r1.rid == r2.rid)
- return 1;
- if (r1.h != r2.h)
- return 0;
- if (r1.nfields != r2.nfields)
- return 0;
- int i = 0;
- for (i = 0; i < 256; i++)
- if (r1.field_index != r2.field_index)
- return 0;
- for (i = 0; i < r1.nfields; i++)
- if (r1.field[i] != r2.field[i])
- return 0;
- return 1;
-}
-
-const char *find_last_name(const char *start, const char *end,
- const char **endp)
-{
- const char *ptr = start;
- const char *last_word = start;
- for (;;) {
- const char *token_start = ptr;
- if (!get_token(&ptr, end))
- break;
- if (ptr - token_start == 1) {
- if (*token_start == ',') {
- *endp = token_start;
- return last_word;
- }
- else if (*token_start == ' ' || *token_start == '\n') {
- if (ptr < end && *ptr != ' ' && *ptr != '\n')
- last_word = ptr;
- }
- }
- }
- *endp = end;
- return last_word;
-}
-
-void abbreviate_name(const char *ptr, const char *end, string &result)
-{
- const char *last_name_end;
- const char *last_name_start = find_last_name(ptr, end, &last_name_end);
- int need_period = 0;
- for (;;) {
- const char *token_start = ptr;
- if (!get_token(&ptr, last_name_start))
- break;
- const token_info *ti = lookup_token(token_start, ptr);
- if (need_period) {
- if ((ptr - token_start == 1 && *token_start == ' ')
- || (ptr - token_start == 2 && token_start[0] == '\\'
- && token_start[1] == ' '))
- continue;
- if (ti->is_upper())
- result += period_before_initial;
- else
- result += period_before_other;
- need_period = 0;
- }
- result.append(token_start, ptr - token_start);
- if (ti->is_upper()) {
- const char *lower_ptr = ptr;
- int first_token = 1;
- for (;;) {
- token_start = ptr;
- if (!get_token(&ptr, last_name_start))
- break;
- if ((ptr - token_start == 1 && *token_start == ' ')
- || (ptr - token_start == 2 && token_start[0] == '\\'
- && token_start[1] == ' '))
- break;
- ti = lookup_token(token_start, ptr);
- if (ti->is_hyphen()) {
- const char *ptr1 = ptr;
- if (get_token(&ptr1, last_name_start)) {
- ti = lookup_token(ptr, ptr1);
- if (ti->is_upper()) {
- result += period_before_hyphen;
- result.append(token_start, ptr1 - token_start);
- ptr = ptr1;
- }
- }
- }
- else if (ti->is_upper()) {
- // MacDougal -> MacD.
- result.append(lower_ptr, ptr - lower_ptr);
- lower_ptr = ptr;
- first_token = 1;
- }
- else if (first_token && ti->is_accent()) {
- result.append(token_start, ptr - token_start);
- lower_ptr = ptr;
- }
- first_token = 0;
- }
- need_period = 1;
- }
- }
- if (need_period)
- result += period_before_last_name;
- result.append(last_name_start, end - last_name_start);
-}
-
-static void abbreviate_names(string &result)
-{
- string str;
- str.move(result);
- const char *ptr = str.contents();
- const char *end = ptr + str.length();
- while (ptr < end) {
- const char *name_end = (char *)memchr(ptr, FIELD_SEPARATOR, end - ptr);
- if (name_end == 0)
- name_end = end;
- abbreviate_name(ptr, name_end, result);
- if (name_end >= end)
- break;
- ptr = name_end + 1;
- result += FIELD_SEPARATOR;
- }
-}
-
-void reverse_name(const char *ptr, const char *name_end, string &result)
-{
- const char *last_name_end;
- const char *last_name_start = find_last_name(ptr, name_end, &last_name_end);
- result.append(last_name_start, last_name_end - last_name_start);
- while (last_name_start > ptr
- && (last_name_start[-1] == ' ' || last_name_start[-1] == '\n'))
- last_name_start--;
- if (last_name_start > ptr) {
- result += ", ";
- result.append(ptr, last_name_start - ptr);
- }
- if (last_name_end < name_end)
- result.append(last_name_end, name_end - last_name_end);
-}
-
-void reverse_names(string &result, int n)
-{
- if (n <= 0)
- return;
- string str;
- str.move(result);
- const char *ptr = str.contents();
- const char *end = ptr + str.length();
- while (ptr < end) {
- if (--n < 0) {
- result.append(ptr, end - ptr);
- break;
- }
- const char *name_end = (char *)memchr(ptr, FIELD_SEPARATOR, end - ptr);
- if (name_end == 0)
- name_end = end;
- reverse_name(ptr, name_end, result);
- if (name_end >= end)
- break;
- ptr = name_end + 1;
- result += FIELD_SEPARATOR;
- }
-}
-
-// Return number of field separators.
-
-int join_fields(string &f)
-{
- const char *ptr = f.contents();
- int len = f.length();
- int nfield_seps = 0;
- int j;
- for (j = 0; j < len; j++)
- if (ptr[j] == FIELD_SEPARATOR)
- nfield_seps++;
- if (nfield_seps == 0)
- return 0;
- string temp;
- int field_seps_left = nfield_seps;
- for (j = 0; j < len; j++) {
- if (ptr[j] == FIELD_SEPARATOR) {
- if (nfield_seps == 1)
- temp += join_authors_exactly_two;
- else if (--field_seps_left == 0)
- temp += join_authors_last_two;
- else
- temp += join_authors_default;
- }
- else
- temp += ptr[j];
- }
- f = temp;
- return nfield_seps;
-}
-
-void uppercase(const char *start, const char *end, string &result)
-{
- for (;;) {
- const char *token_start = start;
- if (!get_token(&start, end))
- break;
- const token_info *ti = lookup_token(token_start, start);
- ti->upper_case(token_start, start, result);
- }
-}
-
-void lowercase(const char *start, const char *end, string &result)
-{
- for (;;) {
- const char *token_start = start;
- if (!get_token(&start, end))
- break;
- const token_info *ti = lookup_token(token_start, start);
- ti->lower_case(token_start, start, result);
- }
-}
-
-void capitalize(const char *ptr, const char *end, string &result)
-{
- int in_small_point_size = 0;
- for (;;) {
- const char *start = ptr;
- if (!get_token(&ptr, end))
- break;
- const token_info *ti = lookup_token(start, ptr);
- const char *char_end = ptr;
- int is_lower = ti->is_lower();
- if ((is_lower || ti->is_upper()) && get_token(&ptr, end)) {
- const token_info *ti2 = lookup_token(char_end, ptr);
- if (!ti2->is_accent())
- ptr = char_end;
- }
- if (is_lower) {
- if (!in_small_point_size) {
- result += "\\s-2";
- in_small_point_size = 1;
- }
- ti->upper_case(start, char_end, result);
- result.append(char_end, ptr - char_end);
- }
- else {
- if (in_small_point_size) {
- result += "\\s+2";
- in_small_point_size = 0;
- }
- result.append(start, ptr - start);
- }
- }
- if (in_small_point_size)
- result += "\\s+2";
-}
-
-void capitalize_field(string &str)
-{
- string temp;
- capitalize(str.contents(), str.contents() + str.length(), temp);
- str.move(temp);
-}
-
-int is_terminated(const char *ptr, const char *end)
-{
- const char *last_token = end;
- for (;;) {
- const char *p = ptr;
- if (!get_token(&ptr, end))
- break;
- last_token = p;
- }
- return end - last_token == 1
- && (*last_token == '.' || *last_token == '!' || *last_token == '?');
-}
-
-void reference::output(FILE *fp)
-{
- fputs(".]-\n", fp);
- for (int i = 0; i < 256; i++)
- if (field_index[i] != NULL_FIELD_INDEX && i != annotation_field) {
- string &f = field[field_index[i]];
- if (!csdigit(i)) {
- int j = reverse_fields.search(i);
- if (j >= 0) {
- int n;
- int len = reverse_fields.length();
- if (++j < len && csdigit(reverse_fields[j])) {
- n = reverse_fields[j] - '0';
- for (++j; j < len && csdigit(reverse_fields[j]); j++)
- // should check for overflow
- n = n*10 + reverse_fields[j] - '0';
- }
- else
- n = INT_MAX;
- reverse_names(f, n);
- }
- }
- int is_multiple = join_fields(f) > 0;
- if (capitalize_fields.search(i) >= 0)
- capitalize_field(f);
- if (memchr(f.contents(), '\n', f.length()) == 0) {
- fprintf(fp, ".ds [%c ", i);
- if (f[0] == ' ' || f[0] == '\\' || f[0] == '"')
- putc('"', fp);
- put_string(f, fp);
- putc('\n', fp);
- }
- else {
- fprintf(fp, ".de [%c\n", i);
- put_string(f, fp);
- fputs("..\n", fp);
- }
- if (i == 'P') {
- int multiple_pages = 0;
- const char *s = f.contents();
- const char *end = f.contents() + f.length();
- for (;;) {
- const char *token_start = s;
- if (!get_token(&s, end))
- break;
- const token_info *ti = lookup_token(token_start, s);
- if (ti->is_hyphen() || ti->is_range_sep()) {
- multiple_pages = 1;
- break;
- }
- }
- fprintf(fp, ".nr [P %d\n", multiple_pages);
- }
- else if (i == 'E')
- fprintf(fp, ".nr [E %d\n", is_multiple);
- }
- for (const char *p = "TAO"; *p; p++) {
- int fi = field_index[(unsigned char)*p];
- if (fi != NULL_FIELD_INDEX) {
- string &f = field[fi];
- fprintf(fp, ".nr [%c %d\n", *p,
- is_terminated(f.contents(), f.contents() + f.length()));
- }
- }
- int t = classify();
- fprintf(fp, ".][ %d %s\n", t, reference_types[t]);
- if (annotation_macro.length() > 0 && annotation_field >= 0
- && field_index[annotation_field] != NULL_FIELD_INDEX) {
- putc('.', fp);
- put_string(annotation_macro, fp);
- putc('\n', fp);
- put_string(field[field_index[annotation_field]], fp);
- }
-}
-
-void reference::print_sort_key_comment(FILE *fp)
-{
- fputs(".\\\"", fp);
- put_string(sort_key, fp);
- putc('\n', fp);
-}
-
-const char *find_year(const char *start, const char *end, const char **endp)
-{
- for (;;) {
- while (start < end && !csdigit(*start))
- start++;
- const char *ptr = start;
- if (start == end)
- break;
- while (ptr < end && csdigit(*ptr))
- ptr++;
- if (ptr - start == 4 || ptr - start == 3
- || (ptr - start == 2
- && (start[0] >= '4' || (start[0] == '3' && start[1] >= '2')))) {
- *endp = ptr;
- return start;
- }
- start = ptr;
- }
- return 0;
-}
-
-static const char *find_day(const char *start, const char *end,
- const char **endp)
-{
- for (;;) {
- while (start < end && !csdigit(*start))
- start++;
- const char *ptr = start;
- if (start == end)
- break;
- while (ptr < end && csdigit(*ptr))
- ptr++;
- if ((ptr - start == 1 && start[0] != '0')
- || (ptr - start == 2 &&
- (start[0] == '1'
- || start[0] == '2'
- || (start[0] == '3' && start[1] <= '1')
- || (start[0] == '0' && start[1] != '0')))) {
- *endp = ptr;
- return start;
- }
- start = ptr;
- }
- return 0;
-}
-
-static int find_month(const char *start, const char *end)
-{
- static const char *months[] = {
- "january",
- "february",
- "march",
- "april",
- "may",
- "june",
- "july",
- "august",
- "september",
- "october",
- "november",
- "december",
- };
- for (;;) {
- while (start < end && !csalpha(*start))
- start++;
- const char *ptr = start;
- if (start == end)
- break;
- while (ptr < end && csalpha(*ptr))
- ptr++;
- if (ptr - start >= 3) {
- for (int i = 0; i < sizeof(months)/sizeof(months[0]); i++) {
- const char *q = months[i];
- const char *p = start;
- for (; p < ptr; p++, q++)
- if (cmlower(*p) != *q)
- break;
- if (p >= ptr)
- return i;
- }
- }
- start = ptr;
- }
- return -1;
-}
-
-int reference::contains_field(char c) const
-{
- return field_index[(unsigned char)c] != NULL_FIELD_INDEX;
-}
-
-int reference::classify()
-{
- if (contains_field('J'))
- return JOURNAL_ARTICLE;
- if (contains_field('B'))
- return ARTICLE_IN_BOOK;
- if (contains_field('G'))
- return TECH_REPORT;
- if (contains_field('R'))
- return TECH_REPORT;
- if (contains_field('I'))
- return BOOK;
- if (contains_field('M'))
- return BELL_TM;
- return OTHER;
-}
-
-const char *reference::get_year(const char **endp) const
-{
- if (field_index['D'] != NULL_FIELD_INDEX) {
- string &date = field[field_index['D']];
- const char *start = date.contents();
- const char *end = start + date.length();
- return find_year(start, end, endp);
- }
- else
- return 0;
-}
-
-const char *reference::get_field(unsigned char c, const char **endp) const
-{
- if (field_index[c] != NULL_FIELD_INDEX) {
- string &f = field[field_index[c]];
- const char *start = f.contents();
- *endp = start + f.length();
- return start;
- }
- else
- return 0;
-}
-
-const char *reference::get_date(const char **endp) const
-{
- return get_field('D', endp);
-}
-
-const char *nth_field(int i, const char *start, const char **endp)
-{
- while (--i >= 0) {
- start = (char *)memchr(start, FIELD_SEPARATOR, *endp - start);
- if (!start)
- return 0;
- start++;
- }
- const char *e = (char *)memchr(start, FIELD_SEPARATOR, *endp - start);
- if (e)
- *endp = e;
- return start;
-}
-
-const char *reference::get_author(int i, const char **endp) const
-{
- for (const char *f = AUTHOR_FIELDS; *f != '\0'; f++) {
- const char *start = get_field(*f, endp);
- if (start) {
- if (strchr(MULTI_FIELD_NAMES, *f) != 0)
- return nth_field(i, start, endp);
- else if (i == 0)
- return start;
- else
- return 0;
- }
- }
- return 0;
-}
-
-const char *reference::get_author_last_name(int i, const char **endp) const
-{
- for (const char *f = AUTHOR_FIELDS; *f != '\0'; f++) {
- const char *start = get_field(*f, endp);
- if (start) {
- if (strchr(MULTI_FIELD_NAMES, *f) != 0) {
- start = nth_field(i, start, endp);
- if (!start)
- return 0;
- }
- if (*f == 'A')
- return find_last_name(start, *endp, endp);
- else
- return start;
- }
- }
- return 0;
-}
-
-void reference::set_date(string &d)
-{
- if (d.length() == 0)
- delete_field('D');
- else
- insert_field('D', d);
-}
-
-int same_year(const reference &r1, const reference &r2)
-{
- const char *ye1;
- const char *ys1 = r1.get_year(&ye1);
- const char *ye2;
- const char *ys2 = r2.get_year(&ye2);
- if (ys1 == 0) {
- if (ys2 == 0)
- return same_date(r1, r2);
- else
- return 0;
- }
- else if (ys2 == 0)
- return 0;
- else if (ye1 - ys1 != ye2 - ys2)
- return 0;
- else
- return memcmp(ys1, ys2, ye1 - ys1) == 0;
-}
-
-int same_date(const reference &r1, const reference &r2)
-{
- const char *e1;
- const char *s1 = r1.get_date(&e1);
- const char *e2;
- const char *s2 = r2.get_date(&e2);
- if (s1 == 0)
- return s2 == 0;
- else if (s2 == 0)
- return 0;
- else if (e1 - s1 != e2 - s2)
- return 0;
- else
- return memcmp(s1, s2, e1 - s1) == 0;
-}
-
-const char *reference::get_sort_field(int i, int si, int ssi,
- const char **endp) const
-{
- const char *start = sort_key.contents();
- const char *end = start + sort_key.length();
- if (i < 0) {
- *endp = end;
- return start;
- }
- while (--i >= 0) {
- start = (char *)memchr(start, SORT_SEP, end - start);
- if (!start)
- return 0;
- start++;
- }
- const char *e = (char *)memchr(start, SORT_SEP, end - start);
- if (e)
- end = e;
- if (si < 0) {
- *endp = end;
- return start;
- }
- while (--si >= 0) {
- start = (char *)memchr(start, SORT_SUB_SEP, end - start);
- if (!start)
- return 0;
- start++;
- }
- e = (char *)memchr(start, SORT_SUB_SEP, end - start);
- if (e)
- end = e;
- if (ssi < 0) {
- *endp = end;
- return start;
- }
- while (--ssi >= 0) {
- start = (char *)memchr(start, SORT_SUB_SUB_SEP, end - start);
- if (!start)
- return 0;
- start++;
- }
- e = (char *)memchr(start, SORT_SUB_SUB_SEP, end - start);
- if (e)
- end = e;
- *endp = end;
- return start;
-}
-
diff --git a/contrib/groff/refer/ref.h b/contrib/groff/refer/ref.h
deleted file mode 100644
index 13a984a4c727..000000000000
--- a/contrib/groff/refer/ref.h
+++ /dev/null
@@ -1,120 +0,0 @@
-// -*- C++ -*-
-/* Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc.
- Written by James Clark (jjc@jclark.com)
-
-This file is part of groff.
-
-groff is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 2, or (at your option) any later
-version.
-
-groff is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received a copy of the GNU General Public License along
-with groff; see the file COPYING. If not, write to the Free Software
-Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
-
-struct label_info;
-
-enum label_type { NORMAL_LABEL, SHORT_LABEL };
-const int N_LABEL_TYPES = 2;
-
-struct substring_position {
- int start;
- int length;
- substring_position() : start(-1) { }
-};
-
-class int_set {
- string v;
-public:
- int_set() { }
- void set(int i);
- int get(int i) const;
-};
-
-class reference {
-private:
- unsigned h;
- reference_id rid;
- int merged;
- string sort_key;
- int no;
- string *field;
- int nfields;
- unsigned char field_index[256];
- enum { NULL_FIELD_INDEX = 255 };
- string label;
- substring_position separator_pos;
- string short_label;
- substring_position short_separator_pos;
- label_info *label_ptr;
- string authors;
- int computed_authors;
- int last_needed_author;
- int nauthors;
- int_set last_name_unambiguous;
-
- int contains_field(char) const;
- void insert_field(unsigned char, string &s);
- void delete_field(unsigned char);
- void set_date(string &);
- const char *get_sort_field(int i, int si, int ssi, const char **endp) const;
- int merge_labels_by_parts(reference **, int, label_type, string &);
- int merge_labels_by_number(reference **, int, label_type, string &);
-public:
- reference(const char * = 0, int = -1, reference_id * = 0);
- ~reference();
- void output(FILE *);
- void print_sort_key_comment(FILE *);
- void set_number(int);
- int get_number() const { return no; }
- unsigned hash() const { return h; }
- const string &get_label(label_type type) const;
- const substring_position &get_separator_pos(label_type) const;
- int is_merged() const { return merged; }
- void compute_sort_key();
- void compute_hash_code();
- void pre_compute_label();
- void compute_label();
- void immediate_compute_label();
- int classify();
- void merge(reference &);
- int merge_labels(reference **, int, label_type, string &);
- int get_nauthors() const;
- void need_author(int);
- void set_last_name_unambiguous(int);
- void sortify_authors(int, string &) const;
- void canonicalize_authors(string &) const;
- void sortify_field(unsigned char, int, string &) const;
- const char *get_author(int, const char **) const;
- const char *get_author_last_name(int, const char **) const;
- const char *get_date(const char **) const;
- const char *get_year(const char **) const;
- const char *get_field(unsigned char, const char **) const;
- const label_info *get_label_ptr() const { return label_ptr; }
- const char *get_authors(const char **) const;
- // for sorting
- friend int compare_reference(const reference &r1, const reference &r2);
- // for merging
- friend int same_reference(const reference &, const reference &);
- friend int same_year(const reference &, const reference &);
- friend int same_date(const reference &, const reference &);
- friend int same_author_last_name(const reference &, const reference &, int);
- friend int same_author_name(const reference &, const reference &, int);
-};
-
-const char *find_year(const char *, const char *, const char **);
-const char *find_last_name(const char *, const char *, const char **);
-
-const char *nth_field(int i, const char *start, const char **endp);
-
-void capitalize(const char *ptr, const char *end, string &result);
-void reverse_name(const char *ptr, const char *end, string &result);
-void uppercase(const char *ptr, const char *end, string &result);
-void lowercase(const char *ptr, const char *end, string &result);
-void abbreviate_name(const char *ptr, const char *end, string &result);
diff --git a/contrib/groff/refer/refer.cc b/contrib/groff/refer/refer.cc
deleted file mode 100644
index 70c696f4a9d3..000000000000
--- a/contrib/groff/refer/refer.cc
+++ /dev/null
@@ -1,1228 +0,0 @@
-// -*- C++ -*-
-/* Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc.
- Written by James Clark (jjc@jclark.com)
-
-This file is part of groff.
-
-groff is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 2, or (at your option) any later
-version.
-
-groff is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received a copy of the GNU General Public License along
-with groff; see the file COPYING. If not, write to the Free Software
-Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
-
-#include "refer.h"
-#include "refid.h"
-#include "ref.h"
-#include "token.h"
-#include "search.h"
-#include "command.h"
-
-const char PRE_LABEL_MARKER = '\013';
-const char POST_LABEL_MARKER = '\014';
-const char LABEL_MARKER = '\015'; // label_type is added on
-
-#define FORCE_LEFT_BRACKET 04
-#define FORCE_RIGHT_BRACKET 010
-
-static FILE *outfp = stdout;
-
-string capitalize_fields;
-string reverse_fields;
-string abbreviate_fields;
-string period_before_last_name = ". ";
-string period_before_initial = ".";
-string period_before_hyphen = "";
-string period_before_other = ". ";
-string sort_fields;
-int annotation_field = -1;
-string annotation_macro;
-string discard_fields = "XYZ";
-string pre_label = "\\*([.";
-string post_label = "\\*(.]";
-string sep_label = ", ";
-int accumulate = 0;
-int move_punctuation = 0;
-int abbreviate_label_ranges = 0;
-string label_range_indicator;
-int label_in_text = 1;
-int label_in_reference = 1;
-int date_as_label = 0;
-int sort_adjacent_labels = 0;
-// Join exactly two authors with this.
-string join_authors_exactly_two = " and ";
-// When there are more than two authors join the last two with this.
-string join_authors_last_two = ", and ";
-// Otherwise join authors with this.
-string join_authors_default = ", ";
-string separate_label_second_parts = ", ";
-// Use this string to represent that there are other authors.
-string et_al = " et al";
-// Use et al only if it can replace at least this many authors.
-int et_al_min_elide = 2;
-// Use et al only if the total number of authors is at least this.
-int et_al_min_total = 3;
-
-
-int compatible_flag = 0;
-
-int short_label_flag = 0;
-
-static int recognize_R1_R2 = 1;
-
-search_list database_list;
-int search_default = 1;
-static int default_database_loaded = 0;
-
-static reference **citation = 0;
-static int ncitations = 0;
-static int citation_max = 0;
-
-static reference **reference_hash_table = 0;
-static int hash_table_size;
-static int nreferences = 0;
-
-static int need_syncing = 0;
-string pending_line;
-string pending_lf_lines;
-
-static void output_pending_line();
-static unsigned immediately_handle_reference(const string &);
-static void immediately_output_references();
-static unsigned store_reference(const string &);
-static void divert_to_temporary_file();
-static reference *make_reference(const string &, unsigned *);
-static void usage();
-static void do_file(const char *);
-static void split_punct(string &line, string &punct);
-static void output_citation_group(reference **v, int n, label_type, FILE *fp);
-static void possibly_load_default_database();
-
-int main(int argc, char **argv)
-{
- program_name = argv[0];
- static char stderr_buf[BUFSIZ];
- setbuf(stderr, stderr_buf);
- outfp = stdout;
- int finished_options = 0;
- int bib_flag = 0;
- int done_spec = 0;
-
- for (--argc, ++argv;
- !finished_options && argc > 0 && argv[0][0] == '-'
- && argv[0][1] != '\0';
- argv++, argc--) {
- const char *opt = argv[0] + 1;
- while (opt != 0 && *opt != '\0') {
- switch (*opt) {
- case 'C':
- compatible_flag = 1;
- opt++;
- break;
- case 'B':
- bib_flag = 1;
- label_in_reference = 0;
- label_in_text = 0;
- ++opt;
- if (*opt == '\0') {
- annotation_field = 'X';
- annotation_macro = "AP";
- }
- else if (csalnum(opt[0]) && opt[1] == '.' && opt[2] != '\0') {
- annotation_field = opt[0];
- annotation_macro = opt + 2;
- }
- opt = 0;
- break;
- case 'P':
- move_punctuation = 1;
- opt++;
- break;
- case 'R':
- recognize_R1_R2 = 0;
- opt++;
- break;
- case 'S':
- // Not a very useful spec.
- set_label_spec("(A.n|Q)', '(D.y|D)");
- done_spec = 1;
- pre_label = " (";
- post_label = ")";
- sep_label = "; ";
- opt++;
- break;
- case 'V':
- verify_flag = 1;
- opt++;
- break;
- case 'f':
- {
- const char *num = 0;
- if (*++opt == '\0') {
- if (argc > 1) {
- num = *++argv;
- --argc;
- }
- else {
- error("option `f' requires an argument");
- usage();
- }
- }
- else {
- num = opt;
- opt = 0;
- }
- const char *ptr;
- for (ptr = num; *ptr; ptr++)
- if (!csdigit(*ptr)) {
- error("bad character `%1' in argument to -f option", *ptr);
- break;
- }
- if (*ptr == '\0') {
- string spec;
- spec = '%';
- spec += num;
- spec += '\0';
- set_label_spec(spec.contents());
- done_spec = 1;
- }
- break;
- }
- case 'b':
- label_in_text = 0;
- label_in_reference = 0;
- opt++;
- break;
- case 'e':
- accumulate = 1;
- opt++;
- break;
- case 'c':
- capitalize_fields = ++opt;
- opt = 0;
- break;
- case 'k':
- {
- char buf[5];
- if (csalpha(*++opt))
- buf[0] = *opt++;
- else {
- if (*opt != '\0')
- error("bad field name `%1'", *opt++);
- buf[0] = 'L';
- }
- buf[1] = '~';
- buf[2] = '%';
- buf[3] = 'a';
- buf[4] = '\0';
- set_label_spec(buf);
- done_spec = 1;
- }
- break;
- case 'a':
- {
- const char *ptr;
- for (ptr = ++opt; *ptr; ptr++)
- if (!csdigit(*ptr)) {
- error("argument to `a' option not a number");
- break;
- }
- if (*ptr == '\0') {
- reverse_fields = 'A';
- reverse_fields += opt;
- }
- opt = 0;
- }
- break;
- case 'i':
- linear_ignore_fields = ++opt;
- opt = 0;
- break;
- case 'l':
- {
- char buf[INT_DIGITS*2 + 11]; // A.n+2D.y-3%a
- strcpy(buf, "A.n");
- if (*++opt != '\0' && *opt != ',') {
- char *ptr;
- long n = strtol(opt, &ptr, 10);
- if (n == 0 && ptr == opt) {
- error("bad integer `%1' in `l' option", opt);
- opt = 0;
- break;
- }
- if (n < 0)
- n = 0;
- opt = ptr;
- sprintf(strchr(buf, '\0'), "+%ld", n);
- }
- strcat(buf, "D.y");
- if (*opt == ',')
- opt++;
- if (*opt != '\0') {
- char *ptr;
- long n = strtol(opt, &ptr, 10);
- if (n == 0 && ptr == opt) {
- error("bad integer `%1' in `l' option", opt);
- opt = 0;
- break;
- }
- if (n < 0)
- n = 0;
- sprintf(strchr(buf, '\0'), "-%ld", n);
- opt = ptr;
- if (*opt != '\0')
- error("argument to `l' option not of form `m,n'");
- }
- strcat(buf, "%a");
- if (!set_label_spec(buf))
- assert(0);
- done_spec = 1;
- }
- break;
- case 'n':
- search_default = 0;
- opt++;
- break;
- case 'p':
- {
- const char *filename = 0;
- if (*++opt == '\0') {
- if (argc > 1) {
- filename = *++argv;
- argc--;
- }
- else {
- error("option `p' requires an argument");
- usage();
- }
- }
- else {
- filename = opt;
- opt = 0;
- }
- database_list.add_file(filename);
- }
- break;
- case 's':
- if (*++opt == '\0')
- sort_fields = "AD";
- else {
- sort_fields = opt;
- opt = 0;
- }
- accumulate = 1;
- break;
- case 't':
- {
- char *ptr;
- long n = strtol(opt, &ptr, 10);
- if (n == 0 && ptr == opt) {
- error("bad integer `%1' in `t' option", opt);
- opt = 0;
- break;
- }
- if (n < 1)
- n = 1;
- linear_truncate_len = int(n);
- opt = ptr;
- break;
- }
- case 'v':
- {
- extern const char *Version_string;
- fprintf(stderr, "GNU refer version %s\n", Version_string);
- fflush(stderr);
- opt++;
- break;
- }
- case '-':
- if (opt[1] == '\0') {
- finished_options = 1;
- opt++;
- break;
- }
- // fall through
- default:
- error("unrecognized option `%1'", *opt);
- usage();
- break;
- }
- }
- }
- if (!done_spec)
- set_label_spec("%1");
- if (argc <= 0) {
- if (bib_flag)
- do_bib("-");
- else
- do_file("-");
- }
- else {
- for (int i = 0; i < argc; i++) {
- if (bib_flag)
- do_bib(argv[i]);
- else
- do_file(argv[i]);
- }
- }
- if (accumulate)
- output_references();
- if (fflush(stdout) < 0)
- fatal("output error");
- return 0;
-}
-
-static void usage()
-{
- fprintf(stderr,
-"usage: %s [-benvCPRS] [-aN] [-cXYZ] [-fN] [-iXYZ] [-kX] [-lM,N] [-p file]\n"
-" [-sXYZ] [-tN] [-BL.M] [files ...]\n",
- program_name);
- exit(1);
-}
-
-static void possibly_load_default_database()
-{
- if (search_default && !default_database_loaded) {
- char *filename = getenv("REFER");
- if (filename)
- database_list.add_file(filename);
- else
- database_list.add_file(DEFAULT_INDEX, 1);
- default_database_loaded = 1;
- }
-}
-
-static int is_list(const string &str)
-{
- const char *start = str.contents();
- const char *end = start + str.length();
- while (end > start && csspace(end[-1]))
- end--;
- while (start < end && csspace(*start))
- start++;
- return end - start == 6 && memcmp(start, "$LIST$", 6) == 0;
-}
-
-static void do_file(const char *filename)
-{
- FILE *fp;
- if (strcmp(filename, "-") == 0) {
- fp = stdin;
- }
- else {
- errno = 0;
- fp = fopen(filename, "r");
- if (fp == 0) {
- error("can't open `%1': %2", filename, strerror(errno));
- return;
- }
- }
- current_filename = filename;
- fprintf(outfp, ".lf 1 %s\n", filename);
- string line;
- current_lineno = 0;
- for (;;) {
- line.clear();
- for (;;) {
- int c = getc(fp);
- if (c == EOF) {
- if (line.length() > 0)
- line += '\n';
- break;
- }
- if (illegal_input_char(c))
- error("illegal input character code %1", c);
- else {
- line += c;
- if (c == '\n')
- break;
- }
- }
- int len = line.length();
- if (len == 0)
- break;
- current_lineno++;
- if (len >= 2 && line[0] == '.' && line[1] == '[') {
- int start_lineno = current_lineno;
- int start_of_line = 1;
- string str;
- string post;
- string pre(line.contents() + 2, line.length() - 3);
- for (;;) {
- int c = getc(fp);
- if (c == EOF) {
- error_with_file_and_line(current_filename, start_lineno,
- "missing `.]' line");
- break;
- }
- if (start_of_line)
- current_lineno++;
- if (start_of_line && c == '.') {
- int d = getc(fp);
- if (d == ']') {
- while ((d = getc(fp)) != '\n' && d != EOF) {
- if (illegal_input_char(d))
- error("illegal input character code %1", d);
- else
- post += d;
- }
- break;
- }
- if (d != EOF)
- ungetc(d, fp);
- }
- if (illegal_input_char(c))
- error("illegal input character code %1", c);
- else
- str += c;
- start_of_line = (c == '\n');
- }
- if (is_list(str)) {
- output_pending_line();
- if (accumulate)
- output_references();
- else
- error("found `$LIST$' but not accumulating references");
- }
- else {
- unsigned flags = (accumulate
- ? store_reference(str)
- : immediately_handle_reference(str));
- if (label_in_text) {
- if (accumulate && outfp == stdout)
- divert_to_temporary_file();
- if (pending_line.length() == 0) {
- warning("can't attach citation to previous line");
- }
- else
- pending_line.set_length(pending_line.length() - 1);
- string punct;
- if (move_punctuation)
- split_punct(pending_line, punct);
- int have_text = pre.length() > 0 || post.length() > 0;
- label_type lt = label_type(flags & ~(FORCE_LEFT_BRACKET
- |FORCE_RIGHT_BRACKET));
- if ((flags & FORCE_LEFT_BRACKET) || !have_text)
- pending_line += PRE_LABEL_MARKER;
- pending_line += pre;
- char lm = LABEL_MARKER + (int)lt;
- pending_line += lm;
- pending_line += post;
- if ((flags & FORCE_RIGHT_BRACKET) || !have_text)
- pending_line += POST_LABEL_MARKER;
- pending_line += punct;
- pending_line += '\n';
- }
- }
- need_syncing = 1;
- }
- else if (len >= 4
- && line[0] == '.' && line[1] == 'l' && line[2] == 'f'
- && (compatible_flag || line[3] == '\n' || line[3] == ' ')) {
- pending_lf_lines += line;
- line += '\0';
- if (interpret_lf_args(line.contents() + 3))
- current_lineno--;
- }
- else if (recognize_R1_R2
- && len >= 4
- && line[0] == '.' && line[1] == 'R' && line[2] == '1'
- && (compatible_flag || line[3] == '\n' || line[3] == ' ')) {
- line.clear();
- int start_of_line = 1;
- int start_lineno = current_lineno;
- for (;;) {
- int c = getc(fp);
- if (c != EOF && start_of_line)
- current_lineno++;
- if (start_of_line && c == '.') {
- c = getc(fp);
- if (c == 'R') {
- c = getc(fp);
- if (c == '2') {
- c = getc(fp);
- if (compatible_flag || c == ' ' || c == '\n' || c == EOF) {
- while (c != EOF && c != '\n')
- c = getc(fp);
- break;
- }
- else {
- line += '.';
- line += 'R';
- line += '2';
- }
- }
- else {
- line += '.';
- line += 'R';
- }
- }
- else
- line += '.';
- }
- if (c == EOF) {
- error_with_file_and_line(current_filename, start_lineno,
- "missing `.R2' line");
- break;
- }
- if (illegal_input_char(c))
- error("illegal input character code %1", int(c));
- else {
- line += c;
- start_of_line = c == '\n';
- }
- }
- output_pending_line();
- if (accumulate)
- output_references();
- else
- nreferences = 0;
- process_commands(line, current_filename, start_lineno + 1);
- need_syncing = 1;
- }
- else {
- output_pending_line();
- pending_line = line;
- }
- }
- need_syncing = 0;
- output_pending_line();
- if (fp != stdin)
- fclose(fp);
-}
-
-class label_processing_state {
- enum {
- NORMAL,
- PENDING_LABEL,
- PENDING_LABEL_POST,
- PENDING_LABEL_POST_PRE,
- PENDING_POST
- } state;
- label_type type; // type of pending labels
- int count; // number of pending labels
- reference **rptr; // pointer to next reference
- int rcount; // number of references left
- FILE *fp;
- int handle_pending(int c);
-public:
- label_processing_state(reference **, int, FILE *);
- ~label_processing_state();
- void process(int c);
-};
-
-static void output_pending_line()
-{
- if (label_in_text && !accumulate && ncitations > 0) {
- label_processing_state state(citation, ncitations, outfp);
- int len = pending_line.length();
- for (int i = 0; i < len; i++)
- state.process((unsigned char)(pending_line[i]));
- }
- else
- put_string(pending_line, outfp);
- pending_line.clear();
- if (pending_lf_lines.length() > 0) {
- put_string(pending_lf_lines, outfp);
- pending_lf_lines.clear();
- }
- if (!accumulate)
- immediately_output_references();
- if (need_syncing) {
- fprintf(outfp, ".lf %d %s\n", current_lineno, current_filename);
- need_syncing = 0;
- }
-}
-
-static void split_punct(string &line, string &punct)
-{
- const char *start = line.contents();
- const char *end = start + line.length();
- const char *ptr = start;
- const char *last_token_start = 0;
- for (;;) {
- if (ptr >= end)
- break;
- last_token_start = ptr;
- if (*ptr == PRE_LABEL_MARKER || *ptr == POST_LABEL_MARKER
- || (*ptr >= LABEL_MARKER && *ptr < LABEL_MARKER + N_LABEL_TYPES))
- ptr++;
- else if (!get_token(&ptr, end))
- break;
- }
- if (last_token_start) {
- const token_info *ti = lookup_token(last_token_start, end);
- if (ti->is_punct()) {
- punct.append(last_token_start, end - last_token_start);
- line.set_length(last_token_start - start);
- }
- }
-}
-
-static void divert_to_temporary_file()
-{
- outfp = xtmpfile();
-}
-
-static void store_citation(reference *ref)
-{
- if (ncitations >= citation_max) {
- if (citation == 0)
- citation = new reference*[citation_max = 100];
- else {
- reference **old_citation = citation;
- citation_max *= 2;
- citation = new reference *[citation_max];
- memcpy(citation, old_citation, ncitations*sizeof(reference *));
- a_delete old_citation;
- }
- }
- citation[ncitations++] = ref;
-}
-
-static unsigned store_reference(const string &str)
-{
- if (reference_hash_table == 0) {
- reference_hash_table = new reference *[17];
- hash_table_size = 17;
- for (int i = 0; i < hash_table_size; i++)
- reference_hash_table[i] = 0;
- }
- unsigned flags;
- reference *ref = make_reference(str, &flags);
- ref->compute_hash_code();
- unsigned h = ref->hash();
- reference **ptr;
- for (ptr = reference_hash_table + (h % hash_table_size);
- *ptr != 0;
- ((ptr == reference_hash_table)
- ? (ptr = reference_hash_table + hash_table_size - 1)
- : --ptr))
- if (same_reference(**ptr, *ref))
- break;
- if (*ptr != 0) {
- if (ref->is_merged())
- warning("fields ignored because reference already used");
- delete ref;
- ref = *ptr;
- }
- else {
- *ptr = ref;
- ref->set_number(nreferences);
- nreferences++;
- ref->pre_compute_label();
- ref->compute_sort_key();
- if (nreferences*2 >= hash_table_size) {
- // Rehash it.
- reference **old_table = reference_hash_table;
- int old_size = hash_table_size;
- hash_table_size = next_size(hash_table_size);
- reference_hash_table = new reference*[hash_table_size];
- int i;
- for (i = 0; i < hash_table_size; i++)
- reference_hash_table[i] = 0;
- for (i = 0; i < old_size; i++)
- if (old_table[i]) {
- reference **p;
- for (p = (reference_hash_table
- + (old_table[i]->hash() % hash_table_size));
- *p;
- ((p == reference_hash_table)
- ? (p = reference_hash_table + hash_table_size - 1)
- : --p))
- ;
- *p = old_table[i];
- }
- a_delete old_table;
- }
- }
- if (label_in_text)
- store_citation(ref);
- return flags;
-}
-
-unsigned immediately_handle_reference(const string &str)
-{
- unsigned flags;
- reference *ref = make_reference(str, &flags);
- ref->set_number(nreferences);
- if (label_in_text || label_in_reference) {
- ref->pre_compute_label();
- ref->immediate_compute_label();
- }
- nreferences++;
- store_citation(ref);
- return flags;
-}
-
-static void immediately_output_references()
-{
- for (int i = 0; i < ncitations; i++) {
- reference *ref = citation[i];
- if (label_in_reference) {
- fputs(".ds [F ", outfp);
- const string &label = ref->get_label(NORMAL_LABEL);
- if (label.length() > 0
- && (label[0] == ' ' || label[0] == '\\' || label[0] == '"'))
- putc('"', outfp);
- put_string(label, outfp);
- putc('\n', outfp);
- }
- ref->output(outfp);
- delete ref;
- }
- ncitations = 0;
-}
-
-static void output_citation_group(reference **v, int n, label_type type,
- FILE *fp)
-{
- if (sort_adjacent_labels) {
- // Do an insertion sort. Usually n will be very small.
- for (int i = 1; i < n; i++) {
- int num = v[i]->get_number();
- reference *temp = v[i];
- int j;
- for (j = i - 1; j >= 0 && v[j]->get_number() > num; j--)
- v[j + 1] = v[j];
- v[j + 1] = temp;
- }
- }
- // This messes up if !accumulate.
- if (accumulate && n > 1) {
- // remove duplicates
- int j = 1;
- for (int i = 1; i < n; i++)
- if (v[i]->get_label(type) != v[i - 1]->get_label(type))
- v[j++] = v[i];
- n = j;
- }
- string merged_label;
- for (int i = 0; i < n; i++) {
- int nmerged = v[i]->merge_labels(v + i + 1, n - i - 1, type, merged_label);
- if (nmerged > 0) {
- put_string(merged_label, fp);
- i += nmerged;
- }
- else
- put_string(v[i]->get_label(type), fp);
- if (i < n - 1)
- put_string(sep_label, fp);
- }
-}
-
-
-label_processing_state::label_processing_state(reference **p, int n, FILE *f)
-: state(NORMAL), count(0), rptr(p), rcount(n), fp(f)
-{
-}
-
-label_processing_state::~label_processing_state()
-{
- int handled = handle_pending(EOF);
- assert(!handled);
- assert(rcount == 0);
-}
-
-int label_processing_state::handle_pending(int c)
-{
- switch (state) {
- case NORMAL:
- break;
- case PENDING_LABEL:
- if (c == POST_LABEL_MARKER) {
- state = PENDING_LABEL_POST;
- return 1;
- }
- else {
- output_citation_group(rptr, count, type, fp);
- rptr += count ;
- rcount -= count;
- state = NORMAL;
- }
- break;
- case PENDING_LABEL_POST:
- if (c == PRE_LABEL_MARKER) {
- state = PENDING_LABEL_POST_PRE;
- return 1;
- }
- else {
- output_citation_group(rptr, count, type, fp);
- rptr += count;
- rcount -= count;
- put_string(post_label, fp);
- state = NORMAL;
- }
- break;
- case PENDING_LABEL_POST_PRE:
- if (c >= LABEL_MARKER
- && c < LABEL_MARKER + N_LABEL_TYPES
- && c - LABEL_MARKER == type) {
- count += 1;
- state = PENDING_LABEL;
- return 1;
- }
- else {
- output_citation_group(rptr, count, type, fp);
- rptr += count;
- rcount -= count;
- put_string(sep_label, fp);
- state = NORMAL;
- }
- break;
- case PENDING_POST:
- if (c == PRE_LABEL_MARKER) {
- put_string(sep_label, fp);
- state = NORMAL;
- return 1;
- }
- else {
- put_string(post_label, fp);
- state = NORMAL;
- }
- break;
- }
- return 0;
-}
-
-void label_processing_state::process(int c)
-{
- if (handle_pending(c))
- return;
- assert(state == NORMAL);
- switch (c) {
- case PRE_LABEL_MARKER:
- put_string(pre_label, fp);
- state = NORMAL;
- break;
- case POST_LABEL_MARKER:
- state = PENDING_POST;
- break;
- case LABEL_MARKER:
- case LABEL_MARKER + 1:
- count = 1;
- state = PENDING_LABEL;
- type = label_type(c - LABEL_MARKER);
- break;
- default:
- state = NORMAL;
- putc(c, fp);
- break;
- }
-}
-
-extern "C" {
-
-int rcompare(const void *p1, const void *p2)
-{
- return compare_reference(**(reference **)p1, **(reference **)p2);
-}
-
-}
-
-void output_references()
-{
- assert(accumulate);
- if (nreferences > 0) {
- int j = 0;
- int i;
- for (i = 0; i < hash_table_size; i++)
- if (reference_hash_table[i] != 0)
- reference_hash_table[j++] = reference_hash_table[i];
- assert(j == nreferences);
- for (; j < hash_table_size; j++)
- reference_hash_table[j] = 0;
- qsort(reference_hash_table, nreferences, sizeof(reference*), rcompare);
- for (i = 0; i < nreferences; i++)
- reference_hash_table[i]->set_number(i);
- compute_labels(reference_hash_table, nreferences);
- }
- if (outfp != stdout) {
- rewind(outfp);
- {
- label_processing_state state(citation, ncitations, stdout);
- int c;
- while ((c = getc(outfp)) != EOF)
- state.process(c);
- }
- ncitations = 0;
- fclose(outfp);
- outfp = stdout;
- }
- if (nreferences > 0) {
- fputs(".]<\n", outfp);
- for (int i = 0; i < nreferences; i++) {
- if (sort_fields.length() > 0)
- reference_hash_table[i]->print_sort_key_comment(outfp);
- if (label_in_reference) {
- fputs(".ds [F ", outfp);
- const string &label = reference_hash_table[i]->get_label(NORMAL_LABEL);
- if (label.length() > 0
- && (label[0] == ' ' || label[0] == '\\' || label[0] == '"'))
- putc('"', outfp);
- put_string(label, outfp);
- putc('\n', outfp);
- }
- reference_hash_table[i]->output(outfp);
- delete reference_hash_table[i];
- reference_hash_table[i] = 0;
- }
- fputs(".]>\n", outfp);
- nreferences = 0;
- }
- clear_labels();
-}
-
-static reference *find_reference(const char *query, int query_len)
-{
- // This is so that error messages look better.
- while (query_len > 0 && csspace(query[query_len - 1]))
- query_len--;
- string str;
- for (int i = 0; i < query_len; i++)
- str += query[i] == '\n' ? ' ' : query[i];
- str += '\0';
- possibly_load_default_database();
- search_list_iterator iter(&database_list, str.contents());
- reference_id rid;
- const char *start;
- int len;
- if (!iter.next(&start, &len, &rid)) {
- error("no matches for `%1'", str.contents());
- return 0;
- }
- const char *end = start + len;
- while (start < end) {
- if (*start == '%')
- break;
- while (start < end && *start++ != '\n')
- ;
- }
- if (start >= end) {
- error("found a reference for `%1' but it didn't contain any fields",
- str.contents());
- return 0;
- }
- reference *result = new reference(start, end - start, &rid);
- if (iter.next(&start, &len, &rid))
- warning("multiple matches for `%1'", str.contents());
- return result;
-}
-
-static reference *make_reference(const string &str, unsigned *flagsp)
-{
- const char *start = str.contents();
- const char *end = start + str.length();
- const char *ptr = start;
- while (ptr < end) {
- if (*ptr == '%')
- break;
- while (ptr < end && *ptr++ != '\n')
- ;
- }
- *flagsp = 0;
- for (; start < ptr; start++) {
- if (*start == '#')
- *flagsp = (SHORT_LABEL | (*flagsp & (FORCE_RIGHT_BRACKET
- | FORCE_LEFT_BRACKET)));
- else if (*start == '[')
- *flagsp |= FORCE_LEFT_BRACKET;
- else if (*start == ']')
- *flagsp |= FORCE_RIGHT_BRACKET;
- else if (!csspace(*start))
- break;
- }
- if (start >= end) {
- error("empty reference");
- return new reference;
- }
- reference *database_ref = 0;
- if (start < ptr)
- database_ref = find_reference(start, ptr - start);
- reference *inline_ref = 0;
- if (ptr < end)
- inline_ref = new reference(ptr, end - ptr);
- if (inline_ref) {
- if (database_ref) {
- database_ref->merge(*inline_ref);
- delete inline_ref;
- return database_ref;
- }
- else
- return inline_ref;
- }
- else if (database_ref)
- return database_ref;
- else
- return new reference;
-}
-
-static void do_ref(const string &str)
-{
- if (accumulate)
- (void)store_reference(str);
- else {
- (void)immediately_handle_reference(str);
- immediately_output_references();
- }
-}
-
-static void trim_blanks(string &str)
-{
- const char *start = str.contents();
- const char *end = start + str.length();
- while (end > start && end[-1] != '\n' && csspace(end[-1]))
- --end;
- str.set_length(end - start);
-}
-
-void do_bib(const char *filename)
-{
- FILE *fp;
- if (strcmp(filename, "-") == 0)
- fp = stdin;
- else {
- errno = 0;
- fp = fopen(filename, "r");
- if (fp == 0) {
- error("can't open `%1': %2", filename, strerror(errno));
- return;
- }
- current_filename = filename;
- }
- enum {
- START, MIDDLE, BODY, BODY_START, BODY_BLANK, BODY_DOT
- } state = START;
- string body;
- for (;;) {
- int c = getc(fp);
- if (c == EOF)
- break;
- if (illegal_input_char(c)) {
- error("illegal input character code %1", c);
- continue;
- }
- switch (state) {
- case START:
- if (c == '%') {
- body = c;
- state = BODY;
- }
- else if (c != '\n')
- state = MIDDLE;
- break;
- case MIDDLE:
- if (c == '\n')
- state = START;
- break;
- case BODY:
- body += c;
- if (c == '\n')
- state = BODY_START;
- break;
- case BODY_START:
- if (c == '\n') {
- do_ref(body);
- state = START;
- }
- else if (c == '.')
- state = BODY_DOT;
- else if (csspace(c)) {
- state = BODY_BLANK;
- body += c;
- }
- else {
- body += c;
- state = BODY;
- }
- break;
- case BODY_BLANK:
- if (c == '\n') {
- trim_blanks(body);
- do_ref(body);
- state = START;
- }
- else if (csspace(c))
- body += c;
- else {
- body += c;
- state = BODY;
- }
- break;
- case BODY_DOT:
- if (c == ']') {
- do_ref(body);
- state = MIDDLE;
- }
- else {
- body += '.';
- body += c;
- state = c == '\n' ? BODY_START : BODY;
- }
- break;
- default:
- assert(0);
- }
- if (c == '\n')
- current_lineno++;
- }
- switch (state) {
- case START:
- case MIDDLE:
- break;
- case BODY:
- body += '\n';
- do_ref(body);
- break;
- case BODY_DOT:
- case BODY_START:
- do_ref(body);
- break;
- case BODY_BLANK:
- trim_blanks(body);
- do_ref(body);
- break;
- }
- fclose(fp);
-}
-
-// from the Dragon Book
-
-unsigned hash_string(const char *s, int len)
-{
- const char *end = s + len;
- unsigned h = 0, g;
- while (s < end) {
- h <<= 4;
- h += *s++;
- if ((g = h & 0xf0000000) != 0) {
- h ^= g >> 24;
- h ^= g;
- }
- }
- return h;
-}
-
-int next_size(int n)
-{
- static const int table_sizes[] = {
- 101, 503, 1009, 2003, 3001, 4001, 5003, 10007, 20011, 40009,
- 80021, 160001, 500009, 1000003, 2000003, 4000037, 8000009,
- 16000057, 32000011, 64000031, 128000003, 0
- };
-
- const int *p;
- for (p = table_sizes; *p <= n && *p != 0; p++)
- ;
- assert(*p != 0);
- return *p;
-}
-
diff --git a/contrib/groff/refer/refer.h b/contrib/groff/refer/refer.h
deleted file mode 100644
index f0ab3cd70957..000000000000
--- a/contrib/groff/refer/refer.h
+++ /dev/null
@@ -1,78 +0,0 @@
-// -*- C++ -*-
-/* Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc.
- Written by James Clark (jjc@jclark.com)
-
-This file is part of groff.
-
-groff is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 2, or (at your option) any later
-version.
-
-groff is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received a copy of the GNU General Public License along
-with groff; see the file COPYING. If not, write to the Free Software
-Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <assert.h>
-#include <string.h>
-#include <errno.h>
-
-#include "errarg.h"
-#include "error.h"
-#include "lib.h"
-#include "stringclass.h"
-#include "cset.h"
-#include "cmap.h"
-
-#include "defs.h"
-
-unsigned hash_string(const char *, int);
-int next_size(int);
-
-extern string capitalize_fields;
-extern string reverse_fields;
-extern string abbreviate_fields;
-extern string period_before_last_name;
-extern string period_before_initial;
-extern string period_before_hyphen;
-extern string period_before_other;
-extern string sort_fields;
-extern int annotation_field;
-extern string annotation_macro;
-extern string discard_fields;
-extern string articles;
-extern int abbreviate_label_ranges;
-extern string label_range_indicator;
-extern int date_as_label;
-extern string join_authors_exactly_two;
-extern string join_authors_last_two;
-extern string join_authors_default;
-extern string separate_label_second_parts;
-extern string et_al;
-extern int et_al_min_elide;
-extern int et_al_min_total;
-
-extern int compatible_flag;
-
-extern int set_label_spec(const char *);
-extern int set_date_label_spec(const char *);
-extern int set_short_label_spec(const char *);
-
-extern int short_label_flag;
-
-void clear_labels();
-void command_error(const char *,
- const errarg &arg1 = empty_errarg,
- const errarg &arg2 = empty_errarg,
- const errarg &arg3 = empty_errarg);
-
-struct reference;
-
-void compute_labels(reference **, int);
diff --git a/contrib/groff/refer/refer.man b/contrib/groff/refer/refer.man
deleted file mode 100644
index 13708cf0e568..000000000000
--- a/contrib/groff/refer/refer.man
+++ /dev/null
@@ -1,1302 +0,0 @@
-.ig \"-*- nroff -*-
-Copyright (C) 1989-2000 Free Software Foundation, Inc.
-
-Permission is granted to make and distribute verbatim copies of
-this manual provided the copyright notice and this permission notice
-are preserved on all copies.
-
-Permission is granted to copy and distribute modified versions of this
-manual under the conditions for verbatim copying, provided that the
-entire resulting derived work is distributed under the terms of a
-permission notice identical to this one.
-
-Permission is granted to copy and distribute translations of this
-manual into another language, under the above conditions for modified
-versions, except that this permission notice may be included in
-translations approved by the Free Software Foundation instead of in
-the original English.
-..
-.de TQ
-.br
-.ns
-.TP \\$1
-..
-.\" Like TP, but if specified indent is more than half
-.\" the current line-length - indent, use the default indent.
-.de Tp
-.ie \\n(.$=0:((0\\$1)*2u>(\\n(.lu-\\n(.iu)) .TP
-.el .TP "\\$1"
-..
-.\" The BSD man macros can't handle " in arguments to font change macros,
-.\" so use \(ts instead of ".
-.tr \(ts"
-.TH @G@REFER @MAN1EXT@ "@MDATE@" "Groff Version @VERSION@"
-.SH NAME
-@g@refer \- preprocess bibliographic references for groff
-.SH SYNOPSIS
-.nr a \n(.j
-.ad l
-.nr i \n(.i
-.in +\w'\fB@g@refer 'u
-.ti \niu
-.B @g@refer
-.de OP
-.ie \\n(.$-1 .RI "[\ \fB\\$1\fP" "\\$2" "\ ]"
-.el .RB "[\ " "\\$1" "\ ]"
-..
-.OP \-benvCPRS
-.OP \-a n
-.OP \-c fields
-.OP \-f n
-.OP \-i fields
-.OP \-k field
-.OP \-l m,n
-.OP \-p filename
-.OP \-s fields
-.OP \-t n
-.OP \-B field.macro
-.RI [\ filename \|.\|.\|.\ ]
-.br
-.ad \na
-.PP
-It is possible to have whitespace between a command line option and its
-parameter.
-.SH DESCRIPTION
-This file documents the GNU version of
-.BR refer ,
-which is part of the groff document formatting system.
-.B refer
-copies the contents of
-.IR filename \|.\|.\|.
-to the standard output,
-except that lines between
-.B .[
-and
-.B .]
-are interpreted as citations,
-and lines between
-.B .R1
-and
-.B .R2
-are interpreted as commands about how citations are to be processed.
-.LP
-Each citation specifies a reference.
-The citation can specify a reference that is contained in
-a bibliographic database by giving a set of keywords
-that only that reference contains.
-Alternatively it can specify a reference by supplying a database
-record in the citation.
-A combination of these alternatives is also possible.
-.LP
-For each citation,
-.B refer
-can produce a mark in the text.
-This mark consists of some label which can be separated from
-the text and from other labels in various ways.
-For each reference it also outputs
-.B groff
-commands that can be used by a macro package to produce a formatted
-reference for each citation.
-The output of
-.B refer
-must therefore be processed using a suitable macro package.
-The
-.B \-ms
-and
-.B \-me
-macros are both suitable.
-The commands to format a citation's reference can be output immediately after
-the citation,
-or the references may be accumulated,
-and the commands output at some later point.
-If the references are accumulated, then multiple citations of the same
-reference will produce a single formatted reference.
-.LP
-The interpretation of lines between
-.B .R1
-and
-.B .R2
-as commands is a new feature of GNU refer.
-Documents making use of this feature can still be processed by
-Unix refer just by adding the lines
-.RS
-.LP
-.nf
-.ft B
-\&.de R1
-\&.ig R2
-\&..
-.ft
-.fi
-.RE
-to the beginning of the document.
-This will cause
-.B troff
-to ignore everything between
-.B .R1
-and
-.BR .R2 .
-The effect of some commands can also be achieved by options.
-These options are supported mainly for compatibility with Unix refer.
-It is usually more convenient to use commands.
-.LP
-.B refer
-generates
-.B .lf
-lines so that filenames and line numbers in messages produced
-by commands that read
-.B refer
-output will be correct;
-it also interprets lines beginning with
-.B .lf
-so that filenames and line numbers in the messages and
-.B .lf
-lines that it produces will be accurate even if the input has been
-preprocessed by a command such as
-.BR @g@soelim (@MAN1EXT@).
-.SH OPTIONS
-.LP
-Most options are equivalent to commands
-(for a description of these commands see the
-.B Commands
-subsection):
-.TP
-.B \-b
-.B
-no-label-in-text; no-label-in-reference
-.TP
-.B \-e
-.B accumulate
-.TP
-.B \-n
-.B no-default-database
-.TP
-.B \-C
-.B compatible
-.TP
-.B \-P
-.B move-punctuation
-.TP
-.B \-S
-.B
-label "(A.n|Q) ', ' (D.y|D)"; bracket-label " (" ) "; "
-.TP
-.BI \-a n
-.B reverse
-.BI A n
-.TP
-.BI \-c fields
-.B capitalize
-.I fields
-.TP
-.BI \-f n
-.B label
-.BI % n
-.TP
-.BI \-i fields
-.B search-ignore
-.I fields
-.TP
-.B \-k
-.B label
-.B L\(ti%a
-.TP
-.BI \-k field
-.B label
-.IB field \(ti%a
-.TP
-.B \-l
-.B label
-.BI A.nD.y%a
-.TP
-.BI \-l m
-.B label
-.BI A.n+ m D.y%a
-.TP
-.BI \-l, n
-.B label
-.BI A.nD.y\- n %a
-.TP
-.BI \-l m , n
-.B label
-.BI A.n+ m D.y\- n %a
-.TP
-.BI \-p filename
-.B database
-.I filename
-.TP
-.BI \-s spec
-.B sort
-.I spec
-.TP
-.BI \-t n
-.B search-truncate
-.I n
-.LP
-These options are equivalent to the following commands with the
-addition that the filenames specified on the command line are
-processed as if they were arguments to the
-.B bibliography
-command instead of in the normal way:
-.TP
-.B \-B
-.B
-annotate X AP; no-label-in-reference
-.TP
-.BI \-B field . macro
-.B annotate
-.I field
-.IB macro ;
-.B no-label-in-reference
-.LP
-The following options have no equivalent commands:
-.TP
-.B \-v
-Print the version number.
-.TP
-.B \-R
-Don't recognize lines beginning with
-.BR .R1 / .R2 .
-.SH USAGE
-.SS Bibliographic databases
-The bibliographic database is a text file consisting of records
-separated by one or more blank lines.
-Within each record fields start with a
-.B %
-at the beginning of a line.
-Each field has a one character name that immediately follows the
-.BR % .
-It is best to use only upper and lower case letters for the names
-of fields.
-The name of the field should be followed by exactly one space,
-and then by the contents of the field.
-Empty fields are ignored.
-The conventional meaning of each field is as follows:
-.TP
-.B A
-The name of an author.
-If the name contains a title such as
-.B Jr.
-at the end,
-it should be separated from the last name by a comma.
-There can be multiple occurrences of the
-.B A
-field.
-The order is significant.
-It is a good idea always to supply an
-.B A
-field or a
-.B Q
-field.
-.TP
-.B B
-For an article that is part of a book, the title of the book
-.TP
-.B C
-The place (city) of publication.
-.TP
-.B D
-The date of publication.
-The year should be specified in full.
-If the month is specified, the name rather than the number of the month
-should be used, but only the first three letters are required.
-It is a good idea always to supply a
-.B D
-field;
-if the date is unknown, a value such as
-.B in press
-or
-.B unknown
-can be used.
-.TP
-.B E
-For an article that is part of a book, the name of an editor of the book.
-Where the work has editors and no authors,
-the names of the editors should be given as
-.B A
-fields and
-.B ,\ (ed)
-or
-.B ,\ (eds)
-should be appended to the last author.
-.TP
-.B G
-US Government ordering number.
-.TP
-.B I
-The publisher (issuer).
-.TP
-.B J
-For an article in a journal, the name of the journal.
-.TP
-.B K
-Keywords to be used for searching.
-.TP
-.B L
-Label.
-.TP
-.B N
-Journal issue number.
-.TP
-.B O
-Other information.
-This is usually printed at the end of the reference.
-.TP
-.B P
-Page number.
-A range of pages can be specified as
-.IB m \- n\fR.
-.TP
-.B Q
-The name of the author, if the author is not a person.
-This will only be used if there are no
-.B A
-fields.
-There can only be one
-.B Q
-field.
-.TP
-.B R
-Technical report number.
-.TP
-.B S
-Series name.
-.TP
-.B T
-Title.
-For an article in a book or journal,
-this should be the title of the article.
-.TP
-.B V
-Volume number of the journal or book.
-.TP
-.B X
-Annotation.
-.LP
-For all fields except
-.B A
-and
-.BR E ,
-if there is more than one occurrence of a particular field in a record,
-only the last such field will be used.
-.LP
-If accent strings are used, they should follow the character to be accented.
-This means that the
-.B AM
-macro must be used with the
-.B \-ms
-macros.
-Accent strings should not be quoted:
-use one
-.B \e
-rather than two.
-.SS Citations
-The format of a citation is
-.RS
-.BI .[ opening-text
-.br
-.I
-flags keywords
-.br
-.I fields
-.br
-.BI .] closing-text
-.RE
-.LP
-The
-.IR opening-text ,
-.IR closing-text
-and
-.I flags
-components are optional.
-Only one of the
-.I keywords
-and
-.I fields
-components need be specified.
-.LP
-The
-.I keywords
-component says to search the bibliographic databases for a reference
-that contains all the words in
-.IR keywords .
-It is an error if more than one reference is found.
-.LP
-The
-.I fields
-component specifies additional fields to replace or supplement
-those specified in the reference.
-When references are being accumulated and the
-.I keywords
-component is non-empty,
-then additional fields should be specified only on the first
-occasion that a particular reference is cited,
-and will apply to all citations of that reference.
-.LP
-The
-.I opening-text
-and
-.I closing-text
-components specify strings to be used to bracket the label instead
-of the strings specified in the
-.B bracket-label
-command.
-If either of these components is non-empty,
-the strings specified in the
-.B bracket-label
-command will not be used;
-this behaviour can be altered using the
-.B [
-and
-.B ]
-flags.
-Note that leading and trailing spaces are significant for these components.
-.LP
-The
-.I flags
-component is a list of
-non-alphanumeric characters each of which modifies the treatment
-of this particular citation.
-Unix refer will treat these flags as part of the keywords and
-so will ignore them since they are non-alphanumeric.
-The following flags are currently recognized:
-.TP
-.B #
-This says to use the label specified by the
-.B short-label
-command,
-instead of that specified by the
-.B label
-command.
-If no short label has been specified, the normal label will be used.
-Typically the short label is used with author-date labels
-and consists of only the date and possibly a disambiguating letter;
-the
-.B #
-is supposed to be suggestive of a numeric type of label.
-.TP
-.B [
-Precede
-.I opening-text
-with the first string specified in the
-.B bracket-label
-command.
-.TP
-.B ]
-Follow
-.I closing-text
-with the second string specified in the
-.B bracket-label
-command.
-.LP
-One advantage of using the
-.B [
-and
-.B ]
-flags rather than including the brackets in
-.I opening-text
-and
-.I closing-text
-is that
-you can change the style of bracket used in the document just by changing the
-.B bracket-label
-command.
-Another advantage is that sorting and merging of citations
-will not necessarily be inhibited if the flags are used.
-.LP
-If a label is to be inserted into the text,
-it will be attached to the line preceding the
-.B .[
-line.
-If there is no such line, then an extra line will be inserted before the
-.B .[
-line and a warning will be given.
-.LP
-There is no special notation for making a citation to multiple references.
-Just use a sequence of citations, one for each reference.
-Don't put anything between the citations.
-The labels for all the citations will be attached to the line preceding
-the first citation.
-The labels may also be sorted or merged.
-See the description of the
-.B <>
-label expression, and of the
-.B sort-adjacent-labels
-and
-.B abbreviate-label-ranges
-commands.
-A label will not be merged if its citation has a non-empty
-.I opening-text
-or
-.IR closing-text .
-However, the labels for a citation using the
-.B ]
-flag and without any
-.I closing-text
-immediately followed by a citation using the
-.B [
-flag and without any
-.I opening-text
-may be sorted and merged
-even though the first citation's
-.I opening-text
-or the second citation's
-.I closing-text
-is non-empty.
-(If you wish to prevent this just make the first citation's
-.I closing-text
-.BR \e& .)
-.SS Commands
-Commands are contained between lines starting with
-.B .R1
-and
-.BR .R2 .
-Recognition of these lines can be prevented by the
-.B \-R
-option.
-When a
-.B .R1
-line is recognized any accumulated references are flushed out.
-Neither
-.B .R1
-nor
-.B .R2
-lines,
-nor anything between them
-is output.
-.LP
-Commands are separated by newlines or
-.BR ; s.
-.B #
-introduces a comment that extends to the end of the line
-(but does not conceal the newline).
-Each command is broken up into words.
-Words are separated by spaces or tabs.
-A word that begins with
-.B \(ts
-extends to the next
-.B \(ts
-that is not followed by another
-.BR \(ts .
-If there is no such
-.B \(ts
-the word extends to the end of the line.
-Pairs of
-.B \(ts
-in a word beginning with
-.B \(ts
-collapse to a single
-.BR \(ts .
-Neither
-.B #
-nor
-.B ;
-are recognized inside
-.BR \(ts s.
-A line can be continued by ending it with
-.BR \e ;
-this works everywhere except after a
-.BR # .
-.LP
-.ds n \fR*
-Each command
-.I name
-that is marked with \*n has an associated negative command
-.BI no- name
-that undoes the effect of
-.IR name .
-For example, the
-.B no-sort
-command specifies that references should not be sorted.
-The negative commands take no arguments.
-.LP
-In the following description each argument must be a single word;
-.I field
-is used for a single upper or lower case letter naming a field;
-.I fields
-is used for a sequence of such letters;
-.I m
-and
-.I n
-are used for non-negative numbers;
-.I string
-is used for an arbitrary string;
-.I filename
-is used for the name of a file.
-.Tp \w'\fBabbreviate-label-ranges'u+2n
-.BI abbreviate\*n\ fields\ string1\ string2\ string3\ string4
-Abbreviate the first names of
-.IR fields .
-An initial letter will be separated from another initial letter by
-.IR string1 ,
-from the last name by
-.IR string2 ,
-and from anything else
-(such as a
-.B von
-or
-.BR de )
-by
-.IR string3 .
-These default to a period followed by a space.
-In a hyphenated first name,
-the initial of the first part of the name will be separated from the hyphen by
-.IR string4 ;
-this defaults to a period.
-No attempt is made to handle any ambiguities that might
-result from abbreviation.
-Names are abbreviated before sorting and before
-label construction.
-.TP
-.BI abbreviate-label-ranges\*n\ string
-Three or more adjacent labels that refer to consecutive references
-will be abbreviated to a label consisting
-of the first label, followed by
-.I string
-followed by the last label.
-This is mainly useful with numeric labels.
-If
-.I string
-is omitted it defaults to
-.BR \- .
-.TP
-.B accumulate\*n
-Accumulate references instead of writing out each reference
-as it is encountered.
-Accumulated references will be written out whenever a reference
-of the form
-.RS
-.IP
-.B .[
-.br
-.B $LIST$
-.br
-.B .]
-.LP
-is encountered,
-after all input files have been processed,
-and whenever a
-.B .R1
-line is recognized.
-.RE
-.TP
-.BI annotate\*n\ field\ string
-.I field
-is an annotation;
-print it at the end of the reference as a paragraph preceded by the line
-.RS
-.IP
-.BI . string
-.LP
-If
-.I string
-is omitted it will default to
-.BR AP ;
-if
-.I field
-is also omitted it will default to
-.BR X .
-Only one field can be an annotation.
-.RE
-.TP
-.BI articles\ string \fR\|.\|.\|.
-.IR string \|.\|.\|.
-are definite or indefinite articles, and should be ignored at the beginning of
-.B T
-fields when sorting.
-Initially,
-.BR the ,
-.B a
-and
-.B an
-are recognized as articles.
-.TP
-.BI bibliography\ filename \fR\|.\|.\|.
-Write out all the references contained in the bibliographic databases
-.IR filename \|.\|.\|.
-.TP
-.BI bracket-label\ string1\ string2\ string3
-In the text, bracket each label
-with
-.I string1
-and
-.IR string2 .
-An occurrence of
-.I string2
-immediately followed by
-.I string1
-will be turned into
-.IR string3 .
-The default behaviour is
-.RS
-.IP
-.B
-bracket-label \e*([. \e*(.] ", "
-.RE
-.TP
-.BI capitalize\ fields
-Convert
-.I fields
-to caps and small caps.
-.TP
-.B compatible\*n
-Recognize
-.B .R1
-and
-.B .R2
-even when followed by a character other than space or newline.
-.TP
-.BI database\ filename \fR\|.\|.\|.
-Search the bibliographic databases
-.IR filename \|.\|.\|.
-For each
-.I filename
-if an index
-.IB filename @INDEX_SUFFIX@
-created by
-.BR @g@indxbib (@MAN1EXT@)
-exists, then it will be searched instead;
-each index can cover multiple databases.
-.TP
-.BI date-as-label\*n\ string
-.I string
-is a label expression that specifies a string with which to replace the
-.B D
-field after constructing the label.
-See the
-.B "Label expressions"
-subsection for a description of label expressions.
-This command is useful if you do not want explicit labels in the
-reference list, but instead want to handle any necessary
-disambiguation by qualifying the date in some way.
-The label used in the text would typically be some combination of the
-author and date.
-In most cases you should also use the
-.B no-label-in-reference
-command.
-For example,
-.RS
-.IP
-.B
-date-as-label D.+yD.y%a*D.-y
-.LP
-would attach a disambiguating letter to the year part of the
-.B D
-field in the reference.
-.RE
-.TP
-.B default-database\*n
-The default database should be searched.
-This is the default behaviour, so the negative version of
-this command is more useful.
-refer determines whether the default database should be searched
-on the first occasion that it needs to do a search.
-Thus a
-.B no-default-database
-command must be given before then,
-in order to be effective.
-.TP
-.BI discard\*n\ fields
-When the reference is read,
-.I fields
-should be discarded;
-no string definitions for
-.I fields
-will be output.
-Initially,
-.I fields
-are
-.BR XYZ .
-.TP
-.BI et-al\*n\ string\ m\ n
-Control use of
-.B
-et al
-in the evaluation of
-.B @
-expressions in label expressions.
-If the number of authors needed to make the author sequence
-unambiguous is
-.I u
-and the total number of authors is
-.I t
-then the last
-.IR t \|\-\| u
-authors will be replaced by
-.I string
-provided that
-.IR t \|\-\| u
-is not less than
-.I m
-and
-.I t
-is not less than
-.IR n .
-The default behaviour is
-.RS
-.IP
-.B
-et-al " et al" 2 3
-.RE
-.TP
-.BI include\ filename
-Include
-.I filename
-and interpret the contents as commands.
-.TP
-.BI join-authors\ string1\ string2\ string3
-This says how authors should be joined together.
-When there are exactly two authors, they will be joined with
-.IR string1 .
-When there are more than two authors, all but the last two will
-be joined with
-.IR string2 ,
-and the last two authors will be joined with
-.IR string3 .
-If
-.I string3
-is omitted,
-it will default to
-.IR string1 ;
-if
-.I string2
-is also omitted it will also default to
-.IR string1 .
-For example,
-.RS
-.IP
-.B
-join-authors " and " ", " ", and "
-.LP
-will restore the default method for joining authors.
-.RE
-.TP
-.B label-in-reference\*n
-When outputting the reference,
-define the string
-.B [F
-to be the reference's label.
-This is the default behaviour; so the negative version
-of this command is more useful.
-.TP
-.B label-in-text\*n
-For each reference output a label in the text.
-The label will be separated from the surrounding text as described in the
-.B bracket-label
-command.
-This is the default behaviour; so the negative version
-of this command is more useful.
-.TP
-.BI label\ string
-.I string
-is a label expression describing how to label each reference.
-.TP
-.BI separate-label-second-parts\ string
-When merging two-part labels, separate the second part of the second
-label from the first label with
-.IR string .
-See the description of the
-.B <>
-label expression.
-.TP
-.B move-punctuation\*n
-In the text, move any punctuation at the end of line past the label.
-It is usually a good idea to give this command unless you are using
-superscripted numbers as labels.
-.TP
-.BI reverse\*n\ string
-Reverse the fields whose names
-are in
-.IR string .
-Each field name can be followed by a number which says
-how many such fields should be reversed.
-If no number is given for a field, all such fields will be reversed.
-.TP
-.BI search-ignore\*n\ fields
-While searching for keys in databases for which no index exists,
-ignore the contents of
-.IR fields .
-Initially, fields
-.B XYZ
-are ignored.
-.TP
-.BI search-truncate\*n\ n
-Only require the first
-.I n
-characters of keys to be given.
-In effect when searching for a given key
-words in the database are truncated to the maximum of
-.I n
-and the length of the key.
-Initially
-.I n
-is 6.
-.TP
-.BI short-label\*n\ string
-.I string
-is a label expression that specifies an alternative (usually shorter)
-style of label.
-This is used when the
-.B #
-flag is given in the citation.
-When using author-date style labels, the identity of the author
-or authors is sometimes clear from the context, and so it
-may be desirable to omit the author or authors from the label.
-The
-.B short-label
-command will typically be used to specify a label containing just
-a date and possibly a disambiguating letter.
-.TP
-.BI sort\*n\ string
-Sort references according to
-.IR string .
-References will automatically be accumulated.
-.I string
-should be a list of field names, each followed by a number,
-indicating how many fields with the name should be used for sorting.
-.B +
-can be used to indicate that all the fields with the name should be used.
-Also
-.B .
-can be used to indicate the references should be sorted using the
-(tentative) label.
-(The
-.B
-Label expressions
-subsection describes the concept of a tentative label.)
-.TP
-.B sort-adjacent-labels\*n
-Sort labels that are adjacent in the text according to their
-position in the reference list.
-This command should usually be given if the
-.B abbreviate-label-ranges
-command has been given,
-or if the label expression contains a
-.B <>
-expression.
-This will have no effect unless references are being accumulated.
-.SS Label expressions
-.LP
-Label expressions can be evaluated both normally and tentatively.
-The result of normal evaluation is used for output.
-The result of tentative evaluation, called the
-.I
-tentative label,
-is used to gather the information
-that normal evaluation needs to disambiguate the label.
-Label expressions specified by the
-.B date-as-label
-and
-.B short-label
-commands are not evaluated tentatively.
-Normal and tentative evaluation are the same for all types
-of expression other than
-.BR @ ,
-.BR * ,
-and
-.B %
-expressions.
-The description below applies to normal evaluation,
-except where otherwise specified.
-.TP
-.I field
-.TQ
-.I field\ n
-The
-.IR n -th
-part of
-.IR field .
-If
-.I n
-is omitted, it defaults to 1.
-.TP
-.BI ' string '
-The characters in
-.I string
-literally.
-.TP
-.B @
-All the authors joined as specified by the
-.B join-authors
-command.
-The whole of each author's name will be used.
-However, if the references are sorted by author
-(that is the sort specification starts with
-.BR A+ ),
-then authors' last names will be used instead, provided that this does
-not introduce ambiguity,
-and also an initial subsequence of the authors may be used
-instead of all the authors, again provided that this does not
-introduce ambiguity.
-The use of only the last name for the
-.IR i -th
-author of some reference
-is considered to be ambiguous if
-there is some other reference,
-such that the first
-.IR i \|-\|1
-authors of the references are the same,
-the
-.IR i -th
-authors are not the same,
-but the
-.IR i -th
-authors' last names are the same.
-A proper initial subsequence of the sequence
-of authors for some reference is considered to be ambiguous if there is
-a reference with some other sequence of authors which also has
-that subsequence as a proper initial subsequence.
-When an initial subsequence of authors is used, the remaining
-authors are replaced by the string specified by the
-.B et-al
-command;
-this command may also specify additional requirements that must be
-met before an initial subsequence can be used.
-.B @
-tentatively evaluates to a canonical representation of the authors,
-such that authors that compare equally for sorting purpose
-will have the same representation.
-.TP
-.BI % n
-.TQ
-.B %a
-.TQ
-.B %A
-.TQ
-.B %i
-.TQ
-.B %I
-The serial number of the reference formatted according to the character
-following the
-.BR % .
-The serial number of a reference is 1 plus the number of earlier references
-with the same tentative label as this reference.
-These expressions tentatively evaluate to an empty string.
-.TP
-.IB expr *
-If there is another reference with the same tentative label as
-this reference, then
-.IR expr ,
-otherwise an empty string.
-It tentatively evaluates to an empty string.
-.TP
-.IB expr + n
-.TQ
-.IB expr \- n
-The first
-.RB ( + )
-or last
-.RB ( \- )
-.I n
-upper or lower case letters or digits of
-.IR expr .
-Troff special characters (such as
-.BR \e('a )
-count as a single letter.
-Accent strings are retained but do not count towards the total.
-.TP
-.IB expr .l
-.I expr
-converted to lowercase.
-.TP
-.IB expr .u
-.I expr
-converted to uppercase.
-.TP
-.IB expr .c
-.I expr
-converted to caps and small caps.
-.TP
-.IB expr .r
-.I expr
-reversed so that the last name is first.
-.TP
-.IB expr .a
-.I expr
-with first names abbreviated.
-Note that fields specified in the
-.B abbreviate
-command are abbreviated before any labels are evaluated.
-Thus
-.B .a
-is useful only when you want a field to be abbreviated in a label
-but not in a reference.
-.TP
-.IB expr .y
-The year part of
-.IR expr .
-.TP
-.IB expr .+y
-The part of
-.I expr
-before the year, or the whole of
-.I expr
-if it does not contain a year.
-.TP
-.IB expr .\-y
-The part of
-.I expr
-after the year, or an empty string if
-.I expr
-does not contain a year.
-.TP
-.IB expr .n
-The last name part of
-.IR expr .
-.TP
-.IB expr1 \(ti expr2
-.I expr1
-except that if the last character of
-.I expr1
-is
-.B \-
-then it will be replaced by
-.IR expr2 .
-.TP
-.I expr1\ expr2
-The concatenation of
-.I expr1
-and
-.IR expr2 .
-.TP
-.IB expr1 | expr2
-If
-.I expr1
-is non-empty then
-.I expr1
-otherwise
-.IR expr2 .
-.TP
-.IB expr1 & expr2
-If
-.I expr1
-is non-empty
-then
-.I expr2
-otherwise an empty string.
-.TP
-.IB expr1 ? expr2 : expr3
-If
-.I expr1
-is non-empty
-then
-.I expr2
-otherwise
-.IR expr3 .
-.TP
-.BI < expr >
-The label is in two parts, which are separated by
-.IR expr .
-Two adjacent two-part labels which have the same first part will be
-merged by appending the second part of the second label onto the first
-label separated by the string specified in the
-.B separate-label-second-parts
-command (initially, a comma followed by a space); the resulting label
-will also be a two-part label with the same first part as before
-merging, and so additional labels can be merged into it.
-Note that it is permissible for the first part to be empty;
-this may be desirable for expressions used in the
-.B short-label
-command.
-.TP
-.BI ( expr )
-The same as
-.IR expr .
-Used for grouping.
-.LP
-The above expressions are listed in order of precedence
-(highest first);
-.B &
-and
-.B |
-have the same precedence.
-.SS Macro interface
-Each reference starts with a call to the macro
-.BR ]- .
-The string
-.B [F
-will be defined to be the label for this reference,
-unless the
-.B no-label-in-reference
-command has been given.
-There then follows a series of string definitions,
-one for each field:
-string
-.BI [ X
-corresponds to field
-.IR X .
-The number register
-.B [P
-is set to 1 if the
-.B P
-field contains a range of pages.
-The
-.BR [T ,
-.B [A
-and
-.B [O
-number registers are set to 1 according as the
-.BR T ,
-.B A
-and
-.B O
-fields end with one of the characters
-.BR .?! .
-The
-.B [E
-number register will be set to 1 if the
-.B [E
-string contains more than one name.
-The reference is followed by a call to the
-.B ][
-macro.
-The first argument to this macro gives a number representing
-the type of the reference.
-If a reference contains a
-.B J
-field, it will be classified as type 1,
-otherwise if it contains a
-.B B
-field, it will be type 3,
-otherwise if it contains a
-.B G
-or
-.B R
-field it will be type 4,
-otherwise if it contains a
-.B I
-field it will be type 2,
-otherwise it will be type 0.
-The second argument is a symbolic name for the type:
-.BR other ,
-.BR journal-article ,
-.BR book ,
-.B article-in-book
-or
-.BR tech-report .
-Groups of references that have been accumulated
-or are produced by the
-.B bibliography
-command are preceded by a call to the
-.B ]<
-macro and followed by a call to the
-.B ]>
-macro.
-.SH FILES
-.Tp \w'\fB@DEFAULT_INDEX@'u+2n
-.B @DEFAULT_INDEX@
-Default database.
-.TP
-.IB file @INDEX_SUFFIX@
-Index files.
-.SH "SEE ALSO"
-.BR @g@indxbib (@MAN1EXT@),
-.BR @g@lookbib (@MAN1EXT@),
-.BR lkbib (@MAN1EXT@)
-.br
-.SH BUGS
-In label expressions,
-.B <>
-expressions are ignored inside
-.BI . char
-expressions.
diff --git a/contrib/groff/refer/token.cc b/contrib/groff/refer/token.cc
deleted file mode 100644
index 1cf6890f24d4..000000000000
--- a/contrib/groff/refer/token.cc
+++ /dev/null
@@ -1,378 +0,0 @@
-// -*- C++ -*-
-/* Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc.
- Written by James Clark (jjc@jclark.com)
-
-This file is part of groff.
-
-groff is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 2, or (at your option) any later
-version.
-
-groff is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received a copy of the GNU General Public License along
-with groff; see the file COPYING. If not, write to the Free Software
-Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
-
-#include "refer.h"
-#include "token.h"
-
-#define TOKEN_TABLE_SIZE 1009
-// Sort key used for thorn; "{" is the ASCII character right after "z".  I believe in Icelandic thorn sorts after z.
-#define THORN_SORT_KEY "{"
-
-struct token_table_entry { // one slot of the token hash table
- const char *tok; // token text; null while the slot is unused
- token_info ti; // properties recorded for tok
- token_table_entry(); // leaves the slot marked unused (tok == 0)
-};
-
-token_table_entry token_table[TOKEN_TABLE_SIZE]; // hash table filled by store_token(), searched by lookup_token()
-int ntokens = 0; // number of occupied slots in token_table
-
-// Advance *ptr past the name that follows an escape introducer.
-// A name starting with "(" is followed by two more characters, a name
-// starting with "[" runs through the first "]", and anything else is a
-// single character.  *ptr is never moved beyond end.
-static void skip_name(const char **ptr, const char *end)
-{
-  if (*ptr >= end)
-    return;
-  const char opener = **ptr;
-  ++*ptr;
-  if (opener == '(') {
-    // Take up to two more characters, stopping early at end.
-    for (int taken = 0; taken < 2 && *ptr < end; taken++)
-      ++*ptr;
-  }
-  else if (opener == '[') {
-    // Consume through the closing bracket, or to end if there is none.
-    while (*ptr < end) {
-      const char c = **ptr;
-      ++*ptr;
-      if (c == ']')
-        break;
-    }
-  }
-}
-
-// Consume one token from [*ptr, end) and advance *ptr past it.
-// Returns 0 when nothing is left, otherwise 1.  A token is a single
-// character unless it begins with a backslash, in which case the whole
-// escape is consumed: "\(" and "\[" take a name, "\*" and "\f" take
-// the selector plus a name, and any other escape takes one character.
-int get_token(const char **ptr, const char *end)
-{
-  if (*ptr >= end)
-    return 0;
-  const char first = **ptr;
-  ++*ptr;
-  // A backslash that is the very last character stands on its own.
-  if (first != '\\' || *ptr >= end)
-    return 1;
-  const char selector = **ptr;
-  if (selector == '(' || selector == '[') {
-    // The selector doubles as the name introducer.
-    skip_name(ptr, end);
-  }
-  else {
-    ++*ptr;
-    if (selector == '*' || selector == 'f')
-      skip_name(ptr, end);
-  }
-  return 1;
-}
-
-// A newly constructed token_info describes a TOKEN_OTHER token that
-// has neither a sort key nor an other-case form.
-token_info::token_info()
-{
-  type = TOKEN_OTHER;
-  sort_key = 0;
-  other_case = 0;
-}
-
-// Replace all three properties at once.  The pointers are stored as
-// given, not copied.  Only TOKEN_UPPER and TOKEN_LOWER tokens may have
-// an other-case form.
-void token_info::set(token_type t, const char *sk, const char *oc)
-{
-  assert(oc == 0 || t == TOKEN_UPPER || t == TOKEN_LOWER);
-  other_case = oc;
-  sort_key = sk;
-  type = t;
-}
-
-// Append the sort representation of the token text [start, end) to
-// result.  An explicit sort key wins; otherwise letters contribute
-// their alphabetic characters in lower case, and every other kind of
-// token contributes nothing.
-void token_info::sortify(const char *start, const char *end, string &result)
-     const
-{
-  if (sort_key) {
-    result += sort_key;
-    return;
-  }
-  if (type != TOKEN_UPPER && type != TOKEN_LOWER)
-    return;
-  for (const char *p = start; p < end; p++) {
-    if (csalpha(*p))
-      result += cmlower(*p);
-  }
-}
-
-// Return 1 if sortify() would append at least one character for the
-// token text [start, end), and 0 otherwise.
-int token_info::sortify_non_empty(const char *start, const char *end) const
-{
-  if (sort_key)
-    return sort_key[0] != '\0';
-  const int is_letter = (type == TOKEN_UPPER || type == TOKEN_LOWER);
-  if (!is_letter)
-    return 0;
-  // Look for the first alphabetic character, if any.
-  const char *p = start;
-  while (p < end && !csalpha(*p))
-    p++;
-  return p < end;
-}
-
-
-// Append the lower-case form of the token text [start, end) to result.
-// Only TOKEN_UPPER tokens are converted: through other_case when one
-// is recorded, otherwise character by character.  All other tokens
-// are copied unchanged.
-void token_info::lower_case(const char *start, const char *end,
-                            string &result) const
-{
-  const int convert = (type == TOKEN_UPPER);
-  if (convert && other_case) {
-    result += other_case;
-    return;
-  }
-  for (const char *p = start; p < end; p++) {
-    if (convert)
-      result += cmlower(*p);
-    else
-      result += *p;
-  }
-}
-
-// Append the upper-case form of the token text [start, end) to result.
-// Only TOKEN_LOWER tokens are converted: through other_case when one
-// is recorded, otherwise character by character.  All other tokens
-// are copied unchanged.
-void token_info::upper_case(const char *start, const char *end,
-                            string &result) const
-{
-  const int convert = (type == TOKEN_LOWER);
-  if (convert && other_case) {
-    result += other_case;
-    return;
-  }
-  for (const char *p = start; p < end; p++) {
-    if (convert)
-      result += cmupper(*p);
-    else
-      result += *p;
-  }
-}
-
-// Every slot starts out unused; a null tok is what store_token() and
-// lookup_token() test for.
-token_table_entry::token_table_entry()
-{
-  tok = 0;
-}
-
-// Record the properties of token tok in token_table, overwriting any
-// properties already stored for the same text.  Collisions are resolved
-// by probing downwards from the hash slot, wrapping from 0 to the top.
-// tok is stored as a pointer (not copied) when a new slot is claimed.
-static void store_token(const char *tok, token_type typ,
-                        const char *sk = 0, const char *oc = 0)
-{
-  unsigned slot = hash_string(tok, strlen(tok)) % TOKEN_TABLE_SIZE;
-  // Stop at the first unused slot or at the slot already holding tok.
-  while (token_table[slot].tok != 0
-         && strcmp(tok, token_table[slot].tok) != 0)
-    slot = (slot == 0 ? TOKEN_TABLE_SIZE - 1 : slot - 1);
-  if (token_table[slot].tok == 0) {
-    // Filling the last free slot is treated as a fatal error.
-    if (++ntokens == TOKEN_TABLE_SIZE)
-      assert(0);
-    token_table[slot].tok = tok;
-  }
-  token_table[slot].ti.set(typ, sk, oc);
-}
-
-
-// Properties reported for tokens that are not in token_table.
-token_info default_token_info;
-
-// Return the properties of the token whose text is [start, end).
-// The probe sequence matches store_token(): start at the hash slot and
-// walk downwards, wrapping from 0 to the top, until the token or an
-// unused slot is found.  Never returns null; unknown tokens yield
-// &default_token_info.
-const token_info *lookup_token(const char *start, const char *end)
-{
-  unsigned slot = hash_string(start, end - start) % TOKEN_TABLE_SIZE;
-  while (token_table[slot].tok != 0) {
-    const char *candidate = token_table[slot].tok;
-    if (strlen(candidate) == end - start
-        && memcmp(candidate, start, end - start) == 0)
-      return &token_table[slot].ti;
-    slot = (slot == 0 ? TOKEN_TABLE_SIZE - 1 : slot - 1);
-  }
-  return &default_token_info;
-}
-
-// Register the single-character ASCII tokens: letters in both cases,
-// digits (each digit is its own sort key), the punctuation characters
-// ".,:;?!" and the hyphen.
-static void init_ascii()
-{
-  // One-character scratch string shared by all loops below; strsave()
-  // makes the copy that is handed to the table.
-  char one[2];
-  one[1] = '\0';
-
-  for (const char *lc = "abcdefghijklmnopqrstuvwxyz"; *lc != '\0'; lc++) {
-    one[0] = *lc;
-    store_token(strsave(one), TOKEN_LOWER);
-    one[0] = cmupper(one[0]);
-    store_token(strsave(one), TOKEN_UPPER);
-  }
-
-  for (const char *digit = "0123456789"; *digit != '\0'; digit++) {
-    one[0] = *digit;
-    const char *saved = strsave(one);
-    store_token(saved, TOKEN_OTHER, saved);
-  }
-
-  for (const char *punct = ".,:;?!"; *punct != '\0'; punct++) {
-    one[0] = *punct;
-    store_token(strsave(one), TOKEN_PUNCT);
-  }
-
-  store_token("-", TOKEN_HYPHEN);
-}
-
-// Register a lower/upper pair of letter tokens that name each other as
-// their other-case form and share one sort key.  The lower-case token
-// is stored first.
-static void store_letter(const char *lower, const char *upper,
-                         const char *sort_key = 0)
-{
-  const char *const key = sort_key;
-  store_token(lower, TOKEN_LOWER, key, upper);
-  store_token(upper, TOKEN_UPPER, key, lower);
-}
-
-// Register a pair of single-character letters given by character code.
-static void init_letter(unsigned char uc_code, unsigned char lc_code,
-                        const char *sort_key)
-{
-  // Build each one-character string in a scratch buffer and keep a
-  // saved copy of it.
-  char text[2];
-  text[1] = 0;
-  text[0] = lc_code;
-  const char *lower = strsave(text);
-  text[0] = uc_code;
-  const char *upper = strsave(text);
-  store_letter(lower, upper, sort_key);
-}
-
-// Register the 8-bit letters with codes 0xc0-0xde (upper case) and
-// 0xe0-0xfe (lower case), then the two lower-case letters 0xdf and 0xff
-// whose upper-case forms are given as plain ASCII strings.
-static void init_latin1()
-{
-  // Upper-case code and sort key of each pair, in registration order.
-  // The matching lower-case code is always 0x20 higher.  0xd7 (and so
-  // 0xf7) is not registered.
-  static const struct {
-    unsigned char uc_code;
-    const char *sort_key;
-  } letters[] = {
-    { 0xc0, "a" }, { 0xc1, "a" }, { 0xc2, "a" }, { 0xc3, "a" },
-    { 0xc4, "a" }, { 0xc5, "a" }, { 0xc6, "ae" }, { 0xc7, "c" },
-    { 0xc8, "e" }, { 0xc9, "e" }, { 0xca, "e" }, { 0xcb, "e" },
-    { 0xcc, "i" }, { 0xcd, "i" }, { 0xce, "i" }, { 0xcf, "i" },
-    { 0xd0, "d" }, { 0xd1, "n" }, { 0xd2, "o" }, { 0xd3, "o" },
-    { 0xd4, "o" }, { 0xd5, "o" }, { 0xd6, "o" }, { 0xd8, "o" },
-    { 0xd9, "u" }, { 0xda, "u" }, { 0xdb, "u" }, { 0xdc, "u" },
-    { 0xdd, "y" }, { 0xde, THORN_SORT_KEY },
-  };
-  const unsigned count = sizeof(letters) / sizeof(letters[0]);
-  for (unsigned i = 0; i < count; i++)
-    init_letter(letters[i].uc_code, letters[i].uc_code + 0x20,
-                letters[i].sort_key);
-
-  store_token("\337", TOKEN_LOWER, "ss", "SS");
-  store_token("\377", TOKEN_LOWER, "y", "Y");
-}
-
-// Register a letter written as a two-character special character, in
-// both spellings: "\(xx" and "\[xx]".  l1 l2 name the lower-case
-// character, u1 u2 the upper-case one.
-static void init_two_char_letter(char l1, char l2, char u1, char u2,
-                                 const char *sk = 0)
-{
-  // "\(xx" spelling.
-  char paren[5] = { '\\', '(', l1, l2, '\0' };
-  const char *lower_paren = strsave(paren);
-  paren[2] = u1;
-  paren[3] = u2;
-  const char *upper_paren = strsave(paren);
-  store_letter(lower_paren, upper_paren, sk);
-
-  // "\[xx]" spelling.
-  char bracket[6] = { '\\', '[', u1, u2, ']', '\0' };
-  const char *upper_bracket = strsave(bracket);
-  bracket[2] = l1;
-  bracket[3] = l2;
-  const char *lower_bracket = strsave(bracket);
-  store_letter(lower_bracket, upper_bracket, sk);
-}
-
-// Register the special-character tokens: accented vowels, a list of
-// other two-character letters, a few letters with explicit sort keys,
-// and the hyphen and en-dash characters.
-static void init_special_chars()
-{
-  // Every accent mark combined with every one of "aeiouy"; only the
-  // vowel changes between the lower- and upper-case names.
-  static const char accents[] = "':^`~";
-  static const char vowels[] = "aeiouy";
-  for (const char *acc = accents; *acc != '\0'; acc++)
-    for (const char *vow = vowels; *vow != '\0'; vow++) {
-      // Use a variable to work around bug in gcc 2.0
-      char upper_vowel = cmupper(*vow);
-      init_two_char_letter(*acc, *vow, *acc, upper_vowel);
-    }
-
-  // Consecutive character pairs; the upper-case name applies cmupper
-  // to both characters.
-  static const char pairs[] = "/l/o~n,coeaeij";
-  for (const char *pr = pairs; *pr != '\0'; pr += 2) {
-    // Use variables to work around bug in gcc 2.0
-    char upper0 = cmupper(pr[0]);
-    char upper1 = cmupper(pr[1]);
-    init_two_char_letter(pr[0], pr[1], upper0, upper1);
-  }
-
-  init_two_char_letter('v', 's', 'v', 'S', "s");
-  init_two_char_letter('v', 'z', 'v', 'Z', "z");
-  init_two_char_letter('o', 'a', 'o', 'A', "a");
-  init_two_char_letter('T', 'p', 'T', 'P', THORN_SORT_KEY);
-  init_two_char_letter('-', 'd', '-', 'D');
-
-  // Lower-case letters whose upper-case form is spelled differently.
-  store_token("\\(ss", TOKEN_LOWER, 0, "SS");
-  store_token("\\[ss]", TOKEN_LOWER, 0, "SS");
-  store_token("\\(Sd", TOKEN_LOWER, "d", "\\(-D");
-  store_token("\\[Sd]", TOKEN_LOWER, "d", "\\[-D]");
-
-  // Hyphen and page-range separator.
-  store_token("\\(hy", TOKEN_HYPHEN);
-  store_token("\\[hy]", TOKEN_HYPHEN);
-  store_token("\\(en", TOKEN_RANGE_SEP);
-  store_token("\\[en]", TOKEN_RANGE_SEP);
-}
-
-// Register tokens written as string interpolations ("\*x", "\*[x]",
-// "\*(xx"): the accent strings and the -ms special letters.
-static void init_strings()
-{
-  // Each accent character is registered as "\*x" and then as "\*[x]".
-  // NOTE(review): '^' appears twice in the list; the repeat only
-  // re-stores the same two tokens.
-  char short_form[4] = { '\\', '*', '\0', '\0' };
-  char long_form[6] = { '\\', '*', '[', '\0', ']', '\0' };
-  for (const char *acc = "'`^^,:~v_o./;"; *acc != '\0'; acc++) {
-    short_form[2] = *acc;
-    store_token(strsave(short_form), TOKEN_ACCENT);
-    long_form[3] = *acc;
-    store_token(strsave(long_form), TOKEN_ACCENT);
-  }
-
-  // -ms special letters
-  store_letter("\\*(th", "\\*(Th", THORN_SORT_KEY);
-  store_letter("\\*[th]", "\\*[Th]", THORN_SORT_KEY);
-  store_letter("\\*(d-", "\\*(D-");
-  store_letter("\\*[d-]", "\\*[D-]");
-  store_letter("\\*(ae", "\\*(Ae", "ae");
-  store_letter("\\*[ae]", "\\*[Ae]", "ae");
-  store_letter("\\*(oe", "\\*(Oe", "oe");
-  store_letter("\\*[oe]", "\\*[Oe]", "oe");
-
-  // Lower-case-only strings with an ASCII upper-case form.
-  store_token("\\*3", TOKEN_LOWER, "y", "Y");
-  store_token("\\*8", TOKEN_LOWER, "ss", "SS");
-  store_token("\\*q", TOKEN_LOWER, "o", "O");
-}
-
-// Fills the token table during static initialization: constructing
-// the single file-scope instance below runs every init_*() routine.
-// token_table and default_token_info are defined earlier in this file,
-// so they are constructed before this object.
-struct token_initer {
-  token_initer()
-  {
-    init_ascii();
-    init_latin1();
-    init_special_chars();
-    init_strings();
-    default_token_info.set(TOKEN_OTHER);
-  }
-};
-
-static token_initer the_token_initer;
diff --git a/contrib/groff/refer/token.h b/contrib/groff/refer/token.h
deleted file mode 100644
index 6da430d6abad..000000000000
--- a/contrib/groff/refer/token.h
+++ /dev/null
@@ -1,88 +0,0 @@
-// -*- C++ -*-
-/* Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc.
- Written by James Clark (jjc@jclark.com)
-
-This file is part of groff.
-
-groff is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free
-Software Foundation; either version 2, or (at your option) any later
-version.
-
-groff is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received a copy of the GNU General Public License along
-with groff; see the file COPYING. If not, write to the Free Software
-Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
-
-// Classification of a token, as stored in token_info.
-enum token_type {
-  TOKEN_OTHER = 0,      // anything not listed below (the default)
-  TOKEN_UPPER = 1,      // upper-case letter
-  TOKEN_LOWER = 2,      // lower-case letter
-  TOKEN_ACCENT = 3,     // accent string
-  TOKEN_PUNCT = 4,      // punctuation character
-  TOKEN_HYPHEN = 5,     // hyphen
-  TOKEN_RANGE_SEP = 6   // range separator (en dash)
-};
-
-// Properties of one token: its classification, an optional sort key,
-// and, for letters, an optional spelling in the opposite case.
-class token_info {
-  token_type type;
-  const char *sort_key;         // null if the token has no explicit key
-  const char *other_case;       // null if no other-case form is recorded
-public:
-  token_info();
-  void set(token_type, const char *sk = 0, const char *oc = 0);
-
-  // Classification queries; each returns nonzero for exactly one type.
-  int is_other() const;
-  int is_upper() const;
-  int is_lower() const;
-  int is_accent() const;
-  int is_punct() const;
-  int is_hyphen() const;
-  int is_range_sep() const;
-
-  // Conversions of the token text [start, end); output is appended to
-  // result.
-  void lower_case(const char *start, const char *end, string &result) const;
-  void upper_case(const char *start, const char *end, string &result) const;
-  void sortify(const char *start, const char *end, string &result) const;
-  int sortify_non_empty(const char *start, const char *end) const;
-};
-
-// Nonzero if the token is an upper-case letter.
-inline int token_info::is_upper() const
-{
-  return (type == TOKEN_UPPER) ? 1 : 0;
-}
-
-// Nonzero if the token is a lower-case letter.
-inline int token_info::is_lower() const
-{
-  return (type == TOKEN_LOWER) ? 1 : 0;
-}
-
-// Nonzero if the token is an accent.
-inline int token_info::is_accent() const
-{
-  return (type == TOKEN_ACCENT) ? 1 : 0;
-}
-
-// Nonzero if the token has the default TOKEN_OTHER classification.
-inline int token_info::is_other() const
-{
-  return (type == TOKEN_OTHER) ? 1 : 0;
-}
-
-// Nonzero if the token is a punctuation character.
-inline int token_info::is_punct() const
-{
-  return (type == TOKEN_PUNCT) ? 1 : 0;
-}
-
-// Nonzero if the token is a hyphen.
-inline int token_info::is_hyphen() const
-{
-  return (type == TOKEN_HYPHEN) ? 1 : 0;
-}
-
-// Nonzero if the token is a range separator.
-inline int token_info::is_range_sep() const
-{
-  return (type == TOKEN_RANGE_SEP) ? 1 : 0;
-}
-
-int get_token(const char **ptr, const char *end); // consume one token from [*ptr, end), advancing *ptr; returns 0 if nothing is left, else 1
-const token_info *lookup_token(const char *start, const char *end); // properties of the token [start, end); never null, unknown tokens get default properties