diff options
author | Ruslan Ermilov <ru@FreeBSD.org> | 2001-04-17 12:23:50 +0000 |
---|---|---|
committer | Ruslan Ermilov <ru@FreeBSD.org> | 2001-04-17 12:23:50 +0000 |
commit | d0bf30505c50ee6b6f217d5e2bd1a67938ce81e4 (patch) | |
tree | d0ecc32b66c84ba03b40de7da66350fc02b90d02 /contrib/groff/refer | |
parent | af2786323e797d59708a2ca7929031c965bc2dde (diff) |
This commit was generated by cvs2svn to compensate for changes in r75587,
which included commits to RCS files with non-trunk default branches.
Notes
Notes:
svn path=/head/; revision=75588
Diffstat (limited to 'contrib/groff/refer')
-rw-r--r-- | contrib/groff/refer/Makefile.sub | 23 | ||||
-rw-r--r-- | contrib/groff/refer/TODO | 124 | ||||
-rw-r--r-- | contrib/groff/refer/command.cc | 807 | ||||
-rw-r--r-- | contrib/groff/refer/command.h | 36 | ||||
-rw-r--r-- | contrib/groff/refer/label.y | 1177 | ||||
-rw-r--r-- | contrib/groff/refer/ref.cc | 1160 | ||||
-rw-r--r-- | contrib/groff/refer/ref.h | 120 | ||||
-rw-r--r-- | contrib/groff/refer/refer.cc | 1228 | ||||
-rw-r--r-- | contrib/groff/refer/refer.h | 78 | ||||
-rw-r--r-- | contrib/groff/refer/refer.man | 1302 | ||||
-rw-r--r-- | contrib/groff/refer/token.cc | 378 | ||||
-rw-r--r-- | contrib/groff/refer/token.h | 88 |
12 files changed, 0 insertions, 6521 deletions
diff --git a/contrib/groff/refer/Makefile.sub b/contrib/groff/refer/Makefile.sub deleted file mode 100644 index 1631b5e36e05..000000000000 --- a/contrib/groff/refer/Makefile.sub +++ /dev/null @@ -1,23 +0,0 @@ -PROG=refer -MAN1=refer.n -XLIBS=$(LIBBIB) $(LIBGROFF) -MLIB=$(LIBM) -OBJS=\ - command.o \ - label.o \ - ref.o \ - refer.o \ - token.o -CCSRCS=\ - $(srcdir)/command.cc \ - $(srcdir)/ref.cc \ - $(srcdir)/refer.cc \ - $(srcdir)/token.cc -HDRS=\ - $(srcdir)/refer.h \ - $(srcdir)/token.h \ - $(srcdir)/command.h \ - $(srcdir)/ref.h -GRAM=$(srcdir)/label.y -YTABC=$(srcdir)/label.cc -NAMEPREFIX=$(g) diff --git a/contrib/groff/refer/TODO b/contrib/groff/refer/TODO deleted file mode 100644 index 5bbd9bff1e8c..000000000000 --- a/contrib/groff/refer/TODO +++ /dev/null @@ -1,124 +0,0 @@ -inline references - -Some sort of macro/subroutine that can cover several references. - -move-punctuation should ignore multiple punctuation characters. - -Make the index files machine independent. - -Allow search keys to be negated (with !) to indicate that the -reference should not contain the key. Ignore negated keys during -indexed searching. - -Provide an option with lkbib and lookbib that prints the location -(filename, position) of each reference. Need to map filename_id's -back to filenames. - -Rename join-authors to join-fields. Have a separate label-join-fields -command used by @ and #. - -Have some sort of quantifier: eg $.n#A means execute `$.n' for each -instance of an A field, setting $ to that field, and then join the -results using the join-authors command. - -no-text-in-bracket command which says not to allow post_text and -pre_text when the [] flags has been given. Useful for superscripted -footnotes. - -Make it possible to translate - to \(en in page ranges. - -Trim eign a bit. - -In indexed searching discard all numeric keys except dates. - -Allow `\ ' to separate article from first word. - -%also - -Option automatically to supply [] flags in every reference. 
- -See if we can avoid requiring a comma before jr. and so on -in find_last_name(). - -Cache sortified authors in authors string during tentative evaluation of -label specification. - -Possibly don't allow * and % expressions in the first part of ?:, | or -& expressions. - -Handle better the case where <> occurs inside functions and in the -first operand of ~. Or perhaps implement <> using some magic character -in the string. - -Should special treatment be given to lines beginning with . in -references? (Unix refer seems to treat them like `%'). - -Add global flag to control whether all files should be stat-ed after -loading, and whether they should be stat-ed before each search. -Perhaps make this dependent on the number of files there are. - -Option to truncate keys to truncate_len in linear searching. - -Allow multiple -f options in indxbib. - -In indxbib, possibly store common words rather than common words -filename. In this case store only words that are actually present in -the file. - -Perhaps we should put out an obnoxious copyright message when lookbib -starts up. - -Provide an option that writes a file containing just the references -actually used. Useful if you want to distribute a document. - -Have a magic token such that -%A <sort stuff><magic token><print stuff> -will print as though it were -%A <print stuff> -but sort as though it were -%A <sort stuff> -Do we need this if we can specify author alternatives for sorting? -No, provided we have separate alternatives for @. - -In consider_authors when last names are ambiguous we might be able to -use just the first name and not Jr. bit. Or we might be able to -abbreviate the author. - -It ought to be possible to specify an alternative field to sort on -instead of date. (ie if there's a field giving the type of document -- -these references should sort after any years) - -Provide a way to execute a command using a command-line option. - -Option to set the label-spec as a command-line option (-L). 
- -Command to to specify which fields can occur multiple times: -multiple AE - -Command to specify how various fields sort: -aort-as-name A -sort-as-date D -sort-as-title T -sort-as-other O - -Command to specify which fields are author fields: -# if we don't have A use field Q -author-fields AQ - -Commands to set properties of tokens. -sortify-token \(ae ae -uppercase-token \[ae] \[AE] - -Command to set the names of months: -months january february march april may ... - -Perhaps provide some sort of macro capability: -# perhaps a macro capability -defmacro foo -annotation-field $1 -endef - -Command to control strings used in capitalization -capitalize-start \s+2 -capitalize-end \s-2 -(perhaps make these arguments to the capitalize command.) diff --git a/contrib/groff/refer/command.cc b/contrib/groff/refer/command.cc deleted file mode 100644 index 004189eeb03a..000000000000 --- a/contrib/groff/refer/command.cc +++ /dev/null @@ -1,807 +0,0 @@ -// -*- C++ -*- -/* Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc. - Written by James Clark (jjc@jclark.com) - -This file is part of groff. - -groff is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free -Software Foundation; either version 2, or (at your option) any later -version. - -groff is distributed in the hope that it will be useful, but WITHOUT ANY -WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received a copy of the GNU General Public License along -with groff; see the file COPYING. If not, write to the Free Software -Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ - -#include "refer.h" -#include "refid.h" -#include "search.h" -#include "command.h" - -cset cs_field_name = csalpha; - -class input_item { - input_item *next; - char *filename; - int first_lineno; - string buffer; - const char *ptr; - const char *end; -public: - input_item(string &, const char *, int = 1); - ~input_item(); - int get_char(); - int peek_char(); - void skip_char(); - int get_location(const char **, int *); - - friend class input_stack; -}; - -input_item::input_item(string &s, const char *fn, int ln) -: filename(strsave(fn)), first_lineno(ln) -{ - buffer.move(s); - ptr = buffer.contents(); - end = ptr + buffer.length(); -} - -input_item::~input_item() -{ - a_delete filename; -} - -inline int input_item::peek_char() -{ - if (ptr >= end) - return EOF; - else - return (unsigned char)*ptr; -} - -inline int input_item::get_char() -{ - if (ptr >= end) - return EOF; - else - return (unsigned char)*ptr++; -} - -inline void input_item::skip_char() -{ - ptr++; -} - -int input_item::get_location(const char **filenamep, int *linenop) -{ - *filenamep = filename; - if (ptr == buffer.contents()) - *linenop = first_lineno; - else { - int ln = first_lineno; - const char *e = ptr - 1; - for (const char *p = buffer.contents(); p < e; p++) - if (*p == '\n') - ln++; - *linenop = ln; - } - return 1; -} - -class input_stack { - static input_item *top; -public: - static void init(); - static int get_char(); - static int peek_char(); - static void skip_char() { top->skip_char(); } - static void push_file(const char *); - static void push_string(string &, const char *, int); - static void error(const char *format, - const errarg &arg1 = empty_errarg, - const errarg &arg2 = empty_errarg, - const errarg &arg3 = empty_errarg); -}; - -input_item *input_stack::top = 0; - -void input_stack::init() -{ - while (top) { - input_item *tem = top; - top = top->next; - delete tem; - } -} - -int input_stack::get_char() -{ - while (top) { - int c = top->get_char(); - if (c >= 0) - return 
c; - input_item *tem = top; - top = top->next; - delete tem; - } - return -1; -} - -int input_stack::peek_char() -{ - while (top) { - int c = top->peek_char(); - if (c >= 0) - return c; - input_item *tem = top; - top = top->next; - delete tem; - } - return -1; -} - -void input_stack::push_file(const char *fn) -{ - FILE *fp; - if (strcmp(fn, "-") == 0) { - fp = stdin; - fn = "<standard input>"; - } - else { - errno = 0; - fp = fopen(fn, "r"); - if (fp == 0) { - error("can't open `%1': %2", fn, strerror(errno)); - return; - } - } - string buf; - int bol = 1; - int lineno = 1; - for (;;) { - int c = getc(fp); - if (bol && c == '.') { - // replace lines beginning with .R1 or .R2 with a blank line - c = getc(fp); - if (c == 'R') { - c = getc(fp); - if (c == '1' || c == '2') { - int cc = c; - c = getc(fp); - if (compatible_flag || c == ' ' || c == '\n' || c == EOF) { - while (c != '\n' && c != EOF) - c = getc(fp); - } - else { - buf += '.'; - buf += 'R'; - buf += cc; - } - } - else { - buf += '.'; - buf += 'R'; - } - } - else - buf += '.'; - } - if (c == EOF) - break; - if (illegal_input_char(c)) - error_with_file_and_line(fn, lineno, - "illegal input character code %1", int(c)); - else { - buf += c; - if (c == '\n') { - bol = 1; - lineno++; - } - else - bol = 0; - } - } - if (fp != stdin) - fclose(fp); - if (buf.length() > 0 && buf[buf.length() - 1] != '\n') - buf += '\n'; - input_item *it = new input_item(buf, fn); - it->next = top; - top = it; -} - -void input_stack::push_string(string &s, const char *filename, int lineno) -{ - input_item *it = new input_item(s, filename, lineno); - it->next = top; - top = it; -} - -void input_stack::error(const char *format, const errarg &arg1, - const errarg &arg2, const errarg &arg3) -{ - const char *filename; - int lineno; - for (input_item *it = top; it; it = it->next) - if (it->get_location(&filename, &lineno)) { - error_with_file_and_line(filename, lineno, format, arg1, arg2, arg3); - return; - } - ::error(format, arg1, arg2, 
arg3); -} - -void command_error(const char *format, const errarg &arg1, - const errarg &arg2, const errarg &arg3) -{ - input_stack::error(format, arg1, arg2, arg3); -} - -// # not recognized in "" -// \<newline> is recognized in "" -// # does not conceal newline -// if missing closing quote, word extends to end of line -// no special treatment of \ other than before newline -// \<newline> not recognized after # -// ; allowed as alternative to newline -// ; not recognized in "" -// don't clear word_buffer; just append on -// return -1 for EOF, 0 for newline, 1 for word - -int get_word(string &word_buffer) -{ - int c = input_stack::get_char(); - for (;;) { - if (c == '#') { - do { - c = input_stack::get_char(); - } while (c != '\n' && c != EOF); - break; - } - if (c == '\\' && input_stack::peek_char() == '\n') - input_stack::skip_char(); - else if (c != ' ' && c != '\t') - break; - c = input_stack::get_char(); - } - if (c == EOF) - return -1; - if (c == '\n' || c == ';') - return 0; - if (c == '"') { - for (;;) { - c = input_stack::peek_char(); - if (c == EOF || c == '\n') - break; - input_stack::skip_char(); - if (c == '"') { - int d = input_stack::peek_char(); - if (d == '"') - input_stack::skip_char(); - else - break; - } - else if (c == '\\') { - int d = input_stack::peek_char(); - if (d == '\n') - input_stack::skip_char(); - else - word_buffer += '\\'; - } - else - word_buffer += c; - } - return 1; - } - word_buffer += c; - for (;;) { - c = input_stack::peek_char(); - if (c == ' ' || c == '\t' || c == '\n' || c == '#' || c == ';') - break; - input_stack::skip_char(); - if (c == '\\') { - int d = input_stack::peek_char(); - if (d == '\n') - input_stack::skip_char(); - else - word_buffer += '\\'; - } - else - word_buffer += c; - } - return 1; -} - -union argument { - const char *s; - int n; -}; - -// This is for debugging. 
- -static void echo_command(int argc, argument *argv) -{ - for (int i = 0; i < argc; i++) - fprintf(stderr, "%s\n", argv[i].s); -} - -static void include_command(int argc, argument *argv) -{ - assert(argc == 1); - input_stack::push_file(argv[0].s); -} - -static void capitalize_command(int argc, argument *argv) -{ - if (argc > 0) - capitalize_fields = argv[0].s; - else - capitalize_fields.clear(); -} - -static void accumulate_command(int, argument *) -{ - accumulate = 1; -} - -static void no_accumulate_command(int, argument *) -{ - accumulate = 0; -} - -static void move_punctuation_command(int, argument *) -{ - move_punctuation = 1; -} - -static void no_move_punctuation_command(int, argument *) -{ - move_punctuation = 0; -} - -static void sort_command(int argc, argument *argv) -{ - if (argc == 0) - sort_fields = "AD"; - else - sort_fields = argv[0].s; - accumulate = 1; -} - -static void no_sort_command(int, argument *) -{ - sort_fields.clear(); -} - -static void articles_command(int argc, argument *argv) -{ - articles.clear(); - int i; - for (i = 0; i < argc; i++) { - articles += argv[i].s; - articles += '\0'; - } - int len = articles.length(); - for (i = 0; i < len; i++) - articles[i] = cmlower(articles[i]); -} - -static void database_command(int argc, argument *argv) -{ - for (int i = 0; i < argc; i++) - database_list.add_file(argv[i].s); -} - -static void default_database_command(int, argument *) -{ - search_default = 1; -} - -static void no_default_database_command(int, argument *) -{ - search_default = 0; -} - -static void bibliography_command(int argc, argument *argv) -{ - const char *saved_filename = current_filename; - int saved_lineno = current_lineno; - int saved_label_in_text = label_in_text; - label_in_text = 0; - if (!accumulate) - fputs(".]<\n", stdout); - for (int i = 0; i < argc; i++) - do_bib(argv[i].s); - if (accumulate) - output_references(); - else - fputs(".]>\n", stdout); - current_filename = saved_filename; - current_lineno = saved_lineno; - 
label_in_text = saved_label_in_text; -} - -static void annotate_command(int argc, argument *argv) -{ - if (argc > 0) - annotation_field = argv[0].s[0]; - else - annotation_field = 'X'; - if (argc == 2) - annotation_macro = argv[1].s; - else - annotation_macro = "AP"; -} - -static void no_annotate_command(int, argument *) -{ - annotation_macro.clear(); - annotation_field = -1; -} - -static void reverse_command(int, argument *argv) -{ - reverse_fields = argv[0].s; -} - -static void no_reverse_command(int, argument *) -{ - reverse_fields.clear(); -} - -static void abbreviate_command(int argc, argument *argv) -{ - abbreviate_fields = argv[0].s; - period_before_initial = argc > 1 ? argv[1].s : ". "; - period_before_last_name = argc > 2 ? argv[2].s : ". "; - period_before_other = argc > 3 ? argv[3].s : ". "; - period_before_hyphen = argc > 4 ? argv[4].s : "."; -} - -static void no_abbreviate_command(int, argument *) -{ - abbreviate_fields.clear(); -} - -string search_ignore_fields; - -static void search_ignore_command(int argc, argument *argv) -{ - if (argc > 0) - search_ignore_fields = argv[0].s; - else - search_ignore_fields = "XYZ"; - search_ignore_fields += '\0'; - linear_ignore_fields = search_ignore_fields.contents(); -} - -static void no_search_ignore_command(int, argument *) -{ - linear_ignore_fields = ""; -} - -static void search_truncate_command(int argc, argument *argv) -{ - if (argc > 0) - linear_truncate_len = argv[0].n; - else - linear_truncate_len = 6; -} - -static void no_search_truncate_command(int, argument *) -{ - linear_truncate_len = -1; -} - -static void discard_command(int argc, argument *argv) -{ - if (argc == 0) - discard_fields = "XYZ"; - else - discard_fields = argv[0].s; - accumulate = 1; -} - -static void no_discard_command(int, argument *) -{ - discard_fields.clear(); -} - -static void label_command(int, argument *argv) -{ - set_label_spec(argv[0].s); -} - -static void abbreviate_label_ranges_command(int argc, argument *argv) -{ - 
abbreviate_label_ranges = 1; - label_range_indicator = argc > 0 ? argv[0].s : "-"; -} - -static void no_abbreviate_label_ranges_command(int, argument *) -{ - abbreviate_label_ranges = 0; -} - -static void label_in_reference_command(int, argument *) -{ - label_in_reference = 1; -} - -static void no_label_in_reference_command(int, argument *) -{ - label_in_reference = 0; -} - -static void label_in_text_command(int, argument *) -{ - label_in_text = 1; -} - -static void no_label_in_text_command(int, argument *) -{ - label_in_text = 0; -} - -static void sort_adjacent_labels_command(int, argument *) -{ - sort_adjacent_labels = 1; -} - -static void no_sort_adjacent_labels_command(int, argument *) -{ - sort_adjacent_labels = 0; -} - -static void date_as_label_command(int argc, argument *argv) -{ - if (set_date_label_spec(argc > 0 ? argv[0].s : "D%a*")) - date_as_label = 1; -} - -static void no_date_as_label_command(int, argument *) -{ - date_as_label = 0; -} - -static void short_label_command(int, argument *argv) -{ - if (set_short_label_spec(argv[0].s)) - short_label_flag = 1; -} - -static void no_short_label_command(int, argument *) -{ - short_label_flag = 0; -} - -static void compatible_command(int, argument *) -{ - compatible_flag = 1; -} - -static void no_compatible_command(int, argument *) -{ - compatible_flag = 0; -} - -static void join_authors_command(int argc, argument *argv) -{ - join_authors_exactly_two = argv[0].s; - join_authors_default = argc > 1 ? argv[1].s : argv[0].s; - join_authors_last_two = argc == 3 ? 
argv[2].s : argv[0].s; -} - -static void bracket_label_command(int, argument *argv) -{ - pre_label = argv[0].s; - post_label = argv[1].s; - sep_label = argv[2].s; -} - -static void separate_label_second_parts_command(int, argument *argv) -{ - separate_label_second_parts = argv[0].s; -} - -static void et_al_command(int argc, argument *argv) -{ - et_al = argv[0].s; - et_al_min_elide = argv[1].n; - if (et_al_min_elide < 1) - et_al_min_elide = 1; - et_al_min_total = argc >= 3 ? argv[2].n : 0; -} - -static void no_et_al_command(int, argument *) -{ - et_al.clear(); - et_al_min_elide = 0; -} - -typedef void (*command_t)(int, argument *); - -/* arg_types is a string describing the numbers and types of arguments. -s means a string, i means an integer, f is a list of fields, F is -a single field, -? means that the previous argument is optional, * means that the -previous argument can occur any number of times. */ - -struct { - const char *name; - command_t func; - const char *arg_types; -} command_table[] = { - { "include", include_command, "s" }, - { "echo", echo_command, "s*" }, - { "capitalize", capitalize_command, "f?" }, - { "accumulate", accumulate_command, "" }, - { "no-accumulate", no_accumulate_command, "" }, - { "move-punctuation", move_punctuation_command, "" }, - { "no-move-punctuation", no_move_punctuation_command, "" }, - { "sort", sort_command, "s?" }, - { "no-sort", no_sort_command, "" }, - { "articles", articles_command, "s*" }, - { "database", database_command, "ss*" }, - { "default-database", default_database_command, "" }, - { "no-default-database", no_default_database_command, "" }, - { "bibliography", bibliography_command, "ss*" }, - { "annotate", annotate_command, "F?s?" }, - { "no-annotate", no_annotate_command, "" }, - { "reverse", reverse_command, "s" }, - { "no-reverse", no_reverse_command, "" }, - { "abbreviate", abbreviate_command, "ss?s?s?s?" }, - { "no-abbreviate", no_abbreviate_command, "" }, - { "search-ignore", search_ignore_command, "f?" 
}, - { "no-search-ignore", no_search_ignore_command, "" }, - { "search-truncate", search_truncate_command, "i?" }, - { "no-search-truncate", no_search_truncate_command, "" }, - { "discard", discard_command, "f?" }, - { "no-discard", no_discard_command, "" }, - { "label", label_command, "s" }, - { "abbreviate-label-ranges", abbreviate_label_ranges_command, "s?" }, - { "no-abbreviate-label-ranges", no_abbreviate_label_ranges_command, "" }, - { "label-in-reference", label_in_reference_command, "" }, - { "no-label-in-reference", no_label_in_reference_command, "" }, - { "label-in-text", label_in_text_command, "" }, - { "no-label-in-text", no_label_in_text_command, "" }, - { "sort-adjacent-labels", sort_adjacent_labels_command, "" }, - { "no-sort-adjacent-labels", no_sort_adjacent_labels_command, "" }, - { "date-as-label", date_as_label_command, "s?" }, - { "no-date-as-label", no_date_as_label_command, "" }, - { "short-label", short_label_command, "s" }, - { "no-short-label", no_short_label_command, "" }, - { "compatible", compatible_command, "" }, - { "no-compatible", no_compatible_command, "" }, - { "join-authors", join_authors_command, "sss?" }, - { "bracket-label", bracket_label_command, "sss" }, - { "separate-label-second-parts", separate_label_second_parts_command, "s" }, - { "et-al", et_al_command, "sii?" 
}, - { "no-et-al", no_et_al_command, "" }, -}; - -static int check_args(const char *types, const char *name, - int argc, argument *argv) -{ - int argno = 0; - while (*types) { - if (argc == 0) { - if (types[1] == '?') - break; - else if (types[1] == '*') { - assert(types[2] == '\0'); - break; - } - else { - input_stack::error("missing argument for command `%1'", name); - return 0; - } - } - switch (*types) { - case 's': - break; - case 'i': - { - char *ptr; - long n = strtol(argv->s, &ptr, 10); - if ((n == 0 && ptr == argv->s) - || *ptr != '\0') { - input_stack::error("argument %1 for command `%2' must be an integer", - argno + 1, name); - return 0; - } - argv->n = (int)n; - break; - } - case 'f': - { - for (const char *ptr = argv->s; *ptr != '\0'; ptr++) - if (!cs_field_name(*ptr)) { - input_stack::error("argument %1 for command `%2' must be a list of fields", - argno + 1, name); - return 0; - } - break; - } - case 'F': - if (argv->s[0] == '\0' || argv->s[1] != '\0' - || !cs_field_name(argv->s[0])) { - input_stack::error("argument %1 for command `%2' must be a field name", - argno + 1, name); - return 0; - } - break; - default: - assert(0); - } - if (types[1] == '?') - types += 2; - else if (types[1] != '*') - types += 1; - --argc; - ++argv; - ++argno; - } - if (argc > 0) { - input_stack::error("too many arguments for command `%1'", name); - return 0; - } - return 1; -} - -static void execute_command(const char *name, int argc, argument *argv) -{ - for (int i = 0; i < sizeof(command_table)/sizeof(command_table[0]); i++) - if (strcmp(name, command_table[i].name) == 0) { - if (check_args(command_table[i].arg_types, name, argc, argv)) - (*command_table[i].func)(argc, argv); - return; - } - input_stack::error("unknown command `%1'", name); -} - -static void command_loop() -{ - string command; - for (;;) { - command.clear(); - int res = get_word(command); - if (res != 1) { - if (res == 0) - continue; - break; - } - int argc = 0; - command += '\0'; - while ((res = 
get_word(command)) == 1) { - argc++; - command += '\0'; - } - argument *argv = new argument[argc]; - const char *ptr = command.contents(); - for (int i = 0; i < argc; i++) - argv[i].s = ptr = strchr(ptr, '\0') + 1; - execute_command(command.contents(), argc, argv); - a_delete argv; - if (res == -1) - break; - } -} - -void process_commands(const char *file) -{ - input_stack::init(); - input_stack::push_file(file); - command_loop(); -} - -void process_commands(string &s, const char *file, int lineno) -{ - input_stack::init(); - input_stack::push_string(s, file, lineno); - command_loop(); -} diff --git a/contrib/groff/refer/command.h b/contrib/groff/refer/command.h deleted file mode 100644 index c7085db6927b..000000000000 --- a/contrib/groff/refer/command.h +++ /dev/null @@ -1,36 +0,0 @@ -// -*- C++ -*- -/* Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc. - Written by James Clark (jjc@jclark.com) - -This file is part of groff. - -groff is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free -Software Foundation; either version 2, or (at your option) any later -version. - -groff is distributed in the hope that it will be useful, but WITHOUT ANY -WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received a copy of the GNU General Public License along -with groff; see the file COPYING. If not, write to the Free Software -Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ - -void process_commands(const char *file); -void process_commands(string &s, const char *file, int lineno); - -extern int accumulate; -extern int move_punctuation; -extern int search_default; -extern search_list database_list; -extern int label_in_text; -extern int label_in_reference; -extern int sort_adjacent_labels; -extern string pre_label; -extern string post_label; -extern string sep_label; - -extern void do_bib(const char *); -extern void output_references(); diff --git a/contrib/groff/refer/label.y b/contrib/groff/refer/label.y deleted file mode 100644 index 2c7c90951ab0..000000000000 --- a/contrib/groff/refer/label.y +++ /dev/null @@ -1,1177 +0,0 @@ -/* -*- C++ -*- - Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc. - Written by James Clark (jjc@jclark.com) - -This file is part of groff. - -groff is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free -Software Foundation; either version 2, or (at your option) any later -version. - -groff is distributed in the hope that it will be useful, but WITHOUT ANY -WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received a copy of the GNU General Public License along -with groff; see the file COPYING. If not, write to the Free Software -Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - -%{ - -#include "refer.h" -#include "refid.h" -#include "ref.h" -#include "token.h" - -int yylex(); -void yyerror(const char *); -int yyparse(); - -static const char *format_serial(char c, int n); - -struct label_info { - int start; - int length; - int count; - int total; - label_info(const string &); -}; - -label_info *lookup_label(const string &label); - -struct expression { - enum { - // Does the tentative label depend on the reference? 
- CONTAINS_VARIABLE = 01, - CONTAINS_STAR = 02, - CONTAINS_FORMAT = 04, - CONTAINS_AT = 010 - }; - virtual ~expression() { } - virtual void evaluate(int, const reference &, string &, - substring_position &) = 0; - virtual unsigned analyze() { return 0; } -}; - -class at_expr : public expression { -public: - at_expr() { } - void evaluate(int, const reference &, string &, substring_position &); - unsigned analyze() { return CONTAINS_VARIABLE|CONTAINS_AT; } -}; - -class format_expr : public expression { - char type; - int width; - int first_number; -public: - format_expr(char c, int w = 0, int f = 1) - : type(c), width(w), first_number(f) { } - void evaluate(int, const reference &, string &, substring_position &); - unsigned analyze() { return CONTAINS_FORMAT; } -}; - -class field_expr : public expression { - int number; - char name; -public: - field_expr(char nm, int num) : number(num), name(nm) { } - void evaluate(int, const reference &, string &, substring_position &); - unsigned analyze() { return CONTAINS_VARIABLE; } -}; - -class literal_expr : public expression { - string s; -public: - literal_expr(const char *ptr, int len) : s(ptr, len) { } - void evaluate(int, const reference &, string &, substring_position &); -}; - -class unary_expr : public expression { -protected: - expression *expr; -public: - unary_expr(expression *e) : expr(e) { } - ~unary_expr() { delete expr; } - void evaluate(int, const reference &, string &, substring_position &) = 0; - unsigned analyze() { return expr ? expr->analyze() : 0; } -}; - -// This caches the analysis of an expression. 
- -class analyzed_expr : public unary_expr { - unsigned flags; -public: - analyzed_expr(expression *); - void evaluate(int, const reference &, string &, substring_position &); - unsigned analyze() { return flags; } -}; - -class star_expr : public unary_expr { -public: - star_expr(expression *e) : unary_expr(e) { } - void evaluate(int, const reference &, string &, substring_position &); - unsigned analyze() { - return ((expr ? (expr->analyze() & ~CONTAINS_VARIABLE) : 0) - | CONTAINS_STAR); - } -}; - -typedef void map_func(const char *, const char *, string &); - -class map_expr : public unary_expr { - map_func *func; -public: - map_expr(expression *e, map_func *f) : unary_expr(e), func(f) { } - void evaluate(int, const reference &, string &, substring_position &); -}; - -typedef const char *extractor_func(const char *, const char *, const char **); - -class extractor_expr : public unary_expr { - int part; - extractor_func *func; -public: - enum { BEFORE = +1, MATCH = 0, AFTER = -1 }; - extractor_expr(expression *e, extractor_func *f, int pt) - : unary_expr(e), part(pt), func(f) { } - void evaluate(int, const reference &, string &, substring_position &); -}; - -class truncate_expr : public unary_expr { - int n; -public: - truncate_expr(expression *e, int i) : unary_expr(e), n(i) { } - void evaluate(int, const reference &, string &, substring_position &); -}; - -class separator_expr : public unary_expr { -public: - separator_expr(expression *e) : unary_expr(e) { } - void evaluate(int, const reference &, string &, substring_position &); -}; - -class binary_expr : public expression { -protected: - expression *expr1; - expression *expr2; -public: - binary_expr(expression *e1, expression *e2) : expr1(e1), expr2(e2) { } - ~binary_expr() { delete expr1; delete expr2; } - void evaluate(int, const reference &, string &, substring_position &) = 0; - unsigned analyze() { - return (expr1 ? expr1->analyze() : 0) | (expr2 ? 
expr2->analyze() : 0); - } -}; - -class alternative_expr : public binary_expr { -public: - alternative_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { } - void evaluate(int, const reference &, string &, substring_position &); -}; - -class list_expr : public binary_expr { -public: - list_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { } - void evaluate(int, const reference &, string &, substring_position &); -}; - -class substitute_expr : public binary_expr { -public: - substitute_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { } - void evaluate(int, const reference &, string &, substring_position &); -}; - -class ternary_expr : public expression { -protected: - expression *expr1; - expression *expr2; - expression *expr3; -public: - ternary_expr(expression *e1, expression *e2, expression *e3) - : expr1(e1), expr2(e2), expr3(e3) { } - ~ternary_expr() { delete expr1; delete expr2; delete expr3; } - void evaluate(int, const reference &, string &, substring_position &) = 0; - unsigned analyze() { - return ((expr1 ? expr1->analyze() : 0) - | (expr2 ? expr2->analyze() : 0) - | (expr3 ? 
expr3->analyze() : 0)); - } -}; - -class conditional_expr : public ternary_expr { -public: - conditional_expr(expression *e1, expression *e2, expression *e3) - : ternary_expr(e1, e2, e3) { } - void evaluate(int, const reference &, string &, substring_position &); -}; - -static expression *parsed_label = 0; -static expression *parsed_date_label = 0; -static expression *parsed_short_label = 0; - -static expression *parse_result; - -string literals; - -%} - -%union { - int num; - expression *expr; - struct { int ndigits; int val; } dig; - struct { int start; int len; } str; -} - -/* uppercase or lowercase letter */ -%token <num> TOKEN_LETTER -/* literal characters */ -%token <str> TOKEN_LITERAL -/* digit */ -%token <num> TOKEN_DIGIT - -%type <expr> conditional -%type <expr> alternative -%type <expr> list -%type <expr> string -%type <expr> substitute -%type <expr> optional_conditional -%type <num> number -%type <dig> digits -%type <num> optional_number -%type <num> flag - -%% - -expr: - optional_conditional - { parse_result = ($1 ? new analyzed_expr($1) : 0); } - ; - -conditional: - alternative - { $$ = $1; } - | alternative '?' 
optional_conditional ':' conditional - { $$ = new conditional_expr($1, $3, $5); } - ; - -optional_conditional: - /* empty */ - { $$ = 0; } - | conditional - { $$ = $1; } - ; - -alternative: - list - { $$ = $1; } - | alternative '|' list - { $$ = new alternative_expr($1, $3); } - | alternative '&' list - { $$ = new conditional_expr($1, $3, 0); } - ; - -list: - substitute - { $$ = $1; } - | list substitute - { $$ = new list_expr($1, $2); } - ; - -substitute: - string - { $$ = $1; } - | substitute '~' string - { $$ = new substitute_expr($1, $3); } - ; - -string: - '@' - { $$ = new at_expr; } - | TOKEN_LITERAL - { - $$ = new literal_expr(literals.contents() + $1.start, - $1.len); - } - | TOKEN_LETTER - { $$ = new field_expr($1, 0); } - | TOKEN_LETTER number - { $$ = new field_expr($1, $2 - 1); } - | '%' TOKEN_LETTER - { - switch ($2) { - case 'I': - case 'i': - case 'A': - case 'a': - $$ = new format_expr($2); - break; - default: - command_error("unrecognized format `%1'", char($2)); - $$ = new format_expr('a'); - break; - } - } - - | '%' digits - { - $$ = new format_expr('0', $2.ndigits, $2.val); - } - | string '.' 
flag TOKEN_LETTER optional_number - { - switch ($4) { - case 'l': - $$ = new map_expr($1, lowercase); - break; - case 'u': - $$ = new map_expr($1, uppercase); - break; - case 'c': - $$ = new map_expr($1, capitalize); - break; - case 'r': - $$ = new map_expr($1, reverse_name); - break; - case 'a': - $$ = new map_expr($1, abbreviate_name); - break; - case 'y': - $$ = new extractor_expr($1, find_year, $3); - break; - case 'n': - $$ = new extractor_expr($1, find_last_name, $3); - break; - default: - $$ = $1; - command_error("unknown function `%1'", char($4)); - break; - } - } - - | string '+' number - { $$ = new truncate_expr($1, $3); } - | string '-' number - { $$ = new truncate_expr($1, -$3); } - | string '*' - { $$ = new star_expr($1); } - | '(' optional_conditional ')' - { $$ = $2; } - | '<' optional_conditional '>' - { $$ = new separator_expr($2); } - ; - -optional_number: - /* empty */ - { $$ = -1; } - | number - { $$ = $1; } - ; - -number: - TOKEN_DIGIT - { $$ = $1; } - | number TOKEN_DIGIT - { $$ = $1*10 + $2; } - ; - -digits: - TOKEN_DIGIT - { $$.ndigits = 1; $$.val = $1; } - | digits TOKEN_DIGIT - { $$.ndigits = $1.ndigits + 1; $$.val = $1.val*10 + $2; } - ; - - -flag: - /* empty */ - { $$ = 0; } - | '+' - { $$ = 1; } - | '-' - { $$ = -1; } - ; - -%% - -/* bison defines const to be empty unless __STDC__ is defined, which it -isn't under cfront */ - -#ifdef const -#undef const -#endif - -const char *spec_ptr; -const char *spec_end; -const char *spec_cur; - -int yylex() -{ - while (spec_ptr < spec_end && csspace(*spec_ptr)) - spec_ptr++; - spec_cur = spec_ptr; - if (spec_ptr >= spec_end) - return 0; - unsigned char c = *spec_ptr++; - if (csalpha(c)) { - yylval.num = c; - return TOKEN_LETTER; - } - if (csdigit(c)) { - yylval.num = c - '0'; - return TOKEN_DIGIT; - } - if (c == '\'') { - yylval.str.start = literals.length(); - for (; spec_ptr < spec_end; spec_ptr++) { - if (*spec_ptr == '\'') { - if (++spec_ptr < spec_end && *spec_ptr == '\'') - literals += '\''; 
- else { - yylval.str.len = literals.length() - yylval.str.start; - return TOKEN_LITERAL; - } - } - else - literals += *spec_ptr; - } - yylval.str.len = literals.length() - yylval.str.start; - return TOKEN_LITERAL; - } - return c; -} - -int set_label_spec(const char *label_spec) -{ - spec_cur = spec_ptr = label_spec; - spec_end = strchr(label_spec, '\0'); - literals.clear(); - if (yyparse()) - return 0; - delete parsed_label; - parsed_label = parse_result; - return 1; -} - -int set_date_label_spec(const char *label_spec) -{ - spec_cur = spec_ptr = label_spec; - spec_end = strchr(label_spec, '\0'); - literals.clear(); - if (yyparse()) - return 0; - delete parsed_date_label; - parsed_date_label = parse_result; - return 1; -} - -int set_short_label_spec(const char *label_spec) -{ - spec_cur = spec_ptr = label_spec; - spec_end = strchr(label_spec, '\0'); - literals.clear(); - if (yyparse()) - return 0; - delete parsed_short_label; - parsed_short_label = parse_result; - return 1; -} - -void yyerror(const char *message) -{ - if (spec_cur < spec_end) - command_error("label specification %1 before `%2'", message, spec_cur); - else - command_error("label specification %1 at end of string", - message, spec_cur); -} - -void at_expr::evaluate(int tentative, const reference &ref, - string &result, substring_position &) -{ - if (tentative) - ref.canonicalize_authors(result); - else { - const char *end, *start = ref.get_authors(&end); - if (start) - result.append(start, end - start); - } -} - -void format_expr::evaluate(int tentative, const reference &ref, - string &result, substring_position &) -{ - if (tentative) - return; - const label_info *lp = ref.get_label_ptr(); - int num = lp == 0 ? 
ref.get_number() : lp->count; - if (type != '0') - result += format_serial(type, num + 1); - else { - const char *ptr = i_to_a(num + first_number); - int pad = width - strlen(ptr); - while (--pad >= 0) - result += '0'; - result += ptr; - } -} - -static const char *format_serial(char c, int n) -{ - assert(n > 0); - static char buf[128]; // more than enough. - switch (c) { - case 'i': - case 'I': - { - char *p = buf; - // troff uses z and w to represent 10000 and 5000 in Roman - // numerals; I can find no historical basis for this usage - const char *s = c == 'i' ? "zwmdclxvi" : "ZWMDCLXVI"; - if (n >= 40000) - return i_to_a(n); - while (n >= 10000) { - *p++ = s[0]; - n -= 10000; - } - for (int i = 1000; i > 0; i /= 10, s += 2) { - int m = n/i; - n -= m*i; - switch (m) { - case 3: - *p++ = s[2]; - /* falls through */ - case 2: - *p++ = s[2]; - /* falls through */ - case 1: - *p++ = s[2]; - break; - case 4: - *p++ = s[2]; - *p++ = s[1]; - break; - case 8: - *p++ = s[1]; - *p++ = s[2]; - *p++ = s[2]; - *p++ = s[2]; - break; - case 7: - *p++ = s[1]; - *p++ = s[2]; - *p++ = s[2]; - break; - case 6: - *p++ = s[1]; - *p++ = s[2]; - break; - case 5: - *p++ = s[1]; - break; - case 9: - *p++ = s[2]; - *p++ = s[0]; - } - } - *p = 0; - break; - } - case 'a': - case 'A': - { - char *p = buf; - // this is derived from troff/reg.c - while (n > 0) { - int d = n % 26; - if (d == 0) - d = 26; - n -= d; - n /= 26; - *p++ = c + d - 1; // ASCII dependent - } - *p-- = 0; - // Reverse it. 
- char *q = buf; - while (q < p) { - char temp = *q; - *q = *p; - *p = temp; - --p; - ++q; - } - break; - } - default: - assert(0); - } - return buf; -} - -void field_expr::evaluate(int, const reference &ref, - string &result, substring_position &) -{ - const char *end; - const char *start = ref.get_field(name, &end); - if (start) { - start = nth_field(number, start, &end); - if (start) - result.append(start, end - start); - } -} - -void literal_expr::evaluate(int, const reference &, - string &result, substring_position &) -{ - result += s; -} - -analyzed_expr::analyzed_expr(expression *e) -: unary_expr(e), flags(e ? e->analyze() : 0) -{ -} - -void analyzed_expr::evaluate(int tentative, const reference &ref, - string &result, substring_position &pos) -{ - if (expr) - expr->evaluate(tentative, ref, result, pos); -} - -void star_expr::evaluate(int tentative, const reference &ref, - string &result, substring_position &pos) -{ - const label_info *lp = ref.get_label_ptr(); - if (!tentative - && (lp == 0 || lp->total > 1) - && expr) - expr->evaluate(tentative, ref, result, pos); -} - -void separator_expr::evaluate(int tentative, const reference &ref, - string &result, substring_position &pos) -{ - int start_length = result.length(); - int is_first = pos.start < 0; - if (expr) - expr->evaluate(tentative, ref, result, pos); - if (is_first) { - pos.start = start_length; - pos.length = result.length() - start_length; - } -} - -void map_expr::evaluate(int tentative, const reference &ref, - string &result, substring_position &) -{ - if (expr) { - string temp; - substring_position temp_pos; - expr->evaluate(tentative, ref, temp, temp_pos); - (*func)(temp.contents(), temp.contents() + temp.length(), result); - } -} - -void extractor_expr::evaluate(int tentative, const reference &ref, - string &result, substring_position &) -{ - if (expr) { - string temp; - substring_position temp_pos; - expr->evaluate(tentative, ref, temp, temp_pos); - const char *end, *start = 
(*func)(temp.contents(), - temp.contents() + temp.length(), - &end); - switch (part) { - case BEFORE: - if (start) - result.append(temp.contents(), start - temp.contents()); - else - result += temp; - break; - case MATCH: - if (start) - result.append(start, end - start); - break; - case AFTER: - if (start) - result.append(end, temp.contents() + temp.length() - end); - break; - default: - assert(0); - } - } -} - -static void first_part(int len, const char *ptr, const char *end, - string &result) -{ - for (;;) { - const char *token_start = ptr; - if (!get_token(&ptr, end)) - break; - const token_info *ti = lookup_token(token_start, ptr); - int counts = ti->sortify_non_empty(token_start, ptr); - if (counts && --len < 0) - break; - if (counts || ti->is_accent()) - result.append(token_start, ptr - token_start); - } -} - -static void last_part(int len, const char *ptr, const char *end, - string &result) -{ - const char *start = ptr; - int count = 0; - for (;;) { - const char *token_start = ptr; - if (!get_token(&ptr, end)) - break; - const token_info *ti = lookup_token(token_start, ptr); - if (ti->sortify_non_empty(token_start, ptr)) - count++; - } - ptr = start; - int skip = count - len; - if (skip > 0) { - for (;;) { - const char *token_start = ptr; - if (!get_token(&ptr, end)) - assert(0); - const token_info *ti = lookup_token(token_start, ptr); - if (ti->sortify_non_empty(token_start, ptr) && --skip < 0) { - ptr = token_start; - break; - } - } - } - first_part(len, ptr, end, result); -} - -void truncate_expr::evaluate(int tentative, const reference &ref, - string &result, substring_position &) -{ - if (expr) { - string temp; - substring_position temp_pos; - expr->evaluate(tentative, ref, temp, temp_pos); - const char *start = temp.contents(); - const char *end = start + temp.length(); - if (n > 0) - first_part(n, start, end, result); - else if (n < 0) - last_part(-n, start, end, result); - } -} - -void alternative_expr::evaluate(int tentative, const reference &ref, - 
string &result, substring_position &pos) -{ - int start_length = result.length(); - if (expr1) - expr1->evaluate(tentative, ref, result, pos); - if (result.length() == start_length && expr2) - expr2->evaluate(tentative, ref, result, pos); -} - -void list_expr::evaluate(int tentative, const reference &ref, - string &result, substring_position &pos) -{ - if (expr1) - expr1->evaluate(tentative, ref, result, pos); - if (expr2) - expr2->evaluate(tentative, ref, result, pos); -} - -void substitute_expr::evaluate(int tentative, const reference &ref, - string &result, substring_position &pos) -{ - int start_length = result.length(); - if (expr1) - expr1->evaluate(tentative, ref, result, pos); - if (result.length() > start_length && result[result.length() - 1] == '-') { - // ought to see if pos covers the - - result.set_length(result.length() - 1); - if (expr2) - expr2->evaluate(tentative, ref, result, pos); - } -} - -void conditional_expr::evaluate(int tentative, const reference &ref, - string &result, substring_position &pos) -{ - string temp; - substring_position temp_pos; - if (expr1) - expr1->evaluate(tentative, ref, temp, temp_pos); - if (temp.length() > 0) { - if (expr2) - expr2->evaluate(tentative, ref, result, pos); - } - else { - if (expr3) - expr3->evaluate(tentative, ref, result, pos); - } -} - -void reference::pre_compute_label() -{ - if (parsed_label != 0 - && (parsed_label->analyze() & expression::CONTAINS_VARIABLE)) { - label.clear(); - substring_position temp_pos; - parsed_label->evaluate(1, *this, label, temp_pos); - label_ptr = lookup_label(label); - } -} - -void reference::compute_label() -{ - label.clear(); - if (parsed_label) - parsed_label->evaluate(0, *this, label, separator_pos); - if (short_label_flag && parsed_short_label) - parsed_short_label->evaluate(0, *this, short_label, short_separator_pos); - if (date_as_label) { - string new_date; - if (parsed_date_label) { - substring_position temp_pos; - parsed_date_label->evaluate(0, *this, new_date, 
temp_pos); - } - set_date(new_date); - } - if (label_ptr) - label_ptr->count += 1; -} - -void reference::immediate_compute_label() -{ - if (label_ptr) - label_ptr->total = 2; // force use of disambiguator - compute_label(); -} - -int reference::merge_labels(reference **v, int n, label_type type, - string &result) -{ - if (abbreviate_label_ranges) - return merge_labels_by_number(v, n, type, result); - else - return merge_labels_by_parts(v, n, type, result); -} - -int reference::merge_labels_by_number(reference **v, int n, label_type type, - string &result) -{ - if (n <= 1) - return 0; - int num = get_number(); - // Only merge three or more labels. - if (v[0]->get_number() != num + 1 - || v[1]->get_number() != num + 2) - return 0; - int i; - for (i = 2; i < n; i++) - if (v[i]->get_number() != num + i + 1) - break; - result = get_label(type); - result += label_range_indicator; - result += v[i - 1]->get_label(type); - return i; -} - -const substring_position &reference::get_separator_pos(label_type type) const -{ - if (type == SHORT_LABEL && short_label_flag) - return short_separator_pos; - else - return separator_pos; -} - -const string &reference::get_label(label_type type) const -{ - if (type == SHORT_LABEL && short_label_flag) - return short_label; - else - return label; -} - -int reference::merge_labels_by_parts(reference **v, int n, label_type type, - string &result) -{ - if (n <= 0) - return 0; - const string &lb = get_label(type); - const substring_position &sp = get_separator_pos(type); - if (sp.start < 0 - || sp.start != v[0]->get_separator_pos(type).start - || memcmp(lb.contents(), v[0]->get_label(type).contents(), - sp.start) != 0) - return 0; - result = lb; - int i = 0; - do { - result += separate_label_second_parts; - const substring_position &s = v[i]->get_separator_pos(type); - int sep_end_pos = s.start + s.length; - result.append(v[i]->get_label(type).contents() + sep_end_pos, - v[i]->get_label(type).length() - sep_end_pos); - } while (++i < n - && 
sp.start == v[i]->get_separator_pos(type).start - && memcmp(lb.contents(), v[i]->get_label(type).contents(), - sp.start) == 0); - return i; -} - -string label_pool; - -label_info::label_info(const string &s) -: start(label_pool.length()), length(s.length()), count(0), total(1) -{ - label_pool += s; -} - -static label_info **label_table = 0; -static int label_table_size = 0; -static int label_table_used = 0; - -label_info *lookup_label(const string &label) -{ - if (label_table == 0) { - label_table = new label_info *[17]; - label_table_size = 17; - for (int i = 0; i < 17; i++) - label_table[i] = 0; - } - unsigned h = hash_string(label.contents(), label.length()) % label_table_size; - label_info **ptr; - for (ptr = label_table + h; - *ptr != 0; - (ptr == label_table) - ? (ptr = label_table + label_table_size - 1) - : ptr--) - if ((*ptr)->length == label.length() - && memcmp(label_pool.contents() + (*ptr)->start, label.contents(), - label.length()) == 0) { - (*ptr)->total += 1; - return *ptr; - } - label_info *result = *ptr = new label_info(label); - if (++label_table_used * 2 > label_table_size) { - // Rehash the table. - label_info **old_table = label_table; - int old_size = label_table_size; - label_table_size = next_size(label_table_size); - label_table = new label_info *[label_table_size]; - int i; - for (i = 0; i < label_table_size; i++) - label_table[i] = 0; - for (i = 0; i < old_size; i++) - if (old_table[i]) { - unsigned h = hash_string(label_pool.contents() + old_table[i]->start, - old_table[i]->length); - label_info **p; - for (p = label_table + (h % label_table_size); - *p != 0; - (p == label_table) - ? 
(p = label_table + label_table_size - 1) - : --p) - ; - *p = old_table[i]; - } - a_delete old_table; - } - return result; -} - -void clear_labels() -{ - for (int i = 0; i < label_table_size; i++) { - delete label_table[i]; - label_table[i] = 0; - } - label_table_used = 0; - label_pool.clear(); -} - -static void consider_authors(reference **start, reference **end, int i); - -void compute_labels(reference **v, int n) -{ - if (parsed_label - && (parsed_label->analyze() & expression::CONTAINS_AT) - && sort_fields.length() >= 2 - && sort_fields[0] == 'A' - && sort_fields[1] == '+') - consider_authors(v, v + n, 0); - for (int i = 0; i < n; i++) - v[i]->compute_label(); -} - - -/* A reference with a list of authors <A0,A1,...,AN> _needs_ author i -where 0 <= i <= N if there exists a reference with a list of authors -<B0,B1,...,BM> such that <A0,A1,...,AN> != <B0,B1,...,BM> and M >= i -and Aj = Bj for 0 <= j < i. In this case if we can't say ``A0, -A1,...,A(i-1) et al'' because this would match both <A0,A1,...,AN> and -<B0,B1,...,BM>. If a reference needs author i we only have to call -need_author(j) for some j >= i such that the reference also needs -author j. */ - -/* This function handles 2 tasks: -determine which authors are needed (cannot be elided with et al.); -determine which authors can have only last names in the labels. - -References >= start and < end have the same first i author names. -Also they're sorted by A+. */ - -static void consider_authors(reference **start, reference **end, int i) -{ - if (start >= end) - return; - reference **p = start; - if (i >= (*p)->get_nauthors()) { - for (++p; p < end && i >= (*p)->get_nauthors(); p++) - ; - if (p < end && i > 0) { - // If we have an author list <A B C> and an author list <A B C D>, - // then both lists need C. 
- for (reference **q = start; q < end; q++) - (*q)->need_author(i - 1); - } - start = p; - } - while (p < end) { - reference **last_name_start = p; - reference **name_start = p; - for (++p; - p < end && i < (*p)->get_nauthors() - && same_author_last_name(**last_name_start, **p, i); - p++) { - if (!same_author_name(**name_start, **p, i)) { - consider_authors(name_start, p, i + 1); - name_start = p; - } - } - consider_authors(name_start, p, i + 1); - if (last_name_start == name_start) { - for (reference **q = last_name_start; q < p; q++) - (*q)->set_last_name_unambiguous(i); - } - // If we have an author list <A B C D> and <A B C E>, then the lists - // need author D and E respectively. - if (name_start > start || p < end) { - for (reference **q = last_name_start; q < p; q++) - (*q)->need_author(i); - } - } -} - -int same_author_last_name(const reference &r1, const reference &r2, int n) -{ - const char *ae1; - const char *as1 = r1.get_sort_field(0, n, 0, &ae1); - assert(as1 != 0); - const char *ae2; - const char *as2 = r2.get_sort_field(0, n, 0, &ae2); - assert(as2 != 0); - return ae1 - as1 == ae2 - as2 && memcmp(as1, as2, ae1 - as1) == 0; -} - -int same_author_name(const reference &r1, const reference &r2, int n) -{ - const char *ae1; - const char *as1 = r1.get_sort_field(0, n, -1, &ae1); - assert(as1 != 0); - const char *ae2; - const char *as2 = r2.get_sort_field(0, n, -1, &ae2); - assert(as2 != 0); - return ae1 - as1 == ae2 - as2 && memcmp(as1, as2, ae1 - as1) == 0; -} - - -void int_set::set(int i) -{ - assert(i >= 0); - int bytei = i >> 3; - if (bytei >= v.length()) { - int old_length = v.length(); - v.set_length(bytei + 1); - for (int j = old_length; j <= bytei; j++) - v[j] = 0; - } - v[bytei] |= 1 << (i & 7); -} - -int int_set::get(int i) const -{ - assert(i >= 0); - int bytei = i >> 3; - return bytei >= v.length() ? 
0 : (v[bytei] & (1 << (i & 7))) != 0; -} - -void reference::set_last_name_unambiguous(int i) -{ - last_name_unambiguous.set(i); -} - -void reference::need_author(int n) -{ - if (n > last_needed_author) - last_needed_author = n; -} - -const char *reference::get_authors(const char **end) const -{ - if (!computed_authors) { - ((reference *)this)->computed_authors = 1; - string &result = ((reference *)this)->authors; - int na = get_nauthors(); - result.clear(); - for (int i = 0; i < na; i++) { - if (last_name_unambiguous.get(i)) { - const char *e, *start = get_author_last_name(i, &e); - assert(start != 0); - result.append(start, e - start); - } - else { - const char *e, *start = get_author(i, &e); - assert(start != 0); - result.append(start, e - start); - } - if (i == last_needed_author - && et_al.length() > 0 - && et_al_min_elide > 0 - && last_needed_author + et_al_min_elide < na - && na >= et_al_min_total) { - result += et_al; - break; - } - if (i < na - 1) { - if (na == 2) - result += join_authors_exactly_two; - else if (i < na - 2) - result += join_authors_default; - else - result += join_authors_last_two; - } - } - } - const char *start = authors.contents(); - *end = start + authors.length(); - return start; -} - -int reference::get_nauthors() const -{ - if (nauthors < 0) { - const char *dummy; - int na; - for (na = 0; get_author(na, &dummy) != 0; na++) - ; - ((reference *)this)->nauthors = na; - } - return nauthors; -} diff --git a/contrib/groff/refer/ref.cc b/contrib/groff/refer/ref.cc deleted file mode 100644 index c3517b194598..000000000000 --- a/contrib/groff/refer/ref.cc +++ /dev/null @@ -1,1160 +0,0 @@ -// -*- C++ -*- -/* Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc. -Written by James Clark (jjc@jclark.com) - -This file is part of groff. 
- -groff is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free -Software Foundation; either version 2, or (at your option) any later -version. - -groff is distributed in the hope that it will be useful, but WITHOUT ANY -WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received a copy of the GNU General Public License along -with groff; see the file COPYING. If not, write to the Free Software -Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - -#include "refer.h" -#include "refid.h" -#include "ref.h" -#include "token.h" - -static const char *find_day(const char *, const char *, const char **); -static int find_month(const char *start, const char *end); -static void abbreviate_names(string &); - -#define DEFAULT_ARTICLES "the\000a\000an" - -string articles(DEFAULT_ARTICLES, sizeof(DEFAULT_ARTICLES)); - -// Multiple occurrences of fields are separated by FIELD_SEPARATOR. 
-const char FIELD_SEPARATOR = '\0'; - -const char MULTI_FIELD_NAMES[] = "AE"; -const char *AUTHOR_FIELDS = "AQ"; - -enum { OTHER, JOURNAL_ARTICLE, BOOK, ARTICLE_IN_BOOK, TECH_REPORT, BELL_TM }; - -const char *reference_types[] = { - "other", - "journal-article", - "book", - "article-in-book", - "tech-report", - "bell-tm", -}; - -static string temp_fields[256]; - -reference::reference(const char *start, int len, reference_id *ridp) -: h(0), merged(0), no(-1), field(0), nfields(0), label_ptr(0), - computed_authors(0), last_needed_author(-1), nauthors(-1) -{ - int i; - for (i = 0; i < 256; i++) - field_index[i] = NULL_FIELD_INDEX; - if (ridp) - rid = *ridp; - if (start == 0) - return; - if (len <= 0) - return; - const char *end = start + len; - const char *ptr = start; - assert(*ptr == '%'); - while (ptr < end) { - if (ptr + 1 < end && ptr[1] != '\0' - && ((ptr[1] != '%' && ptr[1] == annotation_field) - || (ptr + 2 < end && ptr[1] == '%' && ptr[2] != '\0' - && discard_fields.search(ptr[2]) < 0))) { - if (ptr[1] == '%') - ptr++; - string &f = temp_fields[(unsigned char)ptr[1]]; - ptr += 2; - while (ptr < end && csspace(*ptr)) - ptr++; - for (;;) { - for (;;) { - if (ptr >= end) { - f += '\n'; - break; - } - f += *ptr; - if (*ptr++ == '\n') - break; - } - if (ptr >= end || *ptr == '%') - break; - } - } - else if (ptr + 1 < end && ptr[1] != '\0' && ptr[1] != '%' - && discard_fields.search(ptr[1]) < 0) { - string &f = temp_fields[(unsigned char)ptr[1]]; - if (f.length() > 0) { - if (strchr(MULTI_FIELD_NAMES, ptr[1]) != 0) - f += FIELD_SEPARATOR; - else - f.clear(); - } - ptr += 2; - if (ptr < end) { - if (*ptr == ' ') - ptr++; - for (;;) { - const char *p = ptr; - while (ptr < end && *ptr != '\n') - ptr++; - // strip trailing white space - const char *q = ptr; - while (q > p && q[-1] != '\n' && csspace(q[-1])) - q--; - while (p < q) - f += *p++; - if (ptr >= end) - break; - ptr++; - if (ptr >= end) - break; - if (*ptr == '%') - break; - f += ' '; - } - } - } - else { - // 
skip this field - for (;;) { - while (ptr < end && *ptr++ != '\n') - ; - if (ptr >= end || *ptr == '%') - break; - } - } - } - for (i = 0; i < 256; i++) - if (temp_fields[i].length() > 0) - nfields++; - field = new string[nfields]; - int j = 0; - for (i = 0; i < 256; i++) - if (temp_fields[i].length() > 0) { - field[j].move(temp_fields[i]); - if (abbreviate_fields.search(i) >= 0) - abbreviate_names(field[j]); - field_index[i] = j; - j++; - } -} - -reference::~reference() -{ - if (nfields > 0) - ad_delete(nfields) field; -} - -// ref is the inline, this is the database ref - -void reference::merge(reference &ref) -{ - int i; - for (i = 0; i < 256; i++) - if (field_index[i] != NULL_FIELD_INDEX) - temp_fields[i].move(field[field_index[i]]); - for (i = 0; i < 256; i++) - if (ref.field_index[i] != NULL_FIELD_INDEX) - temp_fields[i].move(ref.field[ref.field_index[i]]); - for (i = 0; i < 256; i++) - field_index[i] = NULL_FIELD_INDEX; - int old_nfields = nfields; - nfields = 0; - for (i = 0; i < 256; i++) - if (temp_fields[i].length() > 0) - nfields++; - if (nfields != old_nfields) { - if (old_nfields > 0) - ad_delete(old_nfields) field; - field = new string[nfields]; - } - int j = 0; - for (i = 0; i < 256; i++) - if (temp_fields[i].length() > 0) { - field[j].move(temp_fields[i]); - field_index[i] = j; - j++; - } - merged = 1; -} - -void reference::insert_field(unsigned char c, string &s) -{ - assert(s.length() > 0); - if (field_index[c] != NULL_FIELD_INDEX) { - field[field_index[c]].move(s); - return; - } - assert(field_index[c] == NULL_FIELD_INDEX); - string *old_field = field; - field = new string[nfields + 1]; - int pos = 0; - int i; - for (i = 0; i < int(c); i++) - if (field_index[i] != NULL_FIELD_INDEX) - pos++; - for (i = 0; i < pos; i++) - field[i].move(old_field[i]); - field[pos].move(s); - for (i = pos; i < nfields; i++) - field[i + 1].move(old_field[i]); - if (nfields > 0) - ad_delete(nfields) old_field; - nfields++; - field_index[c] = pos; - for (i = c + 1; i < 
256; i++) - if (field_index[i] != NULL_FIELD_INDEX) - field_index[i] += 1; -} - -void reference::delete_field(unsigned char c) -{ - if (field_index[c] == NULL_FIELD_INDEX) - return; - string *old_field = field; - field = new string[nfields - 1]; - int i; - for (i = 0; i < int(field_index[c]); i++) - field[i].move(old_field[i]); - for (i = field_index[c]; i < nfields - 1; i++) - field[i].move(old_field[i + 1]); - if (nfields > 0) - ad_delete(nfields) old_field; - nfields--; - field_index[c] = NULL_FIELD_INDEX; - for (i = c + 1; i < 256; i++) - if (field_index[i] != NULL_FIELD_INDEX) - field_index[i] -= 1; -} - -void reference::compute_hash_code() -{ - if (!rid.is_null()) - h = rid.hash(); - else { - h = 0; - for (int i = 0; i < nfields; i++) - if (field[i].length() > 0) { - h <<= 4; - h ^= hash_string(field[i].contents(), field[i].length()); - } - } -} - -void reference::set_number(int n) -{ - no = n; -} - -const char SORT_SEP = '\001'; -const char SORT_SUB_SEP = '\002'; -const char SORT_SUB_SUB_SEP = '\003'; - -// sep specifies additional word separators - -void sortify_words(const char *s, const char *end, const char *sep, - string &result) -{ - int non_empty = 0; - int need_separator = 0; - for (;;) { - const char *token_start = s; - if (!get_token(&s, end)) - break; - if ((s - token_start == 1 - && (*token_start == ' ' - || *token_start == '\n' - || (sep && *token_start != '\0' - && strchr(sep, *token_start) != 0))) - || (s - token_start == 2 - && token_start[0] == '\\' && token_start[1] == ' ')) { - if (non_empty) - need_separator = 1; - } - else { - const token_info *ti = lookup_token(token_start, s); - if (ti->sortify_non_empty(token_start, s)) { - if (need_separator) { - result += ' '; - need_separator = 0; - } - ti->sortify(token_start, s, result); - non_empty = 1; - } - } - } -} - -void sortify_word(const char *s, const char *end, string &result) -{ - for (;;) { - const char *token_start = s; - if (!get_token(&s, end)) - break; - const token_info *ti = 
lookup_token(token_start, s); - ti->sortify(token_start, s, result); - } -} - -void sortify_other(const char *s, int len, string &key) -{ - sortify_words(s, s + len, 0, key); -} - -void sortify_title(const char *s, int len, string &key) -{ - const char *end = s + len; - for (; s < end && (*s == ' ' || *s == '\n'); s++) - ; - const char *ptr = s; - for (;;) { - const char *token_start = ptr; - if (!get_token(&ptr, end)) - break; - if (ptr - token_start == 1 - && (*token_start == ' ' || *token_start == '\n')) - break; - } - if (ptr < end) { - int first_word_len = ptr - s - 1; - const char *ae = articles.contents() + articles.length(); - for (const char *a = articles.contents(); - a < ae; - a = strchr(a, '\0') + 1) - if (first_word_len == strlen(a)) { - int j; - for (j = 0; j < first_word_len; j++) - if (a[j] != cmlower(s[j])) - break; - if (j >= first_word_len) { - s = ptr; - for (; s < end && (*s == ' ' || *s == '\n'); s++) - ; - break; - } - } - } - sortify_words(s, end, 0, key); -} - -void sortify_name(const char *s, int len, string &key) -{ - const char *last_name_end; - const char *last_name = find_last_name(s, s + len, &last_name_end); - sortify_word(last_name, last_name_end, key); - key += SORT_SUB_SUB_SEP; - if (last_name > s) - sortify_words(s, last_name, ".", key); - key += SORT_SUB_SUB_SEP; - if (last_name_end < s + len) - sortify_words(last_name_end, s + len, ".,", key); -} - -void sortify_date(const char *s, int len, string &key) -{ - const char *year_end; - const char *year_start = find_year(s, s + len, &year_end); - if (!year_start) { - // Things without years are often `forthcoming', so it makes sense - // that they sort after things with explicit years. 
- key += 'A'; - sortify_words(s, s + len, 0, key); - return; - } - int n = year_end - year_start; - while (n < 4) { - key += '0'; - n++; - } - while (year_start < year_end) - key += *year_start++; - int m = find_month(s, s + len); - if (m < 0) - return; - key += 'A' + m; - const char *day_end; - const char *day_start = find_day(s, s + len, &day_end); - if (!day_start) - return; - if (day_end - day_start == 1) - key += '0'; - while (day_start < day_end) - key += *day_start++; -} - -// SORT_{SUB,SUB_SUB}_SEP can creep in from use of @ in label specification. - -void sortify_label(const char *s, int len, string &key) -{ - const char *end = s + len; - for (;;) { - const char *ptr; - for (ptr = s; - ptr < end && *ptr != SORT_SUB_SEP && *ptr != SORT_SUB_SUB_SEP; - ptr++) - ; - if (ptr > s) - sortify_words(s, ptr, 0, key); - s = ptr; - if (s >= end) - break; - key += *s++; - } -} - -void reference::compute_sort_key() -{ - if (sort_fields.length() == 0) - return; - sort_fields += '\0'; - const char *sf = sort_fields.contents(); - while (*sf != '\0') { - if (sf > sort_fields) - sort_key += SORT_SEP; - char f = *sf++; - int n = 1; - if (*sf == '+') { - n = INT_MAX; - sf++; - } - else if (csdigit(*sf)) { - char *ptr; - long l = strtol(sf, &ptr, 10); - if (l == 0 && ptr == sf) - ; - else { - sf = ptr; - if (l < 0) { - n = 1; - } - else { - n = int(l); - } - } - } - if (f == '.') - sortify_label(label.contents(), label.length(), sort_key); - else if (f == AUTHOR_FIELDS[0]) - sortify_authors(n, sort_key); - else - sortify_field(f, n, sort_key); - } - sort_fields.set_length(sort_fields.length() - 1); -} - -void reference::sortify_authors(int n, string &result) const -{ - for (const char *p = AUTHOR_FIELDS; *p != '\0'; p++) - if (contains_field(*p)) { - sortify_field(*p, n, result); - return; - } - sortify_field(AUTHOR_FIELDS[0], n, result); -} - -void reference::canonicalize_authors(string &result) const -{ - int len = result.length(); - sortify_authors(INT_MAX, result); - if 
(result.length() > len) - result += SORT_SUB_SEP; -} - -void reference::sortify_field(unsigned char f, int n, string &result) const -{ - typedef void (*sortify_t)(const char *, int, string &); - sortify_t sortifier = sortify_other; - switch (f) { - case 'A': - case 'E': - sortifier = sortify_name; - break; - case 'D': - sortifier = sortify_date; - break; - case 'B': - case 'J': - case 'T': - sortifier = sortify_title; - break; - } - int fi = field_index[(unsigned char)f]; - if (fi != NULL_FIELD_INDEX) { - string &str = field[fi]; - const char *start = str.contents(); - const char *end = start + str.length(); - for (int i = 0; i < n && start < end; i++) { - const char *p = start; - while (start < end && *start != FIELD_SEPARATOR) - start++; - if (i > 0) - result += SORT_SUB_SEP; - (*sortifier)(p, start - p, result); - if (start < end) - start++; - } - } -} - -int compare_reference(const reference &r1, const reference &r2) -{ - assert(r1.no >= 0); - assert(r2.no >= 0); - const char *s1 = r1.sort_key.contents(); - int n1 = r1.sort_key.length(); - const char *s2 = r2.sort_key.contents(); - int n2 = r2.sort_key.length(); - for (; n1 > 0 && n2 > 0; --n1, --n2, ++s1, ++s2) - if (*s1 != *s2) - return (int)(unsigned char)*s1 - (int)(unsigned char)*s2; - if (n2 > 0) - return -1; - if (n1 > 0) - return 1; - return r1.no - r2.no; -} - -int same_reference(const reference &r1, const reference &r2) -{ - if (!r1.rid.is_null() && r1.rid == r2.rid) - return 1; - if (r1.h != r2.h) - return 0; - if (r1.nfields != r2.nfields) - return 0; - int i = 0; - for (i = 0; i < 256; i++) - if (r1.field_index != r2.field_index) - return 0; - for (i = 0; i < r1.nfields; i++) - if (r1.field[i] != r2.field[i]) - return 0; - return 1; -} - -const char *find_last_name(const char *start, const char *end, - const char **endp) -{ - const char *ptr = start; - const char *last_word = start; - for (;;) { - const char *token_start = ptr; - if (!get_token(&ptr, end)) - break; - if (ptr - token_start == 1) { - 
if (*token_start == ',') { - *endp = token_start; - return last_word; - } - else if (*token_start == ' ' || *token_start == '\n') { - if (ptr < end && *ptr != ' ' && *ptr != '\n') - last_word = ptr; - } - } - } - *endp = end; - return last_word; -} - -void abbreviate_name(const char *ptr, const char *end, string &result) -{ - const char *last_name_end; - const char *last_name_start = find_last_name(ptr, end, &last_name_end); - int need_period = 0; - for (;;) { - const char *token_start = ptr; - if (!get_token(&ptr, last_name_start)) - break; - const token_info *ti = lookup_token(token_start, ptr); - if (need_period) { - if ((ptr - token_start == 1 && *token_start == ' ') - || (ptr - token_start == 2 && token_start[0] == '\\' - && token_start[1] == ' ')) - continue; - if (ti->is_upper()) - result += period_before_initial; - else - result += period_before_other; - need_period = 0; - } - result.append(token_start, ptr - token_start); - if (ti->is_upper()) { - const char *lower_ptr = ptr; - int first_token = 1; - for (;;) { - token_start = ptr; - if (!get_token(&ptr, last_name_start)) - break; - if ((ptr - token_start == 1 && *token_start == ' ') - || (ptr - token_start == 2 && token_start[0] == '\\' - && token_start[1] == ' ')) - break; - ti = lookup_token(token_start, ptr); - if (ti->is_hyphen()) { - const char *ptr1 = ptr; - if (get_token(&ptr1, last_name_start)) { - ti = lookup_token(ptr, ptr1); - if (ti->is_upper()) { - result += period_before_hyphen; - result.append(token_start, ptr1 - token_start); - ptr = ptr1; - } - } - } - else if (ti->is_upper()) { - // MacDougal -> MacD. 
- result.append(lower_ptr, ptr - lower_ptr); - lower_ptr = ptr; - first_token = 1; - } - else if (first_token && ti->is_accent()) { - result.append(token_start, ptr - token_start); - lower_ptr = ptr; - } - first_token = 0; - } - need_period = 1; - } - } - if (need_period) - result += period_before_last_name; - result.append(last_name_start, end - last_name_start); -} - -static void abbreviate_names(string &result) -{ - string str; - str.move(result); - const char *ptr = str.contents(); - const char *end = ptr + str.length(); - while (ptr < end) { - const char *name_end = (char *)memchr(ptr, FIELD_SEPARATOR, end - ptr); - if (name_end == 0) - name_end = end; - abbreviate_name(ptr, name_end, result); - if (name_end >= end) - break; - ptr = name_end + 1; - result += FIELD_SEPARATOR; - } -} - -void reverse_name(const char *ptr, const char *name_end, string &result) -{ - const char *last_name_end; - const char *last_name_start = find_last_name(ptr, name_end, &last_name_end); - result.append(last_name_start, last_name_end - last_name_start); - while (last_name_start > ptr - && (last_name_start[-1] == ' ' || last_name_start[-1] == '\n')) - last_name_start--; - if (last_name_start > ptr) { - result += ", "; - result.append(ptr, last_name_start - ptr); - } - if (last_name_end < name_end) - result.append(last_name_end, name_end - last_name_end); -} - -void reverse_names(string &result, int n) -{ - if (n <= 0) - return; - string str; - str.move(result); - const char *ptr = str.contents(); - const char *end = ptr + str.length(); - while (ptr < end) { - if (--n < 0) { - result.append(ptr, end - ptr); - break; - } - const char *name_end = (char *)memchr(ptr, FIELD_SEPARATOR, end - ptr); - if (name_end == 0) - name_end = end; - reverse_name(ptr, name_end, result); - if (name_end >= end) - break; - ptr = name_end + 1; - result += FIELD_SEPARATOR; - } -} - -// Return number of field separators. 
- -int join_fields(string &f) -{ - const char *ptr = f.contents(); - int len = f.length(); - int nfield_seps = 0; - int j; - for (j = 0; j < len; j++) - if (ptr[j] == FIELD_SEPARATOR) - nfield_seps++; - if (nfield_seps == 0) - return 0; - string temp; - int field_seps_left = nfield_seps; - for (j = 0; j < len; j++) { - if (ptr[j] == FIELD_SEPARATOR) { - if (nfield_seps == 1) - temp += join_authors_exactly_two; - else if (--field_seps_left == 0) - temp += join_authors_last_two; - else - temp += join_authors_default; - } - else - temp += ptr[j]; - } - f = temp; - return nfield_seps; -} - -void uppercase(const char *start, const char *end, string &result) -{ - for (;;) { - const char *token_start = start; - if (!get_token(&start, end)) - break; - const token_info *ti = lookup_token(token_start, start); - ti->upper_case(token_start, start, result); - } -} - -void lowercase(const char *start, const char *end, string &result) -{ - for (;;) { - const char *token_start = start; - if (!get_token(&start, end)) - break; - const token_info *ti = lookup_token(token_start, start); - ti->lower_case(token_start, start, result); - } -} - -void capitalize(const char *ptr, const char *end, string &result) -{ - int in_small_point_size = 0; - for (;;) { - const char *start = ptr; - if (!get_token(&ptr, end)) - break; - const token_info *ti = lookup_token(start, ptr); - const char *char_end = ptr; - int is_lower = ti->is_lower(); - if ((is_lower || ti->is_upper()) && get_token(&ptr, end)) { - const token_info *ti2 = lookup_token(char_end, ptr); - if (!ti2->is_accent()) - ptr = char_end; - } - if (is_lower) { - if (!in_small_point_size) { - result += "\\s-2"; - in_small_point_size = 1; - } - ti->upper_case(start, char_end, result); - result.append(char_end, ptr - char_end); - } - else { - if (in_small_point_size) { - result += "\\s+2"; - in_small_point_size = 0; - } - result.append(start, ptr - start); - } - } - if (in_small_point_size) - result += "\\s+2"; -} - -void 
capitalize_field(string &str) -{ - string temp; - capitalize(str.contents(), str.contents() + str.length(), temp); - str.move(temp); -} - -int is_terminated(const char *ptr, const char *end) -{ - const char *last_token = end; - for (;;) { - const char *p = ptr; - if (!get_token(&ptr, end)) - break; - last_token = p; - } - return end - last_token == 1 - && (*last_token == '.' || *last_token == '!' || *last_token == '?'); -} - -void reference::output(FILE *fp) -{ - fputs(".]-\n", fp); - for (int i = 0; i < 256; i++) - if (field_index[i] != NULL_FIELD_INDEX && i != annotation_field) { - string &f = field[field_index[i]]; - if (!csdigit(i)) { - int j = reverse_fields.search(i); - if (j >= 0) { - int n; - int len = reverse_fields.length(); - if (++j < len && csdigit(reverse_fields[j])) { - n = reverse_fields[j] - '0'; - for (++j; j < len && csdigit(reverse_fields[j]); j++) - // should check for overflow - n = n*10 + reverse_fields[j] - '0'; - } - else - n = INT_MAX; - reverse_names(f, n); - } - } - int is_multiple = join_fields(f) > 0; - if (capitalize_fields.search(i) >= 0) - capitalize_field(f); - if (memchr(f.contents(), '\n', f.length()) == 0) { - fprintf(fp, ".ds [%c ", i); - if (f[0] == ' ' || f[0] == '\\' || f[0] == '"') - putc('"', fp); - put_string(f, fp); - putc('\n', fp); - } - else { - fprintf(fp, ".de [%c\n", i); - put_string(f, fp); - fputs("..\n", fp); - } - if (i == 'P') { - int multiple_pages = 0; - const char *s = f.contents(); - const char *end = f.contents() + f.length(); - for (;;) { - const char *token_start = s; - if (!get_token(&s, end)) - break; - const token_info *ti = lookup_token(token_start, s); - if (ti->is_hyphen() || ti->is_range_sep()) { - multiple_pages = 1; - break; - } - } - fprintf(fp, ".nr [P %d\n", multiple_pages); - } - else if (i == 'E') - fprintf(fp, ".nr [E %d\n", is_multiple); - } - for (const char *p = "TAO"; *p; p++) { - int fi = field_index[(unsigned char)*p]; - if (fi != NULL_FIELD_INDEX) { - string &f = field[fi]; - 
fprintf(fp, ".nr [%c %d\n", *p, - is_terminated(f.contents(), f.contents() + f.length())); - } - } - int t = classify(); - fprintf(fp, ".][ %d %s\n", t, reference_types[t]); - if (annotation_macro.length() > 0 && annotation_field >= 0 - && field_index[annotation_field] != NULL_FIELD_INDEX) { - putc('.', fp); - put_string(annotation_macro, fp); - putc('\n', fp); - put_string(field[field_index[annotation_field]], fp); - } -} - -void reference::print_sort_key_comment(FILE *fp) -{ - fputs(".\\\"", fp); - put_string(sort_key, fp); - putc('\n', fp); -} - -const char *find_year(const char *start, const char *end, const char **endp) -{ - for (;;) { - while (start < end && !csdigit(*start)) - start++; - const char *ptr = start; - if (start == end) - break; - while (ptr < end && csdigit(*ptr)) - ptr++; - if (ptr - start == 4 || ptr - start == 3 - || (ptr - start == 2 - && (start[0] >= '4' || (start[0] == '3' && start[1] >= '2')))) { - *endp = ptr; - return start; - } - start = ptr; - } - return 0; -} - -static const char *find_day(const char *start, const char *end, - const char **endp) -{ - for (;;) { - while (start < end && !csdigit(*start)) - start++; - const char *ptr = start; - if (start == end) - break; - while (ptr < end && csdigit(*ptr)) - ptr++; - if ((ptr - start == 1 && start[0] != '0') - || (ptr - start == 2 && - (start[0] == '1' - || start[0] == '2' - || (start[0] == '3' && start[1] <= '1') - || (start[0] == '0' && start[1] != '0')))) { - *endp = ptr; - return start; - } - start = ptr; - } - return 0; -} - -static int find_month(const char *start, const char *end) -{ - static const char *months[] = { - "january", - "february", - "march", - "april", - "may", - "june", - "july", - "august", - "september", - "october", - "november", - "december", - }; - for (;;) { - while (start < end && !csalpha(*start)) - start++; - const char *ptr = start; - if (start == end) - break; - while (ptr < end && csalpha(*ptr)) - ptr++; - if (ptr - start >= 3) { - for (int i = 0; i < 
sizeof(months)/sizeof(months[0]); i++) { - const char *q = months[i]; - const char *p = start; - for (; p < ptr; p++, q++) - if (cmlower(*p) != *q) - break; - if (p >= ptr) - return i; - } - } - start = ptr; - } - return -1; -} - -int reference::contains_field(char c) const -{ - return field_index[(unsigned char)c] != NULL_FIELD_INDEX; -} - -int reference::classify() -{ - if (contains_field('J')) - return JOURNAL_ARTICLE; - if (contains_field('B')) - return ARTICLE_IN_BOOK; - if (contains_field('G')) - return TECH_REPORT; - if (contains_field('R')) - return TECH_REPORT; - if (contains_field('I')) - return BOOK; - if (contains_field('M')) - return BELL_TM; - return OTHER; -} - -const char *reference::get_year(const char **endp) const -{ - if (field_index['D'] != NULL_FIELD_INDEX) { - string &date = field[field_index['D']]; - const char *start = date.contents(); - const char *end = start + date.length(); - return find_year(start, end, endp); - } - else - return 0; -} - -const char *reference::get_field(unsigned char c, const char **endp) const -{ - if (field_index[c] != NULL_FIELD_INDEX) { - string &f = field[field_index[c]]; - const char *start = f.contents(); - *endp = start + f.length(); - return start; - } - else - return 0; -} - -const char *reference::get_date(const char **endp) const -{ - return get_field('D', endp); -} - -const char *nth_field(int i, const char *start, const char **endp) -{ - while (--i >= 0) { - start = (char *)memchr(start, FIELD_SEPARATOR, *endp - start); - if (!start) - return 0; - start++; - } - const char *e = (char *)memchr(start, FIELD_SEPARATOR, *endp - start); - if (e) - *endp = e; - return start; -} - -const char *reference::get_author(int i, const char **endp) const -{ - for (const char *f = AUTHOR_FIELDS; *f != '\0'; f++) { - const char *start = get_field(*f, endp); - if (start) { - if (strchr(MULTI_FIELD_NAMES, *f) != 0) - return nth_field(i, start, endp); - else if (i == 0) - return start; - else - return 0; - } - } - return 0; 
-} - -const char *reference::get_author_last_name(int i, const char **endp) const -{ - for (const char *f = AUTHOR_FIELDS; *f != '\0'; f++) { - const char *start = get_field(*f, endp); - if (start) { - if (strchr(MULTI_FIELD_NAMES, *f) != 0) { - start = nth_field(i, start, endp); - if (!start) - return 0; - } - if (*f == 'A') - return find_last_name(start, *endp, endp); - else - return start; - } - } - return 0; -} - -void reference::set_date(string &d) -{ - if (d.length() == 0) - delete_field('D'); - else - insert_field('D', d); -} - -int same_year(const reference &r1, const reference &r2) -{ - const char *ye1; - const char *ys1 = r1.get_year(&ye1); - const char *ye2; - const char *ys2 = r2.get_year(&ye2); - if (ys1 == 0) { - if (ys2 == 0) - return same_date(r1, r2); - else - return 0; - } - else if (ys2 == 0) - return 0; - else if (ye1 - ys1 != ye2 - ys2) - return 0; - else - return memcmp(ys1, ys2, ye1 - ys1) == 0; -} - -int same_date(const reference &r1, const reference &r2) -{ - const char *e1; - const char *s1 = r1.get_date(&e1); - const char *e2; - const char *s2 = r2.get_date(&e2); - if (s1 == 0) - return s2 == 0; - else if (s2 == 0) - return 0; - else if (e1 - s1 != e2 - s2) - return 0; - else - return memcmp(s1, s2, e1 - s1) == 0; -} - -const char *reference::get_sort_field(int i, int si, int ssi, - const char **endp) const -{ - const char *start = sort_key.contents(); - const char *end = start + sort_key.length(); - if (i < 0) { - *endp = end; - return start; - } - while (--i >= 0) { - start = (char *)memchr(start, SORT_SEP, end - start); - if (!start) - return 0; - start++; - } - const char *e = (char *)memchr(start, SORT_SEP, end - start); - if (e) - end = e; - if (si < 0) { - *endp = end; - return start; - } - while (--si >= 0) { - start = (char *)memchr(start, SORT_SUB_SEP, end - start); - if (!start) - return 0; - start++; - } - e = (char *)memchr(start, SORT_SUB_SEP, end - start); - if (e) - end = e; - if (ssi < 0) { - *endp = end; - return start; 
- } - while (--ssi >= 0) { - start = (char *)memchr(start, SORT_SUB_SUB_SEP, end - start); - if (!start) - return 0; - start++; - } - e = (char *)memchr(start, SORT_SUB_SUB_SEP, end - start); - if (e) - end = e; - *endp = end; - return start; -} - diff --git a/contrib/groff/refer/ref.h b/contrib/groff/refer/ref.h deleted file mode 100644 index 13a984a4c727..000000000000 --- a/contrib/groff/refer/ref.h +++ /dev/null @@ -1,120 +0,0 @@ -// -*- C++ -*- -/* Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc. - Written by James Clark (jjc@jclark.com) - -This file is part of groff. - -groff is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free -Software Foundation; either version 2, or (at your option) any later -version. - -groff is distributed in the hope that it will be useful, but WITHOUT ANY -WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received a copy of the GNU General Public License along -with groff; see the file COPYING. If not, write to the Free Software -Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ - -struct label_info; - -enum label_type { NORMAL_LABEL, SHORT_LABEL }; -const int N_LABEL_TYPES = 2; - -struct substring_position { - int start; - int length; - substring_position() : start(-1) { } -}; - -class int_set { - string v; -public: - int_set() { } - void set(int i); - int get(int i) const; -}; - -class reference { -private: - unsigned h; - reference_id rid; - int merged; - string sort_key; - int no; - string *field; - int nfields; - unsigned char field_index[256]; - enum { NULL_FIELD_INDEX = 255 }; - string label; - substring_position separator_pos; - string short_label; - substring_position short_separator_pos; - label_info *label_ptr; - string authors; - int computed_authors; - int last_needed_author; - int nauthors; - int_set last_name_unambiguous; - - int contains_field(char) const; - void insert_field(unsigned char, string &s); - void delete_field(unsigned char); - void set_date(string &); - const char *get_sort_field(int i, int si, int ssi, const char **endp) const; - int merge_labels_by_parts(reference **, int, label_type, string &); - int merge_labels_by_number(reference **, int, label_type, string &); -public: - reference(const char * = 0, int = -1, reference_id * = 0); - ~reference(); - void output(FILE *); - void print_sort_key_comment(FILE *); - void set_number(int); - int get_number() const { return no; } - unsigned hash() const { return h; } - const string &get_label(label_type type) const; - const substring_position &get_separator_pos(label_type) const; - int is_merged() const { return merged; } - void compute_sort_key(); - void compute_hash_code(); - void pre_compute_label(); - void compute_label(); - void immediate_compute_label(); - int classify(); - void merge(reference &); - int merge_labels(reference **, int, label_type, string &); - int get_nauthors() const; - void need_author(int); - void set_last_name_unambiguous(int); - void sortify_authors(int, string &) const; - void canonicalize_authors(string &) const; - void 
sortify_field(unsigned char, int, string &) const; - const char *get_author(int, const char **) const; - const char *get_author_last_name(int, const char **) const; - const char *get_date(const char **) const; - const char *get_year(const char **) const; - const char *get_field(unsigned char, const char **) const; - const label_info *get_label_ptr() const { return label_ptr; } - const char *get_authors(const char **) const; - // for sorting - friend int compare_reference(const reference &r1, const reference &r2); - // for merging - friend int same_reference(const reference &, const reference &); - friend int same_year(const reference &, const reference &); - friend int same_date(const reference &, const reference &); - friend int same_author_last_name(const reference &, const reference &, int); - friend int same_author_name(const reference &, const reference &, int); -}; - -const char *find_year(const char *, const char *, const char **); -const char *find_last_name(const char *, const char *, const char **); - -const char *nth_field(int i, const char *start, const char **endp); - -void capitalize(const char *ptr, const char *end, string &result); -void reverse_name(const char *ptr, const char *end, string &result); -void uppercase(const char *ptr, const char *end, string &result); -void lowercase(const char *ptr, const char *end, string &result); -void abbreviate_name(const char *ptr, const char *end, string &result); diff --git a/contrib/groff/refer/refer.cc b/contrib/groff/refer/refer.cc deleted file mode 100644 index 70c696f4a9d3..000000000000 --- a/contrib/groff/refer/refer.cc +++ /dev/null @@ -1,1228 +0,0 @@ -// -*- C++ -*- -/* Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc. - Written by James Clark (jjc@jclark.com) - -This file is part of groff. 
- -groff is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free -Software Foundation; either version 2, or (at your option) any later -version. - -groff is distributed in the hope that it will be useful, but WITHOUT ANY -WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received a copy of the GNU General Public License along -with groff; see the file COPYING. If not, write to the Free Software -Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - -#include "refer.h" -#include "refid.h" -#include "ref.h" -#include "token.h" -#include "search.h" -#include "command.h" - -const char PRE_LABEL_MARKER = '\013'; -const char POST_LABEL_MARKER = '\014'; -const char LABEL_MARKER = '\015'; // label_type is added on - -#define FORCE_LEFT_BRACKET 04 -#define FORCE_RIGHT_BRACKET 010 - -static FILE *outfp = stdout; - -string capitalize_fields; -string reverse_fields; -string abbreviate_fields; -string period_before_last_name = ". "; -string period_before_initial = "."; -string period_before_hyphen = ""; -string period_before_other = ". "; -string sort_fields; -int annotation_field = -1; -string annotation_macro; -string discard_fields = "XYZ"; -string pre_label = "\\*([."; -string post_label = "\\*(.]"; -string sep_label = ", "; -int accumulate = 0; -int move_punctuation = 0; -int abbreviate_label_ranges = 0; -string label_range_indicator; -int label_in_text = 1; -int label_in_reference = 1; -int date_as_label = 0; -int sort_adjacent_labels = 0; -// Join exactly two authors with this. -string join_authors_exactly_two = " and "; -// When there are more than two authors join the last two with this. -string join_authors_last_two = ", and "; -// Otherwise join authors with this. 
-string join_authors_default = ", "; -string separate_label_second_parts = ", "; -// Use this string to represent that there are other authors. -string et_al = " et al"; -// Use et al only if it can replace at least this many authors. -int et_al_min_elide = 2; -// Use et al only if the total number of authors is at least this. -int et_al_min_total = 3; - - -int compatible_flag = 0; - -int short_label_flag = 0; - -static int recognize_R1_R2 = 1; - -search_list database_list; -int search_default = 1; -static int default_database_loaded = 0; - -static reference **citation = 0; -static int ncitations = 0; -static int citation_max = 0; - -static reference **reference_hash_table = 0; -static int hash_table_size; -static int nreferences = 0; - -static int need_syncing = 0; -string pending_line; -string pending_lf_lines; - -static void output_pending_line(); -static unsigned immediately_handle_reference(const string &); -static void immediately_output_references(); -static unsigned store_reference(const string &); -static void divert_to_temporary_file(); -static reference *make_reference(const string &, unsigned *); -static void usage(); -static void do_file(const char *); -static void split_punct(string &line, string &punct); -static void output_citation_group(reference **v, int n, label_type, FILE *fp); -static void possibly_load_default_database(); - -int main(int argc, char **argv) -{ - program_name = argv[0]; - static char stderr_buf[BUFSIZ]; - setbuf(stderr, stderr_buf); - outfp = stdout; - int finished_options = 0; - int bib_flag = 0; - int done_spec = 0; - - for (--argc, ++argv; - !finished_options && argc > 0 && argv[0][0] == '-' - && argv[0][1] != '\0'; - argv++, argc--) { - const char *opt = argv[0] + 1; - while (opt != 0 && *opt != '\0') { - switch (*opt) { - case 'C': - compatible_flag = 1; - opt++; - break; - case 'B': - bib_flag = 1; - label_in_reference = 0; - label_in_text = 0; - ++opt; - if (*opt == '\0') { - annotation_field = 'X'; - annotation_macro = 
"AP"; - } - else if (csalnum(opt[0]) && opt[1] == '.' && opt[2] != '\0') { - annotation_field = opt[0]; - annotation_macro = opt + 2; - } - opt = 0; - break; - case 'P': - move_punctuation = 1; - opt++; - break; - case 'R': - recognize_R1_R2 = 0; - opt++; - break; - case 'S': - // Not a very useful spec. - set_label_spec("(A.n|Q)', '(D.y|D)"); - done_spec = 1; - pre_label = " ("; - post_label = ")"; - sep_label = "; "; - opt++; - break; - case 'V': - verify_flag = 1; - opt++; - break; - case 'f': - { - const char *num = 0; - if (*++opt == '\0') { - if (argc > 1) { - num = *++argv; - --argc; - } - else { - error("option `f' requires an argument"); - usage(); - } - } - else { - num = opt; - opt = 0; - } - const char *ptr; - for (ptr = num; *ptr; ptr++) - if (!csdigit(*ptr)) { - error("bad character `%1' in argument to -f option", *ptr); - break; - } - if (*ptr == '\0') { - string spec; - spec = '%'; - spec += num; - spec += '\0'; - set_label_spec(spec.contents()); - done_spec = 1; - } - break; - } - case 'b': - label_in_text = 0; - label_in_reference = 0; - opt++; - break; - case 'e': - accumulate = 1; - opt++; - break; - case 'c': - capitalize_fields = ++opt; - opt = 0; - break; - case 'k': - { - char buf[5]; - if (csalpha(*++opt)) - buf[0] = *opt++; - else { - if (*opt != '\0') - error("bad field name `%1'", *opt++); - buf[0] = 'L'; - } - buf[1] = '~'; - buf[2] = '%'; - buf[3] = 'a'; - buf[4] = '\0'; - set_label_spec(buf); - done_spec = 1; - } - break; - case 'a': - { - const char *ptr; - for (ptr = ++opt; *ptr; ptr++) - if (!csdigit(*ptr)) { - error("argument to `a' option not a number"); - break; - } - if (*ptr == '\0') { - reverse_fields = 'A'; - reverse_fields += opt; - } - opt = 0; - } - break; - case 'i': - linear_ignore_fields = ++opt; - opt = 0; - break; - case 'l': - { - char buf[INT_DIGITS*2 + 11]; // A.n+2D.y-3%a - strcpy(buf, "A.n"); - if (*++opt != '\0' && *opt != ',') { - char *ptr; - long n = strtol(opt, &ptr, 10); - if (n == 0 && ptr == opt) { - 
error("bad integer `%1' in `l' option", opt); - opt = 0; - break; - } - if (n < 0) - n = 0; - opt = ptr; - sprintf(strchr(buf, '\0'), "+%ld", n); - } - strcat(buf, "D.y"); - if (*opt == ',') - opt++; - if (*opt != '\0') { - char *ptr; - long n = strtol(opt, &ptr, 10); - if (n == 0 && ptr == opt) { - error("bad integer `%1' in `l' option", opt); - opt = 0; - break; - } - if (n < 0) - n = 0; - sprintf(strchr(buf, '\0'), "-%ld", n); - opt = ptr; - if (*opt != '\0') - error("argument to `l' option not of form `m,n'"); - } - strcat(buf, "%a"); - if (!set_label_spec(buf)) - assert(0); - done_spec = 1; - } - break; - case 'n': - search_default = 0; - opt++; - break; - case 'p': - { - const char *filename = 0; - if (*++opt == '\0') { - if (argc > 1) { - filename = *++argv; - argc--; - } - else { - error("option `p' requires an argument"); - usage(); - } - } - else { - filename = opt; - opt = 0; - } - database_list.add_file(filename); - } - break; - case 's': - if (*++opt == '\0') - sort_fields = "AD"; - else { - sort_fields = opt; - opt = 0; - } - accumulate = 1; - break; - case 't': - { - char *ptr; - long n = strtol(opt, &ptr, 10); - if (n == 0 && ptr == opt) { - error("bad integer `%1' in `t' option", opt); - opt = 0; - break; - } - if (n < 1) - n = 1; - linear_truncate_len = int(n); - opt = ptr; - break; - } - case 'v': - { - extern const char *Version_string; - fprintf(stderr, "GNU refer version %s\n", Version_string); - fflush(stderr); - opt++; - break; - } - case '-': - if (opt[1] == '\0') { - finished_options = 1; - opt++; - break; - } - // fall through - default: - error("unrecognized option `%1'", *opt); - usage(); - break; - } - } - } - if (!done_spec) - set_label_spec("%1"); - if (argc <= 0) { - if (bib_flag) - do_bib("-"); - else - do_file("-"); - } - else { - for (int i = 0; i < argc; i++) { - if (bib_flag) - do_bib(argv[i]); - else - do_file(argv[i]); - } - } - if (accumulate) - output_references(); - if (fflush(stdout) < 0) - fatal("output error"); - return 
0; -} - -static void usage() -{ - fprintf(stderr, -"usage: %s [-benvCPRS] [-aN] [-cXYZ] [-fN] [-iXYZ] [-kX] [-lM,N] [-p file]\n" -" [-sXYZ] [-tN] [-BL.M] [files ...]\n", - program_name); - exit(1); -} - -static void possibly_load_default_database() -{ - if (search_default && !default_database_loaded) { - char *filename = getenv("REFER"); - if (filename) - database_list.add_file(filename); - else - database_list.add_file(DEFAULT_INDEX, 1); - default_database_loaded = 1; - } -} - -static int is_list(const string &str) -{ - const char *start = str.contents(); - const char *end = start + str.length(); - while (end > start && csspace(end[-1])) - end--; - while (start < end && csspace(*start)) - start++; - return end - start == 6 && memcmp(start, "$LIST$", 6) == 0; -} - -static void do_file(const char *filename) -{ - FILE *fp; - if (strcmp(filename, "-") == 0) { - fp = stdin; - } - else { - errno = 0; - fp = fopen(filename, "r"); - if (fp == 0) { - error("can't open `%1': %2", filename, strerror(errno)); - return; - } - } - current_filename = filename; - fprintf(outfp, ".lf 1 %s\n", filename); - string line; - current_lineno = 0; - for (;;) { - line.clear(); - for (;;) { - int c = getc(fp); - if (c == EOF) { - if (line.length() > 0) - line += '\n'; - break; - } - if (illegal_input_char(c)) - error("illegal input character code %1", c); - else { - line += c; - if (c == '\n') - break; - } - } - int len = line.length(); - if (len == 0) - break; - current_lineno++; - if (len >= 2 && line[0] == '.' 
&& line[1] == '[') { - int start_lineno = current_lineno; - int start_of_line = 1; - string str; - string post; - string pre(line.contents() + 2, line.length() - 3); - for (;;) { - int c = getc(fp); - if (c == EOF) { - error_with_file_and_line(current_filename, start_lineno, - "missing `.]' line"); - break; - } - if (start_of_line) - current_lineno++; - if (start_of_line && c == '.') { - int d = getc(fp); - if (d == ']') { - while ((d = getc(fp)) != '\n' && d != EOF) { - if (illegal_input_char(d)) - error("illegal input character code %1", d); - else - post += d; - } - break; - } - if (d != EOF) - ungetc(d, fp); - } - if (illegal_input_char(c)) - error("illegal input character code %1", c); - else - str += c; - start_of_line = (c == '\n'); - } - if (is_list(str)) { - output_pending_line(); - if (accumulate) - output_references(); - else - error("found `$LIST$' but not accumulating references"); - } - else { - unsigned flags = (accumulate - ? store_reference(str) - : immediately_handle_reference(str)); - if (label_in_text) { - if (accumulate && outfp == stdout) - divert_to_temporary_file(); - if (pending_line.length() == 0) { - warning("can't attach citation to previous line"); - } - else - pending_line.set_length(pending_line.length() - 1); - string punct; - if (move_punctuation) - split_punct(pending_line, punct); - int have_text = pre.length() > 0 || post.length() > 0; - label_type lt = label_type(flags & ~(FORCE_LEFT_BRACKET - |FORCE_RIGHT_BRACKET)); - if ((flags & FORCE_LEFT_BRACKET) || !have_text) - pending_line += PRE_LABEL_MARKER; - pending_line += pre; - char lm = LABEL_MARKER + (int)lt; - pending_line += lm; - pending_line += post; - if ((flags & FORCE_RIGHT_BRACKET) || !have_text) - pending_line += POST_LABEL_MARKER; - pending_line += punct; - pending_line += '\n'; - } - } - need_syncing = 1; - } - else if (len >= 4 - && line[0] == '.' 
&& line[1] == 'l' && line[2] == 'f' - && (compatible_flag || line[3] == '\n' || line[3] == ' ')) { - pending_lf_lines += line; - line += '\0'; - if (interpret_lf_args(line.contents() + 3)) - current_lineno--; - } - else if (recognize_R1_R2 - && len >= 4 - && line[0] == '.' && line[1] == 'R' && line[2] == '1' - && (compatible_flag || line[3] == '\n' || line[3] == ' ')) { - line.clear(); - int start_of_line = 1; - int start_lineno = current_lineno; - for (;;) { - int c = getc(fp); - if (c != EOF && start_of_line) - current_lineno++; - if (start_of_line && c == '.') { - c = getc(fp); - if (c == 'R') { - c = getc(fp); - if (c == '2') { - c = getc(fp); - if (compatible_flag || c == ' ' || c == '\n' || c == EOF) { - while (c != EOF && c != '\n') - c = getc(fp); - break; - } - else { - line += '.'; - line += 'R'; - line += '2'; - } - } - else { - line += '.'; - line += 'R'; - } - } - else - line += '.'; - } - if (c == EOF) { - error_with_file_and_line(current_filename, start_lineno, - "missing `.R2' line"); - break; - } - if (illegal_input_char(c)) - error("illegal input character code %1", int(c)); - else { - line += c; - start_of_line = c == '\n'; - } - } - output_pending_line(); - if (accumulate) - output_references(); - else - nreferences = 0; - process_commands(line, current_filename, start_lineno + 1); - need_syncing = 1; - } - else { - output_pending_line(); - pending_line = line; - } - } - need_syncing = 0; - output_pending_line(); - if (fp != stdin) - fclose(fp); -} - -class label_processing_state { - enum { - NORMAL, - PENDING_LABEL, - PENDING_LABEL_POST, - PENDING_LABEL_POST_PRE, - PENDING_POST - } state; - label_type type; // type of pending labels - int count; // number of pending labels - reference **rptr; // pointer to next reference - int rcount; // number of references left - FILE *fp; - int handle_pending(int c); -public: - label_processing_state(reference **, int, FILE *); - ~label_processing_state(); - void process(int c); -}; - -static void 
output_pending_line() -{ - if (label_in_text && !accumulate && ncitations > 0) { - label_processing_state state(citation, ncitations, outfp); - int len = pending_line.length(); - for (int i = 0; i < len; i++) - state.process((unsigned char)(pending_line[i])); - } - else - put_string(pending_line, outfp); - pending_line.clear(); - if (pending_lf_lines.length() > 0) { - put_string(pending_lf_lines, outfp); - pending_lf_lines.clear(); - } - if (!accumulate) - immediately_output_references(); - if (need_syncing) { - fprintf(outfp, ".lf %d %s\n", current_lineno, current_filename); - need_syncing = 0; - } -} - -static void split_punct(string &line, string &punct) -{ - const char *start = line.contents(); - const char *end = start + line.length(); - const char *ptr = start; - const char *last_token_start = 0; - for (;;) { - if (ptr >= end) - break; - last_token_start = ptr; - if (*ptr == PRE_LABEL_MARKER || *ptr == POST_LABEL_MARKER - || (*ptr >= LABEL_MARKER && *ptr < LABEL_MARKER + N_LABEL_TYPES)) - ptr++; - else if (!get_token(&ptr, end)) - break; - } - if (last_token_start) { - const token_info *ti = lookup_token(last_token_start, end); - if (ti->is_punct()) { - punct.append(last_token_start, end - last_token_start); - line.set_length(last_token_start - start); - } - } -} - -static void divert_to_temporary_file() -{ - outfp = xtmpfile(); -} - -static void store_citation(reference *ref) -{ - if (ncitations >= citation_max) { - if (citation == 0) - citation = new reference*[citation_max = 100]; - else { - reference **old_citation = citation; - citation_max *= 2; - citation = new reference *[citation_max]; - memcpy(citation, old_citation, ncitations*sizeof(reference *)); - a_delete old_citation; - } - } - citation[ncitations++] = ref; -} - -static unsigned store_reference(const string &str) -{ - if (reference_hash_table == 0) { - reference_hash_table = new reference *[17]; - hash_table_size = 17; - for (int i = 0; i < hash_table_size; i++) - reference_hash_table[i] = 0; 
- } - unsigned flags; - reference *ref = make_reference(str, &flags); - ref->compute_hash_code(); - unsigned h = ref->hash(); - reference **ptr; - for (ptr = reference_hash_table + (h % hash_table_size); - *ptr != 0; - ((ptr == reference_hash_table) - ? (ptr = reference_hash_table + hash_table_size - 1) - : --ptr)) - if (same_reference(**ptr, *ref)) - break; - if (*ptr != 0) { - if (ref->is_merged()) - warning("fields ignored because reference already used"); - delete ref; - ref = *ptr; - } - else { - *ptr = ref; - ref->set_number(nreferences); - nreferences++; - ref->pre_compute_label(); - ref->compute_sort_key(); - if (nreferences*2 >= hash_table_size) { - // Rehash it. - reference **old_table = reference_hash_table; - int old_size = hash_table_size; - hash_table_size = next_size(hash_table_size); - reference_hash_table = new reference*[hash_table_size]; - int i; - for (i = 0; i < hash_table_size; i++) - reference_hash_table[i] = 0; - for (i = 0; i < old_size; i++) - if (old_table[i]) { - reference **p; - for (p = (reference_hash_table - + (old_table[i]->hash() % hash_table_size)); - *p; - ((p == reference_hash_table) - ? 
(p = reference_hash_table + hash_table_size - 1) - : --p)) - ; - *p = old_table[i]; - } - a_delete old_table; - } - } - if (label_in_text) - store_citation(ref); - return flags; -} - -unsigned immediately_handle_reference(const string &str) -{ - unsigned flags; - reference *ref = make_reference(str, &flags); - ref->set_number(nreferences); - if (label_in_text || label_in_reference) { - ref->pre_compute_label(); - ref->immediate_compute_label(); - } - nreferences++; - store_citation(ref); - return flags; -} - -static void immediately_output_references() -{ - for (int i = 0; i < ncitations; i++) { - reference *ref = citation[i]; - if (label_in_reference) { - fputs(".ds [F ", outfp); - const string &label = ref->get_label(NORMAL_LABEL); - if (label.length() > 0 - && (label[0] == ' ' || label[0] == '\\' || label[0] == '"')) - putc('"', outfp); - put_string(label, outfp); - putc('\n', outfp); - } - ref->output(outfp); - delete ref; - } - ncitations = 0; -} - -static void output_citation_group(reference **v, int n, label_type type, - FILE *fp) -{ - if (sort_adjacent_labels) { - // Do an insertion sort. Usually n will be very small. - for (int i = 1; i < n; i++) { - int num = v[i]->get_number(); - reference *temp = v[i]; - int j; - for (j = i - 1; j >= 0 && v[j]->get_number() > num; j--) - v[j + 1] = v[j]; - v[j + 1] = temp; - } - } - // This messes up if !accumulate. 
- if (accumulate && n > 1) { - // remove duplicates - int j = 1; - for (int i = 1; i < n; i++) - if (v[i]->get_label(type) != v[i - 1]->get_label(type)) - v[j++] = v[i]; - n = j; - } - string merged_label; - for (int i = 0; i < n; i++) { - int nmerged = v[i]->merge_labels(v + i + 1, n - i - 1, type, merged_label); - if (nmerged > 0) { - put_string(merged_label, fp); - i += nmerged; - } - else - put_string(v[i]->get_label(type), fp); - if (i < n - 1) - put_string(sep_label, fp); - } -} - - -label_processing_state::label_processing_state(reference **p, int n, FILE *f) -: state(NORMAL), count(0), rptr(p), rcount(n), fp(f) -{ -} - -label_processing_state::~label_processing_state() -{ - int handled = handle_pending(EOF); - assert(!handled); - assert(rcount == 0); -} - -int label_processing_state::handle_pending(int c) -{ - switch (state) { - case NORMAL: - break; - case PENDING_LABEL: - if (c == POST_LABEL_MARKER) { - state = PENDING_LABEL_POST; - return 1; - } - else { - output_citation_group(rptr, count, type, fp); - rptr += count ; - rcount -= count; - state = NORMAL; - } - break; - case PENDING_LABEL_POST: - if (c == PRE_LABEL_MARKER) { - state = PENDING_LABEL_POST_PRE; - return 1; - } - else { - output_citation_group(rptr, count, type, fp); - rptr += count; - rcount -= count; - put_string(post_label, fp); - state = NORMAL; - } - break; - case PENDING_LABEL_POST_PRE: - if (c >= LABEL_MARKER - && c < LABEL_MARKER + N_LABEL_TYPES - && c - LABEL_MARKER == type) { - count += 1; - state = PENDING_LABEL; - return 1; - } - else { - output_citation_group(rptr, count, type, fp); - rptr += count; - rcount -= count; - put_string(sep_label, fp); - state = NORMAL; - } - break; - case PENDING_POST: - if (c == PRE_LABEL_MARKER) { - put_string(sep_label, fp); - state = NORMAL; - return 1; - } - else { - put_string(post_label, fp); - state = NORMAL; - } - break; - } - return 0; -} - -void label_processing_state::process(int c) -{ - if (handle_pending(c)) - return; - assert(state == 
NORMAL); - switch (c) { - case PRE_LABEL_MARKER: - put_string(pre_label, fp); - state = NORMAL; - break; - case POST_LABEL_MARKER: - state = PENDING_POST; - break; - case LABEL_MARKER: - case LABEL_MARKER + 1: - count = 1; - state = PENDING_LABEL; - type = label_type(c - LABEL_MARKER); - break; - default: - state = NORMAL; - putc(c, fp); - break; - } -} - -extern "C" { - -int rcompare(const void *p1, const void *p2) -{ - return compare_reference(**(reference **)p1, **(reference **)p2); -} - -} - -void output_references() -{ - assert(accumulate); - if (nreferences > 0) { - int j = 0; - int i; - for (i = 0; i < hash_table_size; i++) - if (reference_hash_table[i] != 0) - reference_hash_table[j++] = reference_hash_table[i]; - assert(j == nreferences); - for (; j < hash_table_size; j++) - reference_hash_table[j] = 0; - qsort(reference_hash_table, nreferences, sizeof(reference*), rcompare); - for (i = 0; i < nreferences; i++) - reference_hash_table[i]->set_number(i); - compute_labels(reference_hash_table, nreferences); - } - if (outfp != stdout) { - rewind(outfp); - { - label_processing_state state(citation, ncitations, stdout); - int c; - while ((c = getc(outfp)) != EOF) - state.process(c); - } - ncitations = 0; - fclose(outfp); - outfp = stdout; - } - if (nreferences > 0) { - fputs(".]<\n", outfp); - for (int i = 0; i < nreferences; i++) { - if (sort_fields.length() > 0) - reference_hash_table[i]->print_sort_key_comment(outfp); - if (label_in_reference) { - fputs(".ds [F ", outfp); - const string &label = reference_hash_table[i]->get_label(NORMAL_LABEL); - if (label.length() > 0 - && (label[0] == ' ' || label[0] == '\\' || label[0] == '"')) - putc('"', outfp); - put_string(label, outfp); - putc('\n', outfp); - } - reference_hash_table[i]->output(outfp); - delete reference_hash_table[i]; - reference_hash_table[i] = 0; - } - fputs(".]>\n", outfp); - nreferences = 0; - } - clear_labels(); -} - -static reference *find_reference(const char *query, int query_len) -{ - // 
This is so that error messages look better. - while (query_len > 0 && csspace(query[query_len - 1])) - query_len--; - string str; - for (int i = 0; i < query_len; i++) - str += query[i] == '\n' ? ' ' : query[i]; - str += '\0'; - possibly_load_default_database(); - search_list_iterator iter(&database_list, str.contents()); - reference_id rid; - const char *start; - int len; - if (!iter.next(&start, &len, &rid)) { - error("no matches for `%1'", str.contents()); - return 0; - } - const char *end = start + len; - while (start < end) { - if (*start == '%') - break; - while (start < end && *start++ != '\n') - ; - } - if (start >= end) { - error("found a reference for `%1' but it didn't contain any fields", - str.contents()); - return 0; - } - reference *result = new reference(start, end - start, &rid); - if (iter.next(&start, &len, &rid)) - warning("multiple matches for `%1'", str.contents()); - return result; -} - -static reference *make_reference(const string &str, unsigned *flagsp) -{ - const char *start = str.contents(); - const char *end = start + str.length(); - const char *ptr = start; - while (ptr < end) { - if (*ptr == '%') - break; - while (ptr < end && *ptr++ != '\n') - ; - } - *flagsp = 0; - for (; start < ptr; start++) { - if (*start == '#') - *flagsp = (SHORT_LABEL | (*flagsp & (FORCE_RIGHT_BRACKET - | FORCE_LEFT_BRACKET))); - else if (*start == '[') - *flagsp |= FORCE_LEFT_BRACKET; - else if (*start == ']') - *flagsp |= FORCE_RIGHT_BRACKET; - else if (!csspace(*start)) - break; - } - if (start >= end) { - error("empty reference"); - return new reference; - } - reference *database_ref = 0; - if (start < ptr) - database_ref = find_reference(start, ptr - start); - reference *inline_ref = 0; - if (ptr < end) - inline_ref = new reference(ptr, end - ptr); - if (inline_ref) { - if (database_ref) { - database_ref->merge(*inline_ref); - delete inline_ref; - return database_ref; - } - else - return inline_ref; - } - else if (database_ref) - return database_ref; - 
else - return new reference; -} - -static void do_ref(const string &str) -{ - if (accumulate) - (void)store_reference(str); - else { - (void)immediately_handle_reference(str); - immediately_output_references(); - } -} - -static void trim_blanks(string &str) -{ - const char *start = str.contents(); - const char *end = start + str.length(); - while (end > start && end[-1] != '\n' && csspace(end[-1])) - --end; - str.set_length(end - start); -} - -void do_bib(const char *filename) -{ - FILE *fp; - if (strcmp(filename, "-") == 0) - fp = stdin; - else { - errno = 0; - fp = fopen(filename, "r"); - if (fp == 0) { - error("can't open `%1': %2", filename, strerror(errno)); - return; - } - current_filename = filename; - } - enum { - START, MIDDLE, BODY, BODY_START, BODY_BLANK, BODY_DOT - } state = START; - string body; - for (;;) { - int c = getc(fp); - if (c == EOF) - break; - if (illegal_input_char(c)) { - error("illegal input character code %1", c); - continue; - } - switch (state) { - case START: - if (c == '%') { - body = c; - state = BODY; - } - else if (c != '\n') - state = MIDDLE; - break; - case MIDDLE: - if (c == '\n') - state = START; - break; - case BODY: - body += c; - if (c == '\n') - state = BODY_START; - break; - case BODY_START: - if (c == '\n') { - do_ref(body); - state = START; - } - else if (c == '.') - state = BODY_DOT; - else if (csspace(c)) { - state = BODY_BLANK; - body += c; - } - else { - body += c; - state = BODY; - } - break; - case BODY_BLANK: - if (c == '\n') { - trim_blanks(body); - do_ref(body); - state = START; - } - else if (csspace(c)) - body += c; - else { - body += c; - state = BODY; - } - break; - case BODY_DOT: - if (c == ']') { - do_ref(body); - state = MIDDLE; - } - else { - body += '.'; - body += c; - state = c == '\n' ? 
BODY_START : BODY; - } - break; - default: - assert(0); - } - if (c == '\n') - current_lineno++; - } - switch (state) { - case START: - case MIDDLE: - break; - case BODY: - body += '\n'; - do_ref(body); - break; - case BODY_DOT: - case BODY_START: - do_ref(body); - break; - case BODY_BLANK: - trim_blanks(body); - do_ref(body); - break; - } - fclose(fp); -} - -// from the Dragon Book - -unsigned hash_string(const char *s, int len) -{ - const char *end = s + len; - unsigned h = 0, g; - while (s < end) { - h <<= 4; - h += *s++; - if ((g = h & 0xf0000000) != 0) { - h ^= g >> 24; - h ^= g; - } - } - return h; -} - -int next_size(int n) -{ - static const int table_sizes[] = { - 101, 503, 1009, 2003, 3001, 4001, 5003, 10007, 20011, 40009, - 80021, 160001, 500009, 1000003, 2000003, 4000037, 8000009, - 16000057, 32000011, 64000031, 128000003, 0 - }; - - const int *p; - for (p = table_sizes; *p <= n && *p != 0; p++) - ; - assert(*p != 0); - return *p; -} - diff --git a/contrib/groff/refer/refer.h b/contrib/groff/refer/refer.h deleted file mode 100644 index f0ab3cd70957..000000000000 --- a/contrib/groff/refer/refer.h +++ /dev/null @@ -1,78 +0,0 @@ -// -*- C++ -*- -/* Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc. - Written by James Clark (jjc@jclark.com) - -This file is part of groff. - -groff is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free -Software Foundation; either version 2, or (at your option) any later -version. - -groff is distributed in the hope that it will be useful, but WITHOUT ANY -WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received a copy of the GNU General Public License along -with groff; see the file COPYING. If not, write to the Free Software -Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ - -#include <stdio.h> -#include <stdlib.h> -#include <assert.h> -#include <string.h> -#include <errno.h> - -#include "errarg.h" -#include "error.h" -#include "lib.h" -#include "stringclass.h" -#include "cset.h" -#include "cmap.h" - -#include "defs.h" - -unsigned hash_string(const char *, int); -int next_size(int); - -extern string capitalize_fields; -extern string reverse_fields; -extern string abbreviate_fields; -extern string period_before_last_name; -extern string period_before_initial; -extern string period_before_hyphen; -extern string period_before_other; -extern string sort_fields; -extern int annotation_field; -extern string annotation_macro; -extern string discard_fields; -extern string articles; -extern int abbreviate_label_ranges; -extern string label_range_indicator; -extern int date_as_label; -extern string join_authors_exactly_two; -extern string join_authors_last_two; -extern string join_authors_default; -extern string separate_label_second_parts; -extern string et_al; -extern int et_al_min_elide; -extern int et_al_min_total; - -extern int compatible_flag; - -extern int set_label_spec(const char *); -extern int set_date_label_spec(const char *); -extern int set_short_label_spec(const char *); - -extern int short_label_flag; - -void clear_labels(); -void command_error(const char *, - const errarg &arg1 = empty_errarg, - const errarg &arg2 = empty_errarg, - const errarg &arg3 = empty_errarg); - -struct reference; - -void compute_labels(reference **, int); diff --git a/contrib/groff/refer/refer.man b/contrib/groff/refer/refer.man deleted file mode 100644 index 13708cf0e568..000000000000 --- a/contrib/groff/refer/refer.man +++ /dev/null @@ -1,1302 +0,0 @@ -.ig \"-*- nroff -*- -Copyright (C) 1989-2000 Free Software Foundation, Inc. - -Permission is granted to make and distribute verbatim copies of -this manual provided the copyright notice and this permission notice -are preserved on all copies. 
- -Permission is granted to copy and distribute modified versions of this -manual under the conditions for verbatim copying, provided that the -entire resulting derived work is distributed under the terms of a -permission notice identical to this one. - -Permission is granted to copy and distribute translations of this -manual into another language, under the above conditions for modified -versions, except that this permission notice may be included in -translations approved by the Free Software Foundation instead of in -the original English. -.. -.de TQ -.br -.ns -.TP \\$1 -.. -.\" Like TP, but if specified indent is more than half -.\" the current line-length - indent, use the default indent. -.de Tp -.ie \\n(.$=0:((0\\$1)*2u>(\\n(.lu-\\n(.iu)) .TP -.el .TP "\\$1" -.. -.\" The BSD man macros can't handle " in arguments to font change macros, -.\" so use \(ts instead of ". -.tr \(ts" -.TH @G@REFER @MAN1EXT@ "@MDATE@" "Groff Version @VERSION@" -.SH NAME -@g@refer \- preprocess bibliographic references for groff -.SH SYNOPSIS -.nr a \n(.j -.ad l -.nr i \n(.i -.in +\w'\fB@g@refer 'u -.ti \niu -.B @g@refer -.de OP -.ie \\n(.$-1 .RI "[\ \fB\\$1\fP" "\\$2" "\ ]" -.el .RB "[\ " "\\$1" "\ ]" -.. -.OP \-benvCPRS -.OP \-a n -.OP \-c fields -.OP \-f n -.OP \-i fields -.OP \-k field -.OP \-l m,n -.OP \-p filename -.OP \-s fields -.OP \-t n -.OP \-B field.macro -.RI [\ filename \|.\|.\|.\ ] -.br -.ad \na -.PP -It is possible to have whitespace between a command line option and its -parameter. -.SH DESCRIPTION -This file documents the GNU version of -.BR refer , -which is part of the groff document formatting system. -.B refer -copies the contents of -.IR filename \|.\|.\|. -to the standard output, -except that lines between -.B .[ -and -.B .] -are interpreted as citations, -and lines between -.B .R1 -and -.B .R2 -are interpreted as commands about how citations are to be processed. -.LP -Each citation specifies a reference. 
-The citation can specify a reference that is contained in -a bibliographic database by giving a set of keywords -that only that reference contains. -Alternatively it can specify a reference by supplying a database -record in the citation. -A combination of these alternatives is also possible. -.LP -For each citation, -.B refer -can produce a mark in the text. -This mark consists of some label which can be separated from -the text and from other labels in various ways. -For each reference it also outputs -.B groff -commands that can be used by a macro package to produce a formatted -reference for each citation. -The output of -.B refer -must therefore be processed using a suitable macro package. -The -.B \-ms -and -.B \-me -macros are both suitable. -The commands to format a citation's reference can be output immediately after -the citation, -or the references may be accumulated, -and the commands output at some later point. -If the references are accumulated, then multiple citations of the same -reference will produce a single formatted reference. -.LP -The interpretation of lines between -.B .R1 -and -.B .R2 -as commands is a new feature of GNU refer. -Documents making use of this feature can still be processed by -Unix refer just by adding the lines -.RS -.LP -.nf -.ft B -\&.de R1 -\&.ig R2 -\&.. -.ft -.fi -.RE -to the beginning of the document. -This will cause -.B troff -to ignore everything between -.B .R1 -and -.BR .R2 . -The effect of some commands can also be achieved by options. -These options are supported mainly for compatibility with Unix refer. -It is usually more convenient to use commands. 
-.LP -.B refer -generates -.B .lf -lines so that filenames and line numbers in messages produced -by commands that read -.B refer -output will be correct; -it also interprets lines beginning with -.B .lf -so that filenames and line numbers in the messages and -.B .lf -lines that it produces will be accurate even if the input has been -preprocessed by a command such as -.BR @g@soelim (@MAN1EXT@). -.SH OPTIONS -.LP -Most options are equivalent to commands -(for a description of these commands see the -.B Commands -subsection): -.TP -.B \-b -.B -no-label-in-text; no-label-in-reference -.TP -.B \-e -.B accumulate -.TP -.B \-n -.B no-default-database -.TP -.B \-C -.B compatible -.TP -.B \-P -.B move-punctuation -.TP -.B \-S -.B -label "(A.n|Q) ', ' (D.y|D)"; bracket-label " (" ) "; " -.TP -.BI \-a n -.B reverse -.BI A n -.TP -.BI \-c fields -.B capitalize -.I fields -.TP -.BI \-f n -.B label -.BI % n -.TP -.BI \-i fields -.B search-ignore -.I fields -.TP -.B \-k -.B label -.B L\(ti%a -.TP -.BI \-k field -.B label -.IB field \(ti%a -.TP -.B \-l -.B label -.BI A.nD.y%a -.TP -.BI \-l m -.B label -.BI A.n+ m D.y%a -.TP -.BI \-l, n -.B label -.BI A.nD.y\- n %a -.TP -.BI \-l m , n -.B label -.BI A.n+ m D.y\- n %a -.TP -.BI \-p filename -.B database -.I filename -.TP -.BI \-s spec -.B sort -.I spec -.TP -.BI \-t n -.B search-truncate -.I n -.LP -These options are equivalent to the following commands with the -addition that the filenames specified on the command line are -processed as if they were arguments to the -.B bibliography -command instead of in the normal way: -.TP -.B \-B -.B -annotate X AP; no-label-in-reference -.TP -.BI \-B field . macro -.B annotate -.I field -.IB macro ; -.B no-label-in-reference -.LP -The following options have no equivalent commands: -.TP -.B \-v -Print the version number. -.TP -.B \-R -Don't recognize lines beginning with -.BR .R1 / .R2 . 
-.SH USAGE -.SS Bibliographic databases -The bibliographic database is a text file consisting of records -separated by one or more blank lines. -Within each record fields start with a -.B % -at the beginning of a line. -Each field has a one character name that immediately follows the -.BR % . -It is best to use only upper and lower case letters for the names -of fields. -The name of the field should be followed by exactly one space, -and then by the contents of the field. -Empty fields are ignored. -The conventional meaning of each field is as follows: -.TP -.B A -The name of an author. -If the name contains a title such as -.B Jr. -at the end, -it should be separated from the last name by a comma. -There can be multiple occurrences of the -.B A -field. -The order is significant. -It is a good idea always to supply an -.B A -field or a -.B Q -field. -.TP -.B B -For an article that is part of a book, the title of the book -.TP -.B C -The place (city) of publication. -.TP -.B D -The date of publication. -The year should be specified in full. -If the month is specified, the name rather than the number of the month -should be used, but only the first three letters are required. -It is a good idea always to supply a -.B D -field; -if the date is unknown, a value such as -.B in press -or -.B unknown -can be used. -.TP -.B E -For an article that is part of a book, the name of an editor of the book. -Where the work has editors and no authors, -the names of the editors should be given as -.B A -fields and -.B ,\ (ed) -or -.B ,\ (eds) -should be appended to the last author. -.TP -.B G -US Government ordering number. -.TP -.B I -The publisher (issuer). -.TP -.B J -For an article in a journal, the name of the journal. -.TP -.B K -Keywords to be used for searching. -.TP -.B L -Label. -.TP -.B N -Journal issue number. -.TP -.B O -Other information. -This is usually printed at the end of the reference. -.TP -.B P -Page number. -A range of pages can be specified as -.IB m \- n\fR. 
-.TP -.B Q -The name of the author, if the author is not a person. -This will only be used if there are no -.B A -fields. -There can only be one -.B Q -field. -.TP -.B R -Technical report number. -.TP -.B S -Series name. -.TP -.B T -Title. -For an article in a book or journal, -this should be the title of the article. -.TP -.B V -Volume number of the journal or book. -.TP -.B X -Annotation. -.LP -For all fields except -.B A -and -.BR E , -if there is more than one occurrence of a particular field in a record, -only the last such field will be used. -.LP -If accent strings are used, they should follow the character to be accented. -This means that the -.B AM -macro must be used with the -.B \-ms -macros. -Accent strings should not be quoted: -use one -.B \e -rather than two. -.SS Citations -The format of a citation is -.RS -.BI .[ opening-text -.br -.I -flags keywords -.br -.I fields -.br -.BI .] closing-text -.RE -.LP -The -.IR opening-text , -.IR closing-text -and -.I flags -components are optional. -Only one of the -.I keywords -and -.I fields -components need be specified. -.LP -The -.I keywords -component says to search the bibliographic databases for a reference -that contains all the words in -.IR keywords . -It is an error if more than one reference is found. -.LP -The -.I fields -component specifies additional fields to replace or supplement -those specified in the reference. -When references are being accumulated and the -.I keywords -component is non-empty, -then additional fields should be specified only on the first -occasion that a particular reference is cited, -and will apply to all citations of that reference. -.LP -The -.I opening-text -and -.I closing-text -components specify strings to be used to bracket the label instead -of the strings specified in the -.B bracket-label -command. 
-If either of these components is non-empty, -the strings specified in the -.B bracket-label -command will not be used; -this behaviour can be altered using the -.B [ -and -.B ] -flags. -Note that leading and trailing spaces are significant for these components. -.LP -The -.I flags -component is a list of -non-alphanumeric characters each of which modifies the treatment -of this particular citation. -Unix refer will treat these flags as part of the keywords and -so will ignore them since they are non-alphanumeric. -The following flags are currently recognized: -.TP -.B # -This says to use the label specified by the -.B short-label -command, -instead of that specified by the -.B label -command. -If no short label has been specified, the normal label will be used. -Typically the short label is used with author-date labels -and consists of only the date and possibly a disambiguating letter; -the -.B # -is supposed to be suggestive of a numeric type of label. -.TP -.B [ -Precede -.I opening-text -with the first string specified in the -.B bracket-label -command. -.TP -.B ] -Follow -.I closing-text -with the second string specified in the -.B bracket-label -command. -.LP -One advantage of using the -.B [ -and -.B ] -flags rather than including the brackets in -.I opening-text -and -.I closing-text -is that -you can change the style of bracket used in the document just by changing the -.B bracket-label -command. -Another advantage is that sorting and merging of citations -will not necessarily be inhibited if the flags are used. -.LP -If a label is to be inserted into the text, -it will be attached to the line preceding the -.B .[ -line. -If there is no such line, then an extra line will be inserted before the -.B .[ -line and a warning will be given. -.LP -There is no special notation for making a citation to multiple references. -Just use a sequence of citations, one for each reference. -Don't put anything between the citations. 
-The labels for all the citations will be attached to the line preceding -the first citation. -The labels may also be sorted or merged. -See the description of the -.B <> -label expression, and of the -.B sort-adjacent-labels -and -.B abbreviate-label-ranges -command. -A label will not be merged if its citation has a non-empty -.I opening-text -or -.IR closing-text . -However, the labels for a citation using the -.B ] -flag and without any -.I closing-text -immediately followed by a citation using the -.B [ -flag and without any -.I opening-text -may be sorted and merged -even though the first citation's -.I opening-text -or the second citation's -.I closing-text -is non-empty. -(If you wish to prevent this just make the first citation's -.I closing-text -.BR \e& .) -.SS Commands -Commands are contained between lines starting with -.B .R1 -and -.BR .R2 . -Recognition of these lines can be prevented by the -.B \-R -option. -When a -.B .R1 -line is recognized any accumulated references are flushed out. -Neither -.B .R1 -nor -.B .R2 -lines, -nor anything between them -is output. -.LP -Commands are separated by newlines or -.BR ; s. -.B # -introduces a comment that extends to the end of the line -(but does not conceal the newline). -Each command is broken up into words. -Words are separated by spaces or tabs. -A word that begins with -.B \(ts -extends to the next -.B \(ts -that is not followed by another -.BR \(ts . -If there is no such -.B \(ts -the word extends to the end of the line. -Pairs of -.B \(ts -in a word beginning with -.B \(ts -collapse to a single -.BR \(ts . -Neither -.B # -nor -.B ; -are recognized inside -.BR \(ts s. -A line can be continued by ending it with -.BR \e ; -this works everywhere except after a -.BR # . -.LP -.ds n \fR* -Each command -.I name -that is marked with \*n has an associated negative command -.BI no- name -that undoes the effect of -.IR name . -For example, the -.B no-sort -command specifies that references should not be sorted. 
-The negative commands take no arguments. -.LP -In the following description each argument must be a single word; -.I field -is used for a single upper or lower case letter naming a field; -.I fields -is used for a sequence of such letters; -.I m -and -.I n -are used for non-negative numbers; -.I string -is used for an arbitrary string; -.I filename -is used for the name of a file. -.Tp \w'\fBabbreviate-label-ranges'u+2n -.BI abbreviate\*n\ fields\ string1\ string2\ string3\ string4 -Abbreviate the first names of -.IR fields . -An initial letter will be separated from another initial letter by -.IR string1 , -from the last name by -.IR string2 , -and from anything else -(such as a -.B von -or -.BR de ) -by -.IR string3 . -These default to a period followed by a space. -In a hyphenated first name, -the initial of the first part of the name will be separated from the hyphen by -.IR string4 ; -this defaults to a period. -No attempt is made to handle any ambiguities that might -result from abbreviation. -Names are abbreviated before sorting and before -label construction. -.TP -.BI abbreviate-label-ranges\*n\ string -Three or more adjacent labels that refer to consecutive references -will be abbreviated to a label consisting -of the first label, followed by -.I string -followed by the last label. -This is mainly useful with numeric labels. -If -.I string -is omitted it defaults to -.BR \- . -.TP -.B accumulate\*n -Accumulate references instead of writing out each reference -as it is encountered. -Accumulated references will be written out whenever a reference -of the form -.RS -.IP -.B .[ -.br -.B $LIST$ -.br -.B .] -.LP -is encountered, -after all input files have been processed, -and whenever a -.B .R1 -line is recognized. -.RE -.TP -.BI annotate\*n\ field\ string -.I field -is an annotation; -print it at the end of the reference as a paragraph preceded by the line -.RS -.IP -.BI . 
string -.LP -If -.I macro -is omitted it will default to -.BR AP ; -if -.I field -is also omitted it will default to -.BR X . -Only one field can be an annotation. -.RE -.TP -.BI articles\ string \fR\|.\|.\|. -.IR string \|.\|.\|. -are definite or indefinite articles, and should be ignored at the beginning of -.B T -fields when sorting. -Initially, -.BR the , -.B a -and -.B an -are recognized as articles. -.TP -.BI bibliography\ filename \fR\|.\|.\|. -Write out all the references contained in the bibliographic databases -.IR filename \|.\|.\|. -.TP -.BI bracket-label\ string1\ string2\ string3 -In the text, bracket each label -with -.I string1 -and -.IR string2 . -An occurrence of -.I string2 -immediately followed by -.I string1 -will be turned into -.IR string3 . -The default behaviour is -.RS -.IP -.B -bracket-label \e*([. \e*(.] ", " -.RE -.TP -.BI capitalize\ fields -Convert -.I fields -to caps and small caps. -.TP -.B compatible\*n -Recognize -.B .R1 -and -.B .R2 -even when followed by a character other than space or newline. -.TP -.BI database\ filename \fR\|.\|.\|. -Search the bibliographic databases -.IR filename \|.\|.\|. -For each -.I filename -if an index -.IB filename @INDEX_SUFFIX@ -created by -.BR @g@indxbib (@MAN1EXT@) -exists, then it will be searched instead; -each index can cover multiple databases. -.TP -.BI date-as-label\*n\ string -.I string -is a label expression that specifies a string with which to replace the -.B D -field after constructing the label. -See the -.B "Label expressions" -subsection for a description of label expressions. -This command is useful if you do not want explicit labels in the -reference list, but instead want to handle any necessary -disambiguation by qualifying the date in some way. -The label used in the text would typically be some combination of the -author and date. -In most cases you should also use the -.B no-label-in-reference -command. 
-For example, -.RS -.IP -.B -date-as-label D.+yD.y%a*D.-y -.LP -would attach a disambiguating letter to the year part of the -.B D -field in the reference. -.RE -.TP -.B default-database\*n -The default database should be searched. -This is the default behaviour, so the negative version of -this command is more useful. -refer determines whether the default database should be searched -on the first occasion that it needs to do a search. -Thus a -.B no-default-database -command must be given before then, -in order to be effective. -.TP -.BI discard\*n\ fields -When the reference is read, -.I fields -should be discarded; -no string definitions for -.I fields -will be output. -Initially, -.I fields -are -.BR XYZ . -.TP -.BI et-al\*n\ string\ m\ n -Control use of -.B -et al -in the evaluation of -.B @ -expressions in label expressions. -If the number of authors needed to make the author sequence -unambiguous is -.I u -and the total number of authors is -.I t -then the last -.IR t \|\-\| u -authors will be replaced by -.I string -provided that -.IR t \|\-\| u -is not less than -.I m -and -.I t -is not less than -.IR n . -The default behaviour is -.RS -.IP -.B -et-al " et al" 2 3 -.RE -.TP -.BI include\ filename -Include -.I filename -and interpret the contents as commands. -.TP -.BI join-authors\ string1\ string2\ string3 -This says how authors should be joined together. -When there are exactly two authors, they will be joined with -.IR string1 . -When there are more than two authors, all but the last two will -be joined with -.IR string2 , -and the last two authors will be joined with -.IR string3 . -If -.I string3 -is omitted, -it will default to -.IR string1 ; -if -.I string2 -is also omitted it will also default to -.IR string1 . -For example, -.RS -.IP -.B -join-authors " and " ", " ", and " -.LP -will restore the default method for joining authors. 
-.RE -.TP -.B label-in-reference\*n -When outputting the reference, -define the string -.B [F -to be the reference's label. -This is the default behaviour; so the negative version -of this command is more useful. -.TP -.B label-in-text\*n -For each reference output a label in the text. -The label will be separated from the surrounding text as described in the -.B bracket-label -command. -This is the default behaviour; so the negative version -of this command is more useful. -.TP -.BI label\ string -.I string -is a label expression describing how to label each reference. -.TP -.BI separate-label-second-parts\ string -When merging two-part labels, separate the second part of the second -label from the first label with -.IR string . -See the description of the -.B <> -label expression. -.TP -.B move-punctuation\*n -In the text, move any punctuation at the end of line past the label. -It is usually a good idea to give this command unless you are using -superscripted numbers as labels. -.TP -.BI reverse\*n\ string -Reverse the fields whose names -are in -.IR string . -Each field name can be followed by a number which says -how many such fields should be reversed. -If no number is given for a field, all such fields will be reversed. -.TP -.BI search-ignore\*n\ fields -While searching for keys in databases for which no index exists, -ignore the contents of -.IR fields . -Initially, fields -.B XYZ -are ignored. -.TP -.BI search-truncate\*n\ n -Only require the first -.I n -characters of keys to be given. -In effect when searching for a given key -words in the database are truncated to the maximum of -.I n -and the length of the key. -Initially -.I n -is 6. -.TP -.BI short-label\*n\ string -.I string -is a label expression that specifies an alternative (usually shorter) -style of label. -This is used when the -.B # -flag is given in the citation. 
-When using author-date style labels, the identity of the author -or authors is sometimes clear from the context, and so it -may be desirable to omit the author or authors from the label. -The -.B short-label -command will typically be used to specify a label containing just -a date and possibly a disambiguating letter. -.TP -.BI sort\*n\ string -Sort references according to -.BR string . -References will automatically be accumulated. -.I string -should be a list of field names, each followed by a number, -indicating how many fields with the name should be used for sorting. -.B + -can be used to indicate that all the fields with the name should be used. -Also -.B . -can be used to indicate the references should be sorted using the -(tentative) label. -(The -.B -Label expressions -subsection describes the concept of a tentative label.) -.TP -.B sort-adjacent-labels\*n -Sort labels that are adjacent in the text according to their -position in the reference list. -This command should usually be given if the -.B abbreviate-label-ranges -command has been given, -or if the label expression contains a -.B <> -expression. -This will have no effect unless references are being accumulated. -.SS Label expressions -.LP -Label expressions can be evaluated both normally and tentatively. -The result of normal evaluation is used for output. -The result of tentative evaluation, called the -.I -tentative label, -is used to gather the information -that normal evaluation needs to disambiguate the label. -Label expressions specified by the -.B date-as-label -and -.B short-label -commands are not evaluated tentatively. -Normal and tentative evaluation are the same for all types -of expression other than -.BR @ , -.BR * , -and -.B % -expressions. -The description below applies to normal evaluation, -except where otherwise specified. -.TP -.I field -.TQ -.I field\ n -The -.IR n -th -part of -.IR field . -If -.I n -is omitted, it defaults to 1. 
-.TP -.BI ' string ' -The characters in -.I string -literally. -.TP -.B @ -All the authors joined as specified by the -.B join-authors -command. -The whole of each author's name will be used. -However, if the references are sorted by author -(that is the sort specification starts with -.BR A+ ), -then authors' last names will be used instead, provided that this does -not introduce ambiguity, -and also an initial subsequence of the authors may be used -instead of all the authors, again provided that this does not -introduce ambiguity. -The use of only the last name for the -.IR i -th -author of some reference -is considered to be ambiguous if -there is some other reference, -such that the first -.IR i \|-\|1 -authors of the references are the same, -the -.IR i -th -authors are not the same, -but the -.IR i -th -authors' last names are the same. -A proper initial subsequence of the sequence -of authors for some reference is considered to be ambiguous if there is -a reference with some other sequence of authors which also has -that subsequence as a proper initial subsequence. -When an initial subsequence of authors is used, the remaining -authors are replaced by the string specified by the -.B et-al -command; -this command may also specify additional requirements that must be -met before an initial subsequence can be used. -.B @ -tentatively evaluates to a canonical representation of the authors, -such that authors that compare equally for sorting purpose -will have the same representation. -.TP -.BI % n -.TQ -.B %a -.TQ -.B %A -.TQ -.B %i -.TQ -.B %I -The serial number of the reference formatted according to the character -following the -.BR % . -The serial number of a reference is 1 plus the number of earlier references -with same tentative label as this reference. -These expressions tentatively evaluate to an empty string. -.TP -.IB expr * -If there is another reference with the same tentative label as -this reference, then -.IR expr , -otherwise an empty string. 
-It tentatively evaluates to an empty string. -.TP -.IB expr + n -.TQ -.IB expr \- n -The first -.RB ( + ) -or last -.RB ( \- ) -.I n -upper or lower case letters or digits of -.IR expr . -Troff special characters (such as -.BR \e('a ) -count as a single letter. -Accent strings are retained but do not count towards the total. -.TP -.IB expr .l -.I expr -converted to lowercase. -.TP -.IB expr .u -.I expr -converted to uppercase. -.TP -.IB expr .c -.I expr -converted to caps and small caps. -.TP -.IB expr .r -.I expr -reversed so that the last name is first. -.TP -.IB expr .a -.I expr -with first names abbreviated. -Note that fields specified in the -.B abbreviate -command are abbreviated before any labels are evaluated. -Thus -.B .a -is useful only when you want a field to be abbreviated in a label -but not in a reference. -.TP -.IB expr .y -The year part of -.IR expr . -.TP -.IB expr .+y -The part of -.I expr -before the year, or the whole of -.I expr -if it does not contain a year. -.TP -.IB expr .\-y -The part of -.I expr -after the year, or an empty string if -.I expr -does not contain a year. -.TP -.IB expr .n -The last name part of -.IR expr . -.TP -.IB expr1 \(ti expr2 -.I expr1 -except that if the last character of -.I expr1 -is -.B \- -then it will be replaced by -.IR expr2 . -.TP -.I expr1\ expr2 -The concatenation of -.I expr1 -and -.IR expr2 . -.TP -.IB expr1 | expr2 -If -.I expr1 -is non-empty then -.I expr1 -otherwise -.IR expr2 . -.TP -.IB expr1 & expr2 -If -.I expr1 -is non-empty -then -.I expr2 -otherwise an empty string. -.TP -.IB expr1 ? expr2 : expr3 -If -.I expr1 -is non-empty -then -.I expr2 -otherwise -.IR expr3 . -.TP -.BI < expr > -The label is in two parts, which are separated by -.IR expr . 
-Two adjacent two-part labels which have the same first part will be -merged by appending the second part of the second label onto the first -label separated by the string specified in the -.B separate-label-second-parts -command (initially, a comma followed by a space); the resulting label -will also be a two-part label with the same first part as before -merging, and so additional labels can be merged into it. -Note that it is permissible for the first part to be empty; -this maybe desirable for expressions used in the -.B short-label -command. -.TP -.BI ( expr ) -The same as -.IR expr . -Used for grouping. -.LP -The above expressions are listed in order of precedence -(highest first); -.B & -and -.B | -have the same precedence. -.SS Macro interface -Each reference starts with a call to the macro -.BR ]- . -The string -.B [F -will be defined to be the label for this reference, -unless the -.B no-label-in-reference -command has been given. -There then follows a series of string definitions, -one for each field: -string -.BI [ X -corresponds to field -.IR X . -The number register -.B [P -is set to 1 if the -.B P -field contains a range of pages. -The -.BR [T , -.B [A -and -.B [O -number registers are set to 1 according as the -.BR T , -.B A -and -.B O -fields end with one of the characters -.BR .?! . -The -.B [E -number register will be set to 1 if the -.B [E -string contains more than one name. -The reference is followed by a call to the -.B ][ -macro. -The first argument to this macro gives a number representing -the type of the reference. -If a reference contains a -.B J -field, it will be classified as type 1, -otherwise if it contains a -.B B -field, it will type 3, -otherwise if it contains a -.B G -or -.B R -field it will be type 4, -otherwise if contains a -.B I -field it will be type 2, -otherwise it will be type 0. 
-The second argument is a symbolic name for the type: -.BR other , -.BR journal-article , -.BR book , -.B article-in-book -or -.BR tech-report . -Groups of references that have been accumulated -or are produced by the -.B bibliography -command are preceded by a call to the -.B ]< -macro and followed by a call to the -.B ]> -macro. -.SH FILES -.Tp \w'\fB@DEFAULT_INDEX@'u+2n -.B @DEFAULT_INDEX@ -Default database. -.TP -.IB file @INDEX_SUFFIX@ -Index files. -.SH "SEE ALSO" -.BR @g@indxbib (@MAN1EXT@), -.BR @g@lookbib (@MAN1EXT@), -.BR lkbib (@MAN1EXT@) -.br -.SH BUGS -In label expressions, -.B <> -expressions are ignored inside -.BI . char -expressions. diff --git a/contrib/groff/refer/token.cc b/contrib/groff/refer/token.cc deleted file mode 100644 index 1cf6890f24d4..000000000000 --- a/contrib/groff/refer/token.cc +++ /dev/null @@ -1,378 +0,0 @@ -// -*- C++ -*- -/* Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc. - Written by James Clark (jjc@jclark.com) - -This file is part of groff. - -groff is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free -Software Foundation; either version 2, or (at your option) any later -version. - -groff is distributed in the hope that it will be useful, but WITHOUT ANY -WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received a copy of the GNU General Public License along -with groff; see the file COPYING. If not, write to the Free Software -Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - -#include "refer.h" -#include "token.h" - -#define TOKEN_TABLE_SIZE 1009 -// I believe in Icelandic thorn sorts after z. 
-#define THORN_SORT_KEY "{" - -struct token_table_entry { - const char *tok; - token_info ti; - token_table_entry(); -}; - -token_table_entry token_table[TOKEN_TABLE_SIZE]; -int ntokens = 0; - -static void skip_name(const char **ptr, const char *end) -{ - if (*ptr < end) { - switch (*(*ptr)++) { - case '(': - if (*ptr < end) { - *ptr += 1; - if (*ptr < end) - *ptr += 1; - } - break; - case '[': - while (*ptr < end) - if (*(*ptr)++ == ']') - break; - break; - } - } -} - -int get_token(const char **ptr, const char *end) -{ - if (*ptr >= end) - return 0; - char c = *(*ptr)++; - if (c == '\\' && *ptr < end) { - switch (**ptr) { - default: - *ptr += 1; - break; - case '(': - case '[': - skip_name(ptr, end); - break; - case '*': - case 'f': - *ptr += 1; - skip_name(ptr, end); - break; - } - } - return 1; -} - -token_info::token_info() -: type(TOKEN_OTHER), sort_key(0), other_case(0) -{ -} - -void token_info::set(token_type t, const char *sk, const char *oc) -{ - assert(oc == 0 || t == TOKEN_UPPER || t == TOKEN_LOWER); - type = t; - sort_key = sk; - other_case = oc; -} - -void token_info::sortify(const char *start, const char *end, string &result) - const -{ - if (sort_key) - result += sort_key; - else if (type == TOKEN_UPPER || type == TOKEN_LOWER) { - for (; start < end; start++) - if (csalpha(*start)) - result += cmlower(*start); - } -} - -int token_info::sortify_non_empty(const char *start, const char *end) const -{ - if (sort_key) - return *sort_key != '\0'; - if (type != TOKEN_UPPER && type != TOKEN_LOWER) - return 0; - for (; start < end; start++) - if (csalpha(*start)) - return 1; - return 0; -} - - -void token_info::lower_case(const char *start, const char *end, - string &result) const -{ - if (type != TOKEN_UPPER) { - while (start < end) - result += *start++; - } - else if (other_case) - result += other_case; - else { - while (start < end) - result += cmlower(*start++); - } -} - -void token_info::upper_case(const char *start, const char *end, - string &result) 
const -{ - if (type != TOKEN_LOWER) { - while (start < end) - result += *start++; - } - else if (other_case) - result += other_case; - else { - while (start < end) - result += cmupper(*start++); - } -} - -token_table_entry::token_table_entry() -: tok(0) -{ -} - -static void store_token(const char *tok, token_type typ, - const char *sk = 0, const char *oc = 0) -{ - unsigned n = hash_string(tok, strlen(tok)) % TOKEN_TABLE_SIZE; - for (;;) { - if (token_table[n].tok == 0) { - if (++ntokens == TOKEN_TABLE_SIZE) - assert(0); - token_table[n].tok = tok; - break; - } - if (strcmp(tok, token_table[n].tok) == 0) - break; - if (n == 0) - n = TOKEN_TABLE_SIZE - 1; - else - --n; - } - token_table[n].ti.set(typ, sk, oc); -} - - -token_info default_token_info; - -const token_info *lookup_token(const char *start, const char *end) -{ - unsigned n = hash_string(start, end - start) % TOKEN_TABLE_SIZE; - for (;;) { - if (token_table[n].tok == 0) - break; - if (strlen(token_table[n].tok) == end - start - && memcmp(token_table[n].tok, start, end - start) == 0) - return &(token_table[n].ti); - if (n == 0) - n = TOKEN_TABLE_SIZE - 1; - else - --n; - } - return &default_token_info; -} - -static void init_ascii() -{ - const char *p; - for (p = "abcdefghijklmnopqrstuvwxyz"; *p; p++) { - char buf[2]; - buf[0] = *p; - buf[1] = '\0'; - store_token(strsave(buf), TOKEN_LOWER); - buf[0] = cmupper(buf[0]); - store_token(strsave(buf), TOKEN_UPPER); - } - for (p = "0123456789"; *p; p++) { - char buf[2]; - buf[0] = *p; - buf[1] = '\0'; - const char *s = strsave(buf); - store_token(s, TOKEN_OTHER, s); - } - for (p = ".,:;?!"; *p; p++) { - char buf[2]; - buf[0] = *p; - buf[1] = '\0'; - store_token(strsave(buf), TOKEN_PUNCT); - } - store_token("-", TOKEN_HYPHEN); -} - -static void store_letter(const char *lower, const char *upper, - const char *sort_key = 0) -{ - store_token(lower, TOKEN_LOWER, sort_key, upper); - store_token(upper, TOKEN_UPPER, sort_key, lower); -} - -static void init_letter(unsigned 
char uc_code, unsigned char lc_code, - const char *sort_key) -{ - char lbuf[2]; - lbuf[0] = lc_code; - lbuf[1] = 0; - char ubuf[2]; - ubuf[0] = uc_code; - ubuf[1] = 0; - store_letter(strsave(lbuf), strsave(ubuf), sort_key); -} - -static void init_latin1() -{ - init_letter(0xc0, 0xe0, "a"); - init_letter(0xc1, 0xe1, "a"); - init_letter(0xc2, 0xe2, "a"); - init_letter(0xc3, 0xe3, "a"); - init_letter(0xc4, 0xe4, "a"); - init_letter(0xc5, 0xe5, "a"); - init_letter(0xc6, 0xe6, "ae"); - init_letter(0xc7, 0xe7, "c"); - init_letter(0xc8, 0xe8, "e"); - init_letter(0xc9, 0xe9, "e"); - init_letter(0xca, 0xea, "e"); - init_letter(0xcb, 0xeb, "e"); - init_letter(0xcc, 0xec, "i"); - init_letter(0xcd, 0xed, "i"); - init_letter(0xce, 0xee, "i"); - init_letter(0xcf, 0xef, "i"); - - init_letter(0xd0, 0xf0, "d"); - init_letter(0xd1, 0xf1, "n"); - init_letter(0xd2, 0xf2, "o"); - init_letter(0xd3, 0xf3, "o"); - init_letter(0xd4, 0xf4, "o"); - init_letter(0xd5, 0xf5, "o"); - init_letter(0xd6, 0xf6, "o"); - init_letter(0xd8, 0xf8, "o"); - init_letter(0xd9, 0xf9, "u"); - init_letter(0xda, 0xfa, "u"); - init_letter(0xdb, 0xfb, "u"); - init_letter(0xdc, 0xfc, "u"); - init_letter(0xdd, 0xfd, "y"); - init_letter(0xde, 0xfe, THORN_SORT_KEY); - - store_token("\337", TOKEN_LOWER, "ss", "SS"); - store_token("\377", TOKEN_LOWER, "y", "Y"); -} - -static void init_two_char_letter(char l1, char l2, char u1, char u2, - const char *sk = 0) -{ - char buf[6]; - buf[0] = '\\'; - buf[1] = '('; - buf[2] = l1; - buf[3] = l2; - buf[4] = '\0'; - const char *p = strsave(buf); - buf[2] = u1; - buf[3] = u2; - store_letter(p, strsave(buf), sk); - buf[1] = '['; - buf[4] = ']'; - buf[5] = '\0'; - p = strsave(buf); - buf[2] = l1; - buf[3] = l2; - store_letter(strsave(buf), p, sk); - -} - -static void init_special_chars() -{ - const char *p; - for (p = "':^`~"; *p; p++) - for (const char *q = "aeiouy"; *q; q++) { - // Use a variable to work around bug in gcc 2.0 - char c = cmupper(*q); - init_two_char_letter(*p, *q, 
*p, c); - } - for (p = "/l/o~n,coeaeij"; *p; p += 2) { - // Use variables to work around bug in gcc 2.0 - char c0 = cmupper(p[0]); - char c1 = cmupper(p[1]); - init_two_char_letter(p[0], p[1], c0, c1); - } - init_two_char_letter('v', 's', 'v', 'S', "s"); - init_two_char_letter('v', 'z', 'v', 'Z', "z"); - init_two_char_letter('o', 'a', 'o', 'A', "a"); - init_two_char_letter('T', 'p', 'T', 'P', THORN_SORT_KEY); - init_two_char_letter('-', 'd', '-', 'D'); - - store_token("\\(ss", TOKEN_LOWER, 0, "SS"); - store_token("\\[ss]", TOKEN_LOWER, 0, "SS"); - - store_token("\\(Sd", TOKEN_LOWER, "d", "\\(-D"); - store_token("\\[Sd]", TOKEN_LOWER, "d", "\\[-D]"); - store_token("\\(hy", TOKEN_HYPHEN); - store_token("\\[hy]", TOKEN_HYPHEN); - store_token("\\(en", TOKEN_RANGE_SEP); - store_token("\\[en]", TOKEN_RANGE_SEP); -} - -static void init_strings() -{ - char buf[6]; - buf[0] = '\\'; - buf[1] = '*'; - for (const char *p = "'`^^,:~v_o./;"; *p; p++) { - buf[2] = *p; - buf[3] = '\0'; - store_token(strsave(buf), TOKEN_ACCENT); - buf[2] = '['; - buf[3] = *p; - buf[4] = ']'; - buf[5] = '\0'; - store_token(strsave(buf), TOKEN_ACCENT); - } - - // -ms special letters - store_letter("\\*(th", "\\*(Th", THORN_SORT_KEY); - store_letter("\\*[th]", "\\*[Th]", THORN_SORT_KEY); - store_letter("\\*(d-", "\\*(D-"); - store_letter("\\*[d-]", "\\*[D-]"); - store_letter("\\*(ae", "\\*(Ae", "ae"); - store_letter("\\*[ae]", "\\*[Ae]", "ae"); - store_letter("\\*(oe", "\\*(Oe", "oe"); - store_letter("\\*[oe]", "\\*[Oe]", "oe"); - - store_token("\\*3", TOKEN_LOWER, "y", "Y"); - store_token("\\*8", TOKEN_LOWER, "ss", "SS"); - store_token("\\*q", TOKEN_LOWER, "o", "O"); -} - -struct token_initer { - token_initer(); -}; - -static token_initer the_token_initer; - -token_initer::token_initer() -{ - init_ascii(); - init_latin1(); - init_special_chars(); - init_strings(); - default_token_info.set(TOKEN_OTHER); -} diff --git a/contrib/groff/refer/token.h b/contrib/groff/refer/token.h deleted file mode 100644 
index 6da430d6abad..000000000000 --- a/contrib/groff/refer/token.h +++ /dev/null @@ -1,88 +0,0 @@ -// -*- C++ -*- -/* Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc. - Written by James Clark (jjc@jclark.com) - -This file is part of groff. - -groff is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free -Software Foundation; either version 2, or (at your option) any later -version. - -groff is distributed in the hope that it will be useful, but WITHOUT ANY -WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received a copy of the GNU General Public License along -with groff; see the file COPYING. If not, write to the Free Software -Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - -enum token_type { - TOKEN_OTHER, - TOKEN_UPPER, - TOKEN_LOWER, - TOKEN_ACCENT, - TOKEN_PUNCT, - TOKEN_HYPHEN, - TOKEN_RANGE_SEP -}; - -class token_info { -private: - token_type type; - const char *sort_key; - const char *other_case; -public: - token_info(); - void set(token_type, const char *sk = 0, const char *oc = 0); - void lower_case(const char *start, const char *end, string &result) const; - void upper_case(const char *start, const char *end, string &result) const; - void sortify(const char *start, const char *end, string &result) const; - int sortify_non_empty(const char *start, const char *end) const; - int is_upper() const; - int is_lower() const; - int is_accent() const; - int is_other() const; - int is_punct() const; - int is_hyphen() const; - int is_range_sep() const; -}; - -inline int token_info::is_upper() const -{ - return type == TOKEN_UPPER; -} - -inline int token_info::is_lower() const -{ - return type == TOKEN_LOWER; -} - -inline int token_info::is_accent() const -{ - return type == TOKEN_ACCENT; -} - -inline int 
token_info::is_other() const -{ - return type == TOKEN_OTHER; -} - -inline int token_info::is_punct() const -{ - return type == TOKEN_PUNCT; -} - -inline int token_info::is_hyphen() const -{ - return type == TOKEN_HYPHEN; -} - -inline int token_info::is_range_sep() const -{ - return type == TOKEN_RANGE_SEP; -} - -int get_token(const char **ptr, const char *end); -const token_info *lookup_token(const char *start, const char *end); |