diff options
Diffstat (limited to 'gnu/lib')
26 files changed, 0 insertions, 15730 deletions
diff --git a/gnu/lib/libg++/libg++/regex.cc b/gnu/lib/libg++/libg++/regex.cc deleted file mode 100644 index 40b8498a694d..000000000000 --- a/gnu/lib/libg++/libg++/regex.cc +++ /dev/null @@ -1,2757 +0,0 @@ -/* Extended regular expression matching and search library. - Copyright (C) 1985, 1989-90 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ - - -// This is a translation into C++ of regex.c, the GNU regexp package. - -/* To test, compile with -Dtest. This Dtestable feature turns this into - a self-contained program which reads a pattern, describes how it - compiles, then reads a string and searches for it. - - On the other hand, if you compile with both -Dtest and -Dcanned you - can run some tests we've already thought of. */ - -/* AIX requires the alloca decl to be the first thing in the file. */ -#ifdef __GNUC__ -#define alloca __builtin_alloca -#else -#ifdef sparc -#include <alloca.h> -extern "C" void *__builtin_alloca(...); -#else -#ifdef _AIX -#pragma alloca -#else -char *alloca (); -#endif -#endif -#endif - -#ifdef emacs - -/* The `emacs' switch turns on certain special matching commands - that make sense only in emacs. 
*/ - -#include "config.h" -#include "lisp.h" -#include "buffer.h" -#include "syntax.h" - -#else /* not emacs */ - -#include <_G_config.h> -#include <string.h> -#include <stdlib.h> - -/* Define the syntax stuff, so we can do the \<, \>, etc. */ - -/* This must be nonzero for the wordchar and notwordchar pattern - commands in re_match_2. */ -#ifndef Sword -#define Sword 1 -#endif - -#define SYNTAX(c) re_syntax_table[c] - - -#ifdef SYNTAX_TABLE - -char *re_syntax_table; - -#else /* not SYNTAX_TABLE */ - -static char re_syntax_table[256]; - - -static void -init_syntax_once () -{ - register int c; - static int done = 0; - - if (done) - return; - - memset (re_syntax_table, 0, sizeof re_syntax_table); - - for (c = 'a'; c <= 'z'; c++) - re_syntax_table[c] = Sword; - - for (c = 'A'; c <= 'Z'; c++) - re_syntax_table[c] = Sword; - - for (c = '0'; c <= '9'; c++) - re_syntax_table[c] = Sword; - - done = 1; -} - -#endif /* SYNTAX_TABLE */ -#endif /* emacs */ - -/* We write fatal error messages on standard error. */ -#include <stdio.h> - -/* isalpha(3) etc. are used for the character classes. */ -#include <ctype.h> -/* Sequents are missing isgraph. */ -#ifndef isgraph -#define isgraph(c) (isprint((c)) && !isspace((c))) -#endif - -/* Get the interface, including the syntax bits. */ -#include <gnuregex.h> - - -/* These are the command codes that appear in compiled regular - expressions, one per byte. Some command codes are followed by - argument bytes. A command code can specify any interpretation - whatsoever for its arguments. Zero-bytes may appear in the compiled - regular expression. - - The value of `exactn' is needed in search.c (search_buffer) in emacs. - So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of - `exactn' we use here must also be 1. */ - -enum regexpcode - { - unused=0, - exactn=1, /* Followed by one byte giving n, then by n literal bytes. */ - begline, /* Fail unless at beginning of line. */ - endline, /* Fail unless at end of line. 
*/ - jump, /* Followed by two bytes giving relative address to jump to. */ - on_failure_jump, /* Followed by two bytes giving relative address of - place to resume at in case of failure. */ - finalize_jump, /* Throw away latest failure point and then jump to - address. */ - maybe_finalize_jump, /* Like jump but finalize if safe to do so. - This is used to jump back to the beginning - of a repeat. If the command that follows - this jump is clearly incompatible with the - one at the beginning of the repeat, such that - we can be sure that there is no use backtracking - out of repetitions already completed, - then we finalize. */ - dummy_failure_jump, /* Jump, and push a dummy failure point. This - failure point will be thrown away if an attempt - is made to use it for a failure. A + construct - makes this before the first repeat. Also - use it as an intermediary kind of jump when - compiling an or construct. */ - succeed_n, /* Used like on_failure_jump except has to succeed n times; - then gets turned into an on_failure_jump. The relative - address following it is useless until then. The - address is followed by two bytes containing n. */ - jump_n, /* Similar to jump, but jump n times only; also the relative - address following is in turn followed by yet two more bytes - containing n. */ - set_number_at, /* Set the following relative location to the - subsequent number. */ - anychar, /* Matches any (more or less) one character. */ - charset, /* Matches any one char belonging to specified set. - First following byte is number of bitmap bytes. - Then come bytes for a bitmap saying which chars are in. - Bits in each byte are ordered low-bit-first. - A character is in the set if its bit is 1. - A character too large to have a bit in the map - is automatically not in the set. */ - charset_not, /* Same parameters as charset, but match any character - that is not one of those specified. 
*/ - start_memory, /* Start remembering the text that is matched, for - storing in a memory register. Followed by one - byte containing the register number. Register numbers - must be in the range 0 through RE_NREGS. */ - stop_memory, /* Stop remembering the text that is matched - and store it in a memory register. Followed by - one byte containing the register number. Register - numbers must be in the range 0 through RE_NREGS. */ - duplicate, /* Match a duplicate of something remembered. - Followed by one byte containing the index of the memory - register. */ -#ifdef emacs - before_dot, /* Succeeds if before point. */ - at_dot, /* Succeeds if at point. */ - after_dot, /* Succeeds if after point. */ -#endif - begbuf, /* Succeeds if at beginning of buffer. */ - endbuf, /* Succeeds if at end of buffer. */ - wordchar, /* Matches any word-constituent character. */ - notwordchar, /* Matches any char that is not a word-constituent. */ - wordbeg, /* Succeeds if at word beginning. */ - wordend, /* Succeeds if at word end. */ - wordbound, /* Succeeds if at a word boundary. */ - notwordbound /* Succeeds if not at a word boundary. */ -#ifdef emacs - ,syntaxspec, /* Matches any character whose syntax is specified. - followed by a byte which contains a syntax code, - e.g., Sword. */ - notsyntaxspec /* Matches any character whose syntax differs from - that specified. */ -#endif - }; - - -/* Number of failure points to allocate space for initially, - when matching. If this number is exceeded, more space is allocated, - so it is not a hard limit. */ - -#ifndef NFAILURES -#define NFAILURES 80 -#endif - - -#ifndef SIGN_EXTEND_CHAR -#ifdef __STDC__ -#define SIGN_EXTEND_CHAR(c) ((signed char)(c)) -#else -#define SIGN_EXTEND_CHAR(c) (((c)^128) - 128) /* As in Harbison and Steele. */ -#endif -#endif /* not SIGN_EXTEND_CHAR */ - -/* Store NUMBER in two contiguous bytes starting at DESTINATION. 
*/ -#define STORE_NUMBER(destination, number) \ - { (destination)[0] = (char)((number) & 0377); \ - (destination)[1] = (number) >> 8; } - -/* Same as STORE_NUMBER, except increment the destination pointer to - the byte after where the number is stored. Watch out that values for - DESTINATION such as p + 1 won't work, whereas p will. */ -#define STORE_NUMBER_AND_INCR(destination, number) \ - { STORE_NUMBER(destination, number); \ - (destination) += 2; } - - -/* Put into DESTINATION a number stored in two contingous bytes starting - at SOURCE. */ -#define EXTRACT_NUMBER(destination, source) \ - { (destination) = *(source) & 0377; \ - (destination) += SIGN_EXTEND_CHAR (*(char *)((source) + 1)) << 8; } - -/* Same as EXTRACT_NUMBER, except increment the pointer for source to - point to second byte of SOURCE. Note that SOURCE has to be a value - such as p, not, e.g., p + 1. */ -#define EXTRACT_NUMBER_AND_INCR(destination, source) \ - { EXTRACT_NUMBER (destination, source); \ - (source) += 2; } - - -/* Specify the precise syntax of regexps for compilation. This provides - for compatibility for various utilities which historically have - different, incompatible syntaxes. - - The argument SYNTAX is a bit-mask comprised of the various bits - defined in regex.h. */ - -int -re_set_syntax (int syntax) -{ - int ret; - - ret = obscure_syntax; - obscure_syntax = syntax; - return ret; -} - -/* Set by re_set_syntax to the current regexp syntax to recognize. */ -int obscure_syntax = 0; - - - -/* Macros for re_compile_pattern, which is found below these definitions. */ - -#define CHAR_CLASS_MAX_LENGTH 6 - -/* Fetch the next character in the uncompiled pattern, translating it if - necessary. */ -#define PATFETCH(c) \ - {if (p == pend) goto end_of_pattern; \ - c = * (const unsigned char *) p++; \ - if (translate) c = translate[c]; } - -/* Fetch the next character in the uncompiled pattern, with no - translation. 
*/ -#define PATFETCH_RAW(c) \ - {if (p == pend) goto end_of_pattern; \ - c = * (const unsigned char *) p++; } - -#define PATUNFETCH p-- - - -/* If the buffer isn't allocated when it comes in, use this. */ -#define INIT_BUF_SIZE 28 - -/* Make sure we have at least N more bytes of space in buffer. */ -#define GET_BUFFER_SPACE(n) \ - { \ - while (b - bufp->buffer + (n) >= bufp->allocated) \ - EXTEND_BUFFER; \ - } - -/* Make sure we have one more byte of buffer space and then add CH to it. */ -#define BUFPUSH(ch) \ - { \ - GET_BUFFER_SPACE (1); \ - *b++ = (char) (ch); \ - } - -/* Extend the buffer by twice its current size via reallociation and - reset the pointers that pointed into the old allocation to point to - the correct places in the new allocation. If extending the buffer - results in it being larger than 1 << 16, then flag memory exhausted. */ -#define EXTEND_BUFFER \ - { char *old_buffer = bufp->buffer; \ - if (bufp->allocated == (1L<<16)) goto too_big; \ - bufp->allocated *= 2; \ - if (bufp->allocated > (1L<<16)) bufp->allocated = (1L<<16); \ - bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated); \ - if (bufp->buffer == 0) \ - goto memory_exhausted; \ - b = (b - old_buffer) + bufp->buffer; \ - if (fixup_jump) \ - fixup_jump = (fixup_jump - old_buffer) + bufp->buffer; \ - if (laststart) \ - laststart = (laststart - old_buffer) + bufp->buffer; \ - begalt = (begalt - old_buffer) + bufp->buffer; \ - if (pending_exact) \ - pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ - } - -/* Set the bit for character C in a character set list. */ -#define SET_LIST_BIT(c) (b[(c) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH)) - -/* Get the next unsigned number in the uncompiled pattern. 
*/ -#define GET_UNSIGNED_NUMBER(num) \ - { if (p != pend) \ - { \ - PATFETCH (c); \ - while (isdigit (c)) \ - { \ - if (num < 0) \ - num = 0; \ - num = num * 10 + c - '0'; \ - if (p == pend) \ - break; \ - PATFETCH (c); \ - } \ - } \ - } - -/* Subroutines for re_compile_pattern. */ -static void store_jump (char *from, char opcode, char *to); -static void insert_jump (char op, char *from, char *to, char *current_end); -static void store_jump_n (char *from, char opcode, char *to, unsigned n); -static void insert_jump_n (char, char *, char *, char *, unsigned); -static void insert_op_2 (char, char *, char *_end, int, int); - - -/* re_compile_pattern takes a regular-expression string - and converts it into a buffer full of byte commands for matching. - - PATTERN is the address of the pattern string - SIZE is the length of it. - BUFP is a struct re_pattern_buffer * which points to the info - on where to store the byte commands. - This structure contains a char * which points to the - actual space, which should have been obtained with malloc. - re_compile_pattern may use realloc to grow the buffer space. - - The number of bytes of commands can be found out by looking in - the `struct re_pattern_buffer' that bufp pointed to, after - re_compile_pattern returns. */ - -char * -re_compile_pattern (const char *pattern, int size, struct re_pattern_buffer *bufp) -{ - register char *b = bufp->buffer; - register const char *p = pattern; - const char *pend = pattern + size; - register unsigned c, c1; - const char *p1; - unsigned char *translate = (unsigned char *) bufp->translate; - - /* Address of the count-byte of the most recently inserted `exactn' - command. This makes it possible to tell whether a new exact-match - character can be added to that command or requires a new `exactn' - command. */ - - char *pending_exact = 0; - - /* Address of the place where a forward-jump should go to the end of - the containing expression. 
Each alternative of an `or', except the - last, ends with a forward-jump of this sort. */ - - char *fixup_jump = 0; - - /* Address of start of the most recently finished expression. - This tells postfix * where to find the start of its operand. */ - - char *laststart = 0; - - /* In processing a repeat, 1 means zero matches is allowed. */ - - char zero_times_ok; - - /* In processing a repeat, 1 means many matches is allowed. */ - - char many_times_ok; - - /* Address of beginning of regexp, or inside of last \(. */ - - char *begalt = b; - - /* In processing an interval, at least this many matches must be made. */ - int lower_bound; - - /* In processing an interval, at most this many matches can be made. */ - int upper_bound; - - /* Place in pattern (i.e., the {) to which to go back if the interval - is invalid. */ - const char *beg_interval = 0; - - /* Stack of information saved by \( and restored by \). - Four stack elements are pushed by each \(: - First, the value of b. - Second, the value of fixup_jump. - Third, the value of regnum. - Fourth, the value of begalt. */ - - int stackb[40]; - int *stackp = stackb; - int *stacke = stackb + 40; - int *stackt; - - /* Counts \('s as they are encountered. Remembered for the matching \), - where it becomes the register number to put in the stop_memory - command. */ - - unsigned regnum = 1; - - bufp->fastmap_accurate = 0; - -#ifndef emacs -#ifndef SYNTAX_TABLE - /* Initialize the syntax table. */ - init_syntax_once(); -#endif -#endif - - if (bufp->allocated == 0) - { - bufp->allocated = INIT_BUF_SIZE; - if (bufp->buffer) - /* EXTEND_BUFFER loses when bufp->allocated is 0. */ - bufp->buffer = (char *) realloc (bufp->buffer, INIT_BUF_SIZE); - else - /* Caller did not allocate a buffer. Do it for them. 
*/ - bufp->buffer = (char *) malloc (INIT_BUF_SIZE); - if (!bufp->buffer) goto memory_exhausted; - begalt = b = bufp->buffer; - } - - while (p != pend) - { - PATFETCH (c); - - switch (c) - { - case '$': - { - const char *p1 = p; - /* When testing what follows the $, - look past the \-constructs that don't consume anything. */ - if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS)) - while (p1 != pend) - { - if (*p1 == '\\' && p1 + 1 != pend - && (p1[1] == '<' || p1[1] == '>' - || p1[1] == '`' || p1[1] == '\'' -#ifdef emacs - || p1[1] == '=' -#endif - || p1[1] == 'b' || p1[1] == 'B')) - p1 += 2; - else - break; - } - if (obscure_syntax & RE_TIGHT_VBAR) - { - if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS) && p1 != pend) - goto normal_char; - /* Make operand of last vbar end before this `$'. */ - if (fixup_jump) - store_jump (fixup_jump, jump, b); - fixup_jump = 0; - BUFPUSH (endline); - break; - } - /* $ means succeed if at end of line, but only in special contexts. - If validly in the middle of a pattern, it is a normal character. */ - - if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS) && p1 != pend) - goto invalid_pattern; - if (p1 == pend || *p1 == '\n' - || (obscure_syntax & RE_CONTEXT_INDEP_OPS) - || (obscure_syntax & RE_NO_BK_PARENS - ? *p1 == ')' - : *p1 == '\\' && p1[1] == ')') - || (obscure_syntax & RE_NO_BK_VBAR - ? *p1 == '|' - : *p1 == '\\' && p1[1] == '|')) - { - BUFPUSH (endline); - break; - } - goto normal_char; - } - case '^': - /* ^ means succeed if at beg of line, but only if no preceding - pattern. */ - - if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS) && laststart) - goto invalid_pattern; - if (laststart && p - 2 >= pattern && p[-2] != '\n' - && !(obscure_syntax & RE_CONTEXT_INDEP_OPS)) - goto normal_char; - if (obscure_syntax & RE_TIGHT_VBAR) - { - if (p != pattern + 1 - && ! 
(obscure_syntax & RE_CONTEXT_INDEP_OPS)) - goto normal_char; - BUFPUSH (begline); - begalt = b; - } - else - BUFPUSH (begline); - break; - - case '+': - case '?': - if ((obscure_syntax & RE_BK_PLUS_QM) - || (obscure_syntax & RE_LIMITED_OPS)) - goto normal_char; - handle_plus: - case '*': - /* If there is no previous pattern, char not special. */ - if (!laststart) - { - if (obscure_syntax & RE_CONTEXTUAL_INVALID_OPS) - goto invalid_pattern; - else if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS)) - goto normal_char; - } - /* If there is a sequence of repetition chars, - collapse it down to just one. */ - zero_times_ok = 0; - many_times_ok = 0; - while (1) - { - zero_times_ok |= c != '+'; - many_times_ok |= c != '?'; - if (p == pend) - break; - PATFETCH (c); - if (c == '*') - ; - else if (!(obscure_syntax & RE_BK_PLUS_QM) - && (c == '+' || c == '?')) - ; - else if ((obscure_syntax & RE_BK_PLUS_QM) - && c == '\\') - { - int c1; - PATFETCH (c1); - if (!(c1 == '+' || c1 == '?')) - { - PATUNFETCH; - PATUNFETCH; - break; - } - c = c1; - } - else - { - PATUNFETCH; - break; - } - } - - /* Star, etc. applied to an empty pattern is equivalent - to an empty pattern. */ - if (!laststart) - break; - - /* Now we know whether or not zero matches is allowed - and also whether or not two or more matches is allowed. */ - if (many_times_ok) - { - /* If more than one repetition is allowed, put in at the - end a backward relative jump from b to before the next - jump we're going to put in below (which jumps from - laststart to after this jump). */ - GET_BUFFER_SPACE (3); - store_jump (b, maybe_finalize_jump, laststart - 3); - b += 3; /* Because store_jump put stuff here. */ - } - /* On failure, jump from laststart to b + 3, which will be the - end of the buffer after this jump is inserted. 
*/ - GET_BUFFER_SPACE (3); - insert_jump (on_failure_jump, laststart, b + 3, b); - pending_exact = 0; - b += 3; - if (!zero_times_ok) - { - /* At least one repetition is required, so insert a - dummy-failure before the initial on-failure-jump - instruction of the loop. This effects a skip over that - instruction the first time we hit that loop. */ - GET_BUFFER_SPACE (6); - insert_jump (dummy_failure_jump, laststart, laststart + 6, b); - b += 3; - } - break; - - case '.': - laststart = b; - BUFPUSH (anychar); - break; - - case '[': - if (p == pend) - goto invalid_pattern; - while (b - bufp->buffer - > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH) - EXTEND_BUFFER; - - laststart = b; - if (*p == '^') - { - BUFPUSH (charset_not); - p++; - } - else - BUFPUSH (charset); - p1 = p; - - BUFPUSH ((1 << BYTEWIDTH) / BYTEWIDTH); - /* Clear the whole map */ - memset (b, 0, (1 << BYTEWIDTH) / BYTEWIDTH); - - if ((obscure_syntax & RE_HAT_NOT_NEWLINE) && b[-2] == charset_not) - SET_LIST_BIT ('\n'); - - - /* Read in characters and ranges, setting map bits. */ - while (1) - { - /* Don't translate while fetching, in case it's a range bound. - When we set the bit for the character, we translate it. */ - PATFETCH_RAW (c); - - /* If set, \ escapes characters when inside [...]. */ - if ((obscure_syntax & RE_AWK_CLASS_HACK) && c == '\\') - { - PATFETCH(c1); - SET_LIST_BIT (c1); - continue; - } - if (c == ']') - { - if (p == p1 + 1) - { - /* If this is an empty bracket expression. */ - if ((obscure_syntax & RE_NO_EMPTY_BRACKETS) - && p == pend) - goto invalid_pattern; - } - else - /* Stop if this isn't merely a ] inside a bracket - expression, but rather the end of a bracket - expression. */ - break; - } - /* Get a range. */ - if (p[0] == '-' && p[1] != ']') - { - PATFETCH (c1); - /* Don't translate the range bounds while fetching them. 
*/ - PATFETCH_RAW (c1); - - if ((obscure_syntax & RE_NO_EMPTY_RANGES) && c > c1) - goto invalid_pattern; - - if ((obscure_syntax & RE_NO_HYPHEN_RANGE_END) - && c1 == '-' && *p != ']') - goto invalid_pattern; - - while (c <= c1) - { - /* Translate each char that's in the range. */ - if (translate) - SET_LIST_BIT (translate[c]); - else - SET_LIST_BIT (c); - c++; - } - } - else if ((obscure_syntax & RE_CHAR_CLASSES) - && c == '[' && p[0] == ':') - { - /* Longest valid character class word has six characters. */ - char str[CHAR_CLASS_MAX_LENGTH]; - PATFETCH (c); - c1 = 0; - /* If no ] at end. */ - if (p == pend) - goto invalid_pattern; - while (1) - { - /* Don't translate the ``character class'' characters. */ - PATFETCH_RAW (c); - if (c == ':' || c == ']' || p == pend - || c1 == CHAR_CLASS_MAX_LENGTH) - break; - str[c1++] = c; - } - str[c1] = '\0'; - if (p == pend - || c == ']' /* End of the bracket expression. */ - || p[0] != ']' - || p + 1 == pend - || (strcmp (str, "alpha") != 0 - && strcmp (str, "upper") != 0 - && strcmp (str, "lower") != 0 - && strcmp (str, "digit") != 0 - && strcmp (str, "alnum") != 0 - && strcmp (str, "xdigit") != 0 - && strcmp (str, "space") != 0 - && strcmp (str, "print") != 0 - && strcmp (str, "punct") != 0 - && strcmp (str, "graph") != 0 - && strcmp (str, "cntrl") != 0)) - { - /* Undo the ending character, the letters, and leave - the leading : and [ (but set bits for them). */ - c1++; - while (c1--) - PATUNFETCH; - SET_LIST_BIT ('['); - SET_LIST_BIT (':'); - } - else - { - /* The ] at the end of the character class. 
*/ - PATFETCH (c); - if (c != ']') - goto invalid_pattern; - for (c = 0; c < (1 << BYTEWIDTH); c++) - { - if ((strcmp (str, "alpha") == 0 && isalpha (c)) - || (strcmp (str, "upper") == 0 && isupper (c)) - || (strcmp (str, "lower") == 0 && islower (c)) - || (strcmp (str, "digit") == 0 && isdigit (c)) - || (strcmp (str, "alnum") == 0 && isalnum (c)) - || (strcmp (str, "xdigit") == 0 && isxdigit (c)) - || (strcmp (str, "space") == 0 && isspace (c)) - || (strcmp (str, "print") == 0 && isprint (c)) - || (strcmp (str, "punct") == 0 && ispunct (c)) - || (strcmp (str, "graph") == 0 && isgraph (c)) - || (strcmp (str, "cntrl") == 0 && iscntrl (c))) - SET_LIST_BIT (c); - } - } - } - else if (translate) - SET_LIST_BIT (translate[c]); - else - SET_LIST_BIT (c); - } - - /* Discard any character set/class bitmap bytes that are all - 0 at the end of the map. Decrement the map-length byte too. */ - while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) - b[-1]--; - b += b[-1]; - break; - - case '(': - if (! (obscure_syntax & RE_NO_BK_PARENS)) - goto normal_char; - else - goto handle_open; - - case ')': - if (! (obscure_syntax & RE_NO_BK_PARENS)) - goto normal_char; - else - goto handle_close; - - case '\n': - if (! (obscure_syntax & RE_NEWLINE_OR)) - goto normal_char; - else - goto handle_bar; - - case '|': - if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS) - && (! laststart || p == pend)) - goto invalid_pattern; - else if (! (obscure_syntax & RE_NO_BK_VBAR)) - goto normal_char; - else - goto handle_bar; - - case '{': - if (! 
((obscure_syntax & RE_NO_BK_CURLY_BRACES) - && (obscure_syntax & RE_INTERVALS))) - goto normal_char; - else - goto handle_interval; - - case '\\': - if (p == pend) goto invalid_pattern; - PATFETCH_RAW (c); - switch (c) - { - case '(': - if (obscure_syntax & RE_NO_BK_PARENS) - goto normal_backsl; - handle_open: - if (stackp == stacke) goto nesting_too_deep; - - /* Laststart should point to the start_memory that we are about - to push (unless the pattern has RE_NREGS or more ('s). */ - *stackp++ = b - bufp->buffer; - if (regnum < RE_NREGS) - { - BUFPUSH (start_memory); - BUFPUSH (regnum); - } - *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0; - *stackp++ = regnum++; - *stackp++ = begalt - bufp->buffer; - fixup_jump = 0; - laststart = 0; - begalt = b; - break; - - case ')': - if (obscure_syntax & RE_NO_BK_PARENS) - goto normal_backsl; - handle_close: - if (stackp == stackb) goto unmatched_close; - begalt = *--stackp + bufp->buffer; - if (fixup_jump) - store_jump (fixup_jump, jump, b); - if (stackp[-1] < RE_NREGS) - { - BUFPUSH (stop_memory); - BUFPUSH (stackp[-1]); - } - stackp -= 2; - fixup_jump = *stackp ? *stackp + bufp->buffer - 1 : 0; - laststart = *--stackp + bufp->buffer; - break; - - case '|': - if ((obscure_syntax & RE_LIMITED_OPS) - || (obscure_syntax & RE_NO_BK_VBAR)) - goto normal_backsl; - handle_bar: - if (obscure_syntax & RE_LIMITED_OPS) - goto normal_char; - /* Insert before the previous alternative a jump which - jumps to this alternative if the former fails. */ - GET_BUFFER_SPACE (6); - insert_jump (on_failure_jump, begalt, b + 6, b); - pending_exact = 0; - b += 3; - /* The alternative before the previous alternative has a - jump after it which gets executed if it gets matched. - Adjust that jump so it will jump to the previous - alternative's analogous jump (put in below, which in - turn will jump to the next (if any) alternative's such - jump, etc.). The last such jump jumps to the correct - final destination. 
*/ - if (fixup_jump) - store_jump (fixup_jump, jump, b); - - /* Leave space for a jump after previous alternative---to be - filled in later. */ - fixup_jump = b; - b += 3; - - laststart = 0; - begalt = b; - break; - - case '{': - if (! (obscure_syntax & RE_INTERVALS) - /* Let \{ be a literal. */ - || ((obscure_syntax & RE_INTERVALS) - && (obscure_syntax & RE_NO_BK_CURLY_BRACES)) - /* If it's the string "\{". */ - || (p - 2 == pattern && p == pend)) - goto normal_backsl; - handle_interval: - beg_interval = p - 1; /* The {. */ - /* If there is no previous pattern, this isn't an interval. */ - if (!laststart) - { - if (obscure_syntax & RE_CONTEXTUAL_INVALID_OPS) - goto invalid_pattern; - else - goto normal_backsl; - } - /* It also isn't an interval if not preceded by an re - matching a single character or subexpression, or if - the current type of intervals can't handle back - references and the previous thing is a back reference. */ - if (! (*laststart == anychar - || *laststart == charset - || *laststart == charset_not - || *laststart == start_memory - || (*laststart == exactn && laststart[1] == 1) - || (! (obscure_syntax & RE_NO_BK_REFS) - && *laststart == duplicate))) - { - if (obscure_syntax & RE_NO_BK_CURLY_BRACES) - goto normal_char; - - /* Posix extended syntax is handled in previous - statement; this is for Posix basic syntax. */ - if (obscure_syntax & RE_INTERVALS) - goto invalid_pattern; - - goto normal_backsl; - } - lower_bound = -1; /* So can see if are set. */ - upper_bound = -1; - GET_UNSIGNED_NUMBER (lower_bound); - if (c == ',') - { - GET_UNSIGNED_NUMBER (upper_bound); - if (upper_bound < 0) - upper_bound = RE_DUP_MAX; - } - if (upper_bound < 0) - upper_bound = lower_bound; - if (! 
(obscure_syntax & RE_NO_BK_CURLY_BRACES)) - { - if (c != '\\') - goto invalid_pattern; - PATFETCH (c); - } - if (c != '}' || lower_bound < 0 || upper_bound > RE_DUP_MAX - || lower_bound > upper_bound - || ((obscure_syntax & RE_NO_BK_CURLY_BRACES) - && p != pend && *p == '{')) - { - if (obscure_syntax & RE_NO_BK_CURLY_BRACES) - goto unfetch_interval; - else - goto invalid_pattern; - } - - /* If upper_bound is zero, don't want to succeed at all; - jump from laststart to b + 3, which will be the end of - the buffer after this jump is inserted. */ - - if (upper_bound == 0) - { - GET_BUFFER_SPACE (3); - insert_jump (jump, laststart, b + 3, b); - b += 3; - } - - /* Otherwise, after lower_bound number of succeeds, jump - to after the jump_n which will be inserted at the end - of the buffer, and insert that jump_n. */ - else - { /* Set to 5 if only one repetition is allowed and - hence no jump_n is inserted at the current end of - the buffer; then only space for the succeed_n is - needed. Otherwise, need space for both the - succeed_n and the jump_n. */ - - unsigned slots_needed = upper_bound == 1 ? 5 : 10; - - GET_BUFFER_SPACE ((int) slots_needed); - /* Initialize the succeed_n to n, even though it will - be set by its attendant set_number_at, because - re_compile_fastmap will need to know it. Jump to - what the end of buffer will be after inserting - this succeed_n and possibly appending a jump_n. */ - insert_jump_n (succeed_n, laststart, b + slots_needed, - b, lower_bound); - b += 5; /* Just increment for the succeed_n here. */ - - /* More than one repetition is allowed, so put in at - the end of the buffer a backward jump from b to the - succeed_n we put in above. By the time we've gotten - to this jump when matching, we'll have matched once - already, so jump back only upper_bound - 1 times. 
*/ - - if (upper_bound > 1) - { - store_jump_n (b, jump_n, laststart, upper_bound - 1); - b += 5; - /* When hit this when matching, reset the - preceding jump_n's n to upper_bound - 1. */ - BUFPUSH (set_number_at); - GET_BUFFER_SPACE (2); - STORE_NUMBER_AND_INCR (b, -5); - STORE_NUMBER_AND_INCR (b, upper_bound - 1); - } - /* When hit this when matching, set the succeed_n's n. */ - GET_BUFFER_SPACE (5); - insert_op_2 (set_number_at, laststart, b, 5, lower_bound); - b += 5; - } - pending_exact = 0; - beg_interval = 0; - break; - - - unfetch_interval: - /* If an invalid interval, match the characters as literals. */ - if (beg_interval) - p = beg_interval; - else - { - fprintf (stderr, - "regex: no interval beginning to which to backtrack.\n"); - exit (1); - } - - beg_interval = 0; - PATFETCH (c); /* normal_char expects char in `c'. */ - goto normal_char; - break; - -#ifdef emacs - case '=': - BUFPUSH (at_dot); - break; - - case 's': - laststart = b; - BUFPUSH (syntaxspec); - PATFETCH (c); - BUFPUSH (syntax_spec_code[c]); - break; - - case 'S': - laststart = b; - BUFPUSH (notsyntaxspec); - PATFETCH (c); - BUFPUSH (syntax_spec_code[c]); - break; -#endif /* emacs */ - - case 'w': - laststart = b; - BUFPUSH (wordchar); - break; - - case 'W': - laststart = b; - BUFPUSH (notwordchar); - break; - - case '<': - BUFPUSH (wordbeg); - break; - - case '>': - BUFPUSH (wordend); - break; - - case 'b': - BUFPUSH (wordbound); - break; - - case 'B': - BUFPUSH (notwordbound); - break; - - case '`': - BUFPUSH (begbuf); - break; - - case '\'': - BUFPUSH (endbuf); - break; - - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - if (obscure_syntax & RE_NO_BK_REFS) - goto normal_char; - c1 = c - '0'; - if (c1 >= regnum) - { - if (obscure_syntax & RE_NO_EMPTY_BK_REF) - goto invalid_pattern; - else - goto normal_char; - } - /* Can't back reference to a subexpression if inside of it. 
*/ - for (stackt = stackp - 2; stackt > stackb; stackt -= 4) - if (*stackt == c1) - goto normal_char; - laststart = b; - BUFPUSH (duplicate); - BUFPUSH (c1); - break; - - case '+': - case '?': - if (obscure_syntax & RE_BK_PLUS_QM) - goto handle_plus; - else - goto normal_backsl; - break; - - default: - normal_backsl: - /* You might think it would be useful for \ to mean - not to translate; but if we don't translate it - it will never match anything. */ - if (translate) c = translate[c]; - goto normal_char; - } - break; - - default: - normal_char: /* Expects the character in `c'. */ - if (!pending_exact || pending_exact + *pending_exact + 1 != b - || *pending_exact == 0177 || *p == '*' || *p == '^' - || ((obscure_syntax & RE_BK_PLUS_QM) - ? *p == '\\' && (p[1] == '+' || p[1] == '?') - : (*p == '+' || *p == '?')) - || ((obscure_syntax & RE_INTERVALS) - && ((obscure_syntax & RE_NO_BK_CURLY_BRACES) - ? *p == '{' - : (p[0] == '\\' && p[1] == '{')))) - { - laststart = b; - BUFPUSH (exactn); - pending_exact = b; - BUFPUSH (0); - } - BUFPUSH (c); - (*pending_exact)++; - } - } - - if (fixup_jump) - store_jump (fixup_jump, jump, b); - - if (stackp != stackb) goto unmatched_open; - - bufp->used = b - bufp->buffer; - return 0; - - invalid_pattern: - return "Invalid regular expression"; - - unmatched_open: - return "Unmatched \\("; - - unmatched_close: - return "Unmatched \\)"; - - end_of_pattern: - return "Premature end of regular expression"; - - nesting_too_deep: - return "Nesting too deep"; - - too_big: - return "Regular expression too big"; - - memory_exhausted: - return "Memory exhausted"; -} - - -/* Store a jump of the form <OPCODE> <relative address>. - Store in the location FROM a jump operation to jump to relative - address FROM - TO. OPCODE is the opcode to store. */ - -static void -store_jump (char *from, char opcode, char *to) -{ - from[0] = opcode; - STORE_NUMBER(from + 1, to - (from + 3)); -} - - -/* Open up space before char FROM, and insert there a jump to TO. 
- CURRENT_END gives the end of the storage not in use, so we know - how much data to copy up. OP is the opcode of the jump to insert. - - If you call this function, you must zero out pending_exact. */ - -static void -insert_jump (char op, char *from, char *to, char *current_end) -{ - register char *pfrom = current_end; /* Copy from here... */ - register char *pto = current_end + 3; /* ...to here. */ - - while (pfrom != from) - *--pto = *--pfrom; - store_jump (from, op, to); -} - - -/* Store a jump of the form <opcode> <relative address> <n> . - - Store in the location FROM a jump operation to jump to relative - address FROM - TO. OPCODE is the opcode to store, N is a number the - jump uses, say, to decide how many times to jump. - - If you call this function, you must zero out pending_exact. */ - -static void -store_jump_n (char *from, char opcode, char *to, unsigned n) -{ - from[0] = opcode; - STORE_NUMBER (from + 1, to - (from + 3)); - STORE_NUMBER (from + 3, n); -} - - -/* Similar to insert_jump, but handles a jump which needs an extra - number to handle minimum and maximum cases. Open up space at - location FROM, and insert there a jump to TO. CURRENT_END gives the - end of the storage in use, so we know how much data to copy up. OP is - the opcode of the jump to insert. - - If you call this function, you must zero out pending_exact. */ - -static void -insert_jump_n (char op, char *from, char *to, char *current_end, unsigned n) -{ - register char *pfrom = current_end; /* Copy from here... */ - register char *pto = current_end + 5; /* ...to here. */ - - while (pfrom != from) - *--pto = *--pfrom; - store_jump_n (from, op, to, n); -} - - -/* Open up space at location THERE, and insert operation OP followed by - NUM_1 and NUM_2. CURRENT_END gives the end of the storage in use, so - we know how much data to copy up. - - If you call this function, you must zero out pending_exact. 
*/ - -static void -insert_op_2 (char op, char *there, char *current_end, int num_1, int num_2) -{ - register char *pfrom = current_end; /* Copy from here... */ - register char *pto = current_end + 5; /* ...to here. */ - - while (pfrom != there) - *--pto = *--pfrom; - - there[0] = op; - STORE_NUMBER (there + 1, num_1); - STORE_NUMBER (there + 3, num_2); -} - - - -/* Given a pattern, compute a fastmap from it. The fastmap records - which of the (1 << BYTEWIDTH) possible characters can start a string - that matches the pattern. This fastmap is used by re_search to skip - quickly over totally implausible text. - - The caller must supply the address of a (1 << BYTEWIDTH)-byte data - area as bufp->fastmap. - The other components of bufp describe the pattern to be used. */ - -void -re_compile_fastmap (struct re_pattern_buffer *bufp) -{ - unsigned char *pattern = (unsigned char *) bufp->buffer; - int size = bufp->used; - register char *fastmap = bufp->fastmap; - register unsigned char *p = pattern; - register unsigned char *pend = pattern + size; - register int j, k; - unsigned char *translate = (unsigned char *) bufp->translate; - - unsigned char *stackb[NFAILURES]; - unsigned char **stackp = stackb; - - unsigned is_a_succeed_n; - - memset (fastmap, 0, (1 << BYTEWIDTH)); - bufp->fastmap_accurate = 1; - bufp->can_be_null = 0; - - while (p) - { - is_a_succeed_n = 0; - if (p == pend) - { - bufp->can_be_null = 1; - break; - } -#ifdef SWITCH_ENUM_BUG - switch ((int) ((enum regexpcode) *p++)) -#else - switch ((enum regexpcode) *p++) -#endif - { - case exactn: - if (translate) - fastmap[translate[p[1]]] = 1; - else - fastmap[p[1]] = 1; - break; - - case unused: - case begline: -#ifdef emacs - case before_dot: - case at_dot: - case after_dot: -#endif - case begbuf: - case endbuf: - case wordbound: - case notwordbound: - case wordbeg: - case wordend: - continue; - - case endline: - if (translate) - fastmap[translate['\n']] = 1; - else - fastmap['\n'] = 1; - - if (bufp->can_be_null 
!= 1) - bufp->can_be_null = 2; - break; - - case jump_n: - case finalize_jump: - case maybe_finalize_jump: - case jump: - case dummy_failure_jump: - EXTRACT_NUMBER_AND_INCR (j, p); - p += j; - if (j > 0) - continue; - /* Jump backward reached implies we just went through - the body of a loop and matched nothing. - Opcode jumped to should be an on_failure_jump. - Just treat it like an ordinary jump. - For a * loop, it has pushed its failure point already; - If so, discard that as redundant. */ - - if ((enum regexpcode) *p != on_failure_jump - && (enum regexpcode) *p != succeed_n) - continue; - p++; - EXTRACT_NUMBER_AND_INCR (j, p); - p += j; - if (stackp != stackb && *stackp == p) - stackp--; - continue; - - case on_failure_jump: - handle_on_failure_jump: - EXTRACT_NUMBER_AND_INCR (j, p); - *++stackp = p + j; - if (is_a_succeed_n) - EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */ - continue; - - case succeed_n: - is_a_succeed_n = 1; - /* Get to the number of times to succeed. */ - p += 2; - /* Increment p past the n for when k != 0. */ - EXTRACT_NUMBER_AND_INCR (k, p); - if (k == 0) - { - p -= 4; - goto handle_on_failure_jump; - } - continue; - - case set_number_at: - p += 4; - continue; - - case start_memory: - case stop_memory: - p++; - continue; - - case duplicate: - bufp->can_be_null = 1; - fastmap['\n'] = 1; - case anychar: - for (j = 0; j < (1 << BYTEWIDTH); j++) - if (j != '\n') - fastmap[j] = 1; - if (bufp->can_be_null) - return; - /* Don't return; check the alternative paths - so we can set can_be_null if appropriate. 
*/ - break; - - case wordchar: - for (j = 0; j < (1 << BYTEWIDTH); j++) - if (SYNTAX (j) == Sword) - fastmap[j] = 1; - break; - - case notwordchar: - for (j = 0; j < (1 << BYTEWIDTH); j++) - if (SYNTAX (j) != Sword) - fastmap[j] = 1; - break; - -#ifdef emacs - case syntaxspec: - k = *p++; - for (j = 0; j < (1 << BYTEWIDTH); j++) - if (SYNTAX (j) == (enum syntaxcode) k) - fastmap[j] = 1; - break; - - case notsyntaxspec: - k = *p++; - for (j = 0; j < (1 << BYTEWIDTH); j++) - if (SYNTAX (j) != (enum syntaxcode) k) - fastmap[j] = 1; - break; -#endif /* not emacs */ - - case charset: - for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) - if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) - { - if (translate) - fastmap[translate[j]] = 1; - else - fastmap[j] = 1; - } - break; - - case charset_not: - /* Chars beyond end of map must be allowed */ - for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) - if (translate) - fastmap[translate[j]] = 1; - else - fastmap[j] = 1; - - for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) - if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) - { - if (translate) - fastmap[translate[j]] = 1; - else - fastmap[j] = 1; - } - break; - } - - /* Get here means we have successfully found the possible starting - characters of one path of the pattern. We need not follow this - path any farther. Instead, look at the next alternative - remembered in the stack. */ - if (stackp != stackb) - p = *stackp--; - else - break; - } -} - - - -/* Like re_search_2, below, but only one string is specified, and - doesn't let you say where to stop matching. */ - -int -re_search (struct re_pattern_buffer *pbufp, - char *string, - int size, - int startpos, - int range, - struct re_registers *regs) -{ - return re_search_2 (pbufp, (char *) 0, 0, string, size, startpos, range, - regs, size); -} - - -/* Using the compiled pattern in PBUFP->buffer, first tries to match the - virtual concatenation of STRING1 and STRING2, starting first at index - STARTPOS, then at STARTPOS + 1, and so on. 
RANGE is the number of - places to try before giving up. If RANGE is negative, it searches - backwards, i.e., the starting positions tried are STARTPOS, STARTPOS - - 1, etc. STRING1 and STRING2 are of SIZE1 and SIZE2, respectively. - In REGS, return the indices of the virtual concatenation of STRING1 - and STRING2 that matched the entire PBUFP->buffer and its contained - subexpressions. Do not consider matching one past the index MSTOP in - the virtual concatenation of STRING1 and STRING2. - - The value returned is the position in the strings at which the match - was found, or -1 if no match was found, or -2 if error (such as - failure stack overflow). */ - -int -re_search_2 (struct re_pattern_buffer *pbufp, - char *string1, int size1, - char *string2, int size2, - int startpos, - register int range, - struct re_registers *regs, - int mstop) -{ - register char *fastmap = pbufp->fastmap; - register unsigned char *translate = (unsigned char *) pbufp->translate; - int total_size = size1 + size2; - int endpos = startpos + range; - int val; - - /* Check for out-of-range starting position. */ - if (startpos < 0 || startpos > total_size) - return -1; - - /* Fix up range if it would eventually take startpos outside of the - virtual concatenation of string1 and string2. */ - if (endpos < -1) - range = -1 - startpos; - else if (endpos > total_size) - range = total_size - startpos; - - /* Update the fastmap now if not correct already. */ - if (fastmap && !pbufp->fastmap_accurate) - re_compile_fastmap (pbufp); - - /* If the search isn't to be a backwards one, don't waste time in a - long search for a pattern that says it is anchored. */ - if (pbufp->used > 0 && (enum regexpcode) pbufp->buffer[0] == begbuf - && range > 0) - { - if (startpos > 0) - return -1; - else - range = 1; - } - - while (1) - { - /* If a fastmap is supplied, skip quickly over characters that - cannot possibly be the start of a match. 
Note, however, that - if the pattern can possibly match the null string, we must - test it at each starting point so that we take the first null - string we get. */ - - if (fastmap && startpos < total_size && pbufp->can_be_null != 1) - { - if (range > 0) /* Searching forwards. */ - { - register int lim = 0; - register unsigned char *p; - int irange = range; - if (startpos < size1 && startpos + range >= size1) - lim = range - (size1 - startpos); - - p = ((unsigned char *) - &(startpos >= size1 ? string2 - size1 : string1)[startpos]); - - while (range > lim && !fastmap[translate - ? translate[*p++] - : *p++]) - range--; - startpos += irange - range; - } - else /* Searching backwards. */ - { - register unsigned char c; - - if (string1 == 0 || startpos >= size1) - c = string2[startpos - size1]; - else - c = string1[startpos]; - - c &= 0xff; - if (translate ? !fastmap[translate[c]] : !fastmap[c]) - goto advance; - } - } - - if (range >= 0 && startpos == total_size - && fastmap && pbufp->can_be_null == 0) - return -1; - - val = re_match_2 (pbufp, string1, size1, string2, size2, startpos, - regs, mstop); - if (val >= 0) - return startpos; - if (val == -2) - return -2; - -#ifdef C_ALLOCA - alloca (0); -#endif /* C_ALLOCA */ - - advance: - if (!range) - break; - else if (range > 0) - { - range--; - startpos++; - } - else - { - range++; - startpos--; - } - } - return -1; -} - - - -#ifndef emacs /* emacs never uses this. */ -int -re_match (struct re_pattern_buffer *pbufp, - char *string, - int size, - int pos, - struct re_registers *regs) -{ - return re_match_2 (pbufp, (char *) 0, 0, string, size, pos, regs, size); -} -#endif /* not emacs */ - - -/* The following are used for re_match_2, defined below: */ - -/* Roughly the maximum number of failure points on the stack. Would be - exactly that if always pushed MAX_NUM_FAILURE_ITEMS each time we failed. */ - -int re_max_failures = 2000; - -/* Routine used by re_match_2. 
*/ -static int bcmp_translate (char *, char *, int, unsigned char *); - - -/* Structure and accessing macros used in re_match_2: */ - -struct register_info -{ - unsigned is_active : 1; - unsigned matched_something : 1; -}; - -#define IS_ACTIVE(R) ((R).is_active) -#define MATCHED_SOMETHING(R) ((R).matched_something) - - -/* Macros used by re_match_2: */ - - -/* I.e., regstart, regend, and reg_info. */ - -#define NUM_REG_ITEMS 3 - -/* We push at most this many things on the stack whenever we - fail. The `+ 2' refers to PATTERN_PLACE and STRING_PLACE, which are - arguments to the PUSH_FAILURE_POINT macro. */ - -#define MAX_NUM_FAILURE_ITEMS (RE_NREGS * NUM_REG_ITEMS + 2) - - -/* We push this many things on the stack whenever we fail. */ - -#define NUM_FAILURE_ITEMS (last_used_reg * NUM_REG_ITEMS + 2) - - -/* This pushes most of the information about the current state we will want - if we ever fail back to it. */ - -#define PUSH_FAILURE_POINT(pattern_place, string_place) \ - { \ - short last_used_reg, this_reg; \ - \ - /* Find out how many registers are active or have been matched. \ - (Aside from register zero, which is only set at the end.) */ \ - for (last_used_reg = RE_NREGS - 1; last_used_reg > 0; last_used_reg--)\ - if (regstart[last_used_reg] != (unsigned char *) -1) \ - break; \ - \ - if (stacke - stackp < NUM_FAILURE_ITEMS) \ - { \ - unsigned char **stackx; \ - int len = stacke - stackb; \ - if (len > re_max_failures * MAX_NUM_FAILURE_ITEMS) \ - return -2; \ - \ - /* Roughly double the size of the stack. */ \ - stackx = (unsigned char **) alloca (2 * len \ - * sizeof (unsigned char *));\ - /* Only copy what is in use. */ \ - memcpy (stackx, stackb, len * sizeof (char *)); \ - stackp = stackx + (stackp - stackb); \ - stackb = stackx; \ - stacke = stackb + 2 * len; \ - } \ - \ - /* Now push the info for each of those registers. 
*/ \ - for (this_reg = 1; this_reg <= last_used_reg; this_reg++) \ - { \ - *stackp++ = regstart[this_reg]; \ - *stackp++ = regend[this_reg]; \ - *stackp++ = (unsigned char *) ®_info[this_reg]; \ - } \ - \ - /* Push how many registers we saved. */ \ - *stackp++ = (unsigned char *) last_used_reg; \ - \ - *stackp++ = pattern_place; \ - *stackp++ = string_place; \ - } - - -/* This pops what PUSH_FAILURE_POINT pushes. */ - -#define POP_FAILURE_POINT() \ - { \ - int temp; \ - stackp -= 2; /* Remove failure points. */ \ - temp = (int) *--stackp; /* How many regs pushed. */ \ - temp *= NUM_REG_ITEMS; /* How much to take off the stack. */ \ - stackp -= temp; /* Remove the register info. */ \ - } - - -#define MATCHING_IN_FIRST_STRING (dend == end_match_1) - -/* Is true if there is a first string and if PTR is pointing anywhere - inside it or just past the end. */ - -#define IS_IN_FIRST_STRING(ptr) \ - (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) - -/* Call before fetching a character with *d. This switches over to - string2 if necessary. */ - -#define PREFETCH \ - while (d == dend) \ - { \ - /* end of string2 => fail. */ \ - if (dend == end_match_2) \ - goto fail; \ - /* end of string1 => advance to string2. */ \ - d = string2; \ - dend = end_match_2; \ - } - - -/* Call this when have matched something; it sets `matched' flags for the - registers corresponding to the subexpressions of which we currently - are inside. */ -#define SET_REGS_MATCHED \ - { unsigned this_reg; \ - for (this_reg = 0; this_reg < RE_NREGS; this_reg++) \ - { \ - if (IS_ACTIVE(reg_info[this_reg])) \ - MATCHED_SOMETHING(reg_info[this_reg]) = 1; \ - else \ - MATCHED_SOMETHING(reg_info[this_reg]) = 0; \ - } \ - } - -/* Test if at very beginning or at very end of the virtual concatenation - of string1 and string2. If there is only one string, we've put it in - string2. */ - -#define AT_STRINGS_BEG (d == (size1 ? 
string1 : string2) || !size2) -#define AT_STRINGS_END (d == end2) - -#define AT_WORD_BOUNDARY \ - (AT_STRINGS_BEG || AT_STRINGS_END || IS_A_LETTER (d - 1) != IS_A_LETTER (d)) - -/* We have two special cases to check for: - 1) if we're past the end of string1, we have to look at the first - character in string2; - 2) if we're before the beginning of string2, we have to look at the - last character in string1; we assume there is a string1, so use - this in conjunction with AT_STRINGS_BEG. */ -#define IS_A_LETTER(d) \ - (SYNTAX ((d) == end1 ? *string2 : (d) == string2 - 1 ? *(end1 - 1) : *(d))\ - == Sword) - - -/* Match the pattern described by PBUFP against the virtual - concatenation of STRING1 and STRING2, which are of SIZE1 and SIZE2, - respectively. Start the match at index POS in the virtual - concatenation of STRING1 and STRING2. In REGS, return the indices of - the virtual concatenation of STRING1 and STRING2 that matched the - entire PBUFP->buffer and its contained subexpressions. Do not - consider matching one past the index MSTOP in the virtual - concatenation of STRING1 and STRING2. - - If pbufp->fastmap is nonzero, then it had better be up to date. - - The reason that the data to match are specified as two components - which are to be regarded as concatenated is so this function can be - used directly on the contents of an Emacs buffer. - - -1 is returned if there is no match. -2 is returned if there is an - error (such as match stack overflow). Otherwise the value is the - length of the substring which was matched. */ - -int -re_match_2 (struct re_pattern_buffer *pbufp, - char *string1_arg, int size1, - char *string2_arg, int size2, - int pos, - struct re_registers *regs, - int mstop) -{ - register unsigned char *p = (unsigned char *) pbufp->buffer; - - /* Pointer to beyond end of buffer. 
*/ - register unsigned char *pend = p + pbufp->used; - - unsigned char *string1 = (unsigned char *) string1_arg; - unsigned char *string2 = (unsigned char *) string2_arg; - unsigned char *end1; /* Just past end of first string. */ - unsigned char *end2; /* Just past end of second string. */ - - /* Pointers into string1 and string2, just past the last characters in - each to consider matching. */ - unsigned char *end_match_1, *end_match_2; - - register unsigned char *d, *dend; - register int mcnt; /* Multipurpose. */ - unsigned char *translate = (unsigned char *) pbufp->translate; - unsigned is_a_jump_n = 0; - - /* Failure point stack. Each place that can handle a failure further - down the line pushes a failure point on this stack. It consists of - restart, regend, and reg_info for all registers corresponding to the - subexpressions we're currently inside, plus the number of such - registers, and, finally, two char *'s. The first char * is where to - resume scanning the pattern; the second one is where to resume - scanning the strings. If the latter is zero, the failure point is a - ``dummy''; if a failure happens and the failure point is a dummy, it - gets discarded and the next next one is tried. */ - - unsigned char *initial_stack[MAX_NUM_FAILURE_ITEMS * NFAILURES]; - unsigned char **stackb = initial_stack; - unsigned char **stackp = stackb; - unsigned char **stacke = &stackb[MAX_NUM_FAILURE_ITEMS * NFAILURES]; - - - /* Information on the contents of registers. These are pointers into - the input strings; they record just what was matched (on this - attempt) by a subexpression part of the pattern, that is, the - regnum-th regstart pointer points to where in the pattern we began - matching and the regnum-th regend points to right after where we - stopped matching the regnum-th subexpression. (The zeroth register - keeps track of what the whole pattern matches.) 
*/ - - unsigned char *regstart[RE_NREGS]; - unsigned char *regend[RE_NREGS]; - - /* The is_active field of reg_info helps us keep track of which (possibly - nested) subexpressions we are currently in. The matched_something - field of reg_info[reg_num] helps us tell whether or not we have - matched any of the pattern so far this time through the reg_num-th - subexpression. These two fields get reset each time through any - loop their register is in. */ - - struct register_info reg_info[RE_NREGS]; - - - /* The following record the register info as found in the above - variables when we find a match better than any we've seen before. - This happens as we backtrack through the failure points, which in - turn happens only if we have not yet matched the entire string. */ - - unsigned best_regs_set = 0; - unsigned char *best_regstart[RE_NREGS]; - unsigned char *best_regend[RE_NREGS]; - - /* Initialize subexpression text positions to -1 to mark ones that no - \( or ( and \) or ) has been seen for. Also set all registers to - inactive and mark them as not having matched anything or ever - failed. */ - for (mcnt = 0; mcnt < RE_NREGS; mcnt++) - { - regstart[mcnt] = regend[mcnt] = (unsigned char *) -1; - IS_ACTIVE (reg_info[mcnt]) = 0; - MATCHED_SOMETHING (reg_info[mcnt]) = 0; - } - - if (regs) - for (mcnt = 0; mcnt < RE_NREGS; mcnt++) - regs->start[mcnt] = regs->end[mcnt] = -1; - - /* Set up pointers to ends of strings. - Don't allow the second string to be empty unless both are empty. */ - if (size2 == 0) - { - string2 = string1; - size2 = size1; - string1 = 0; - size1 = 0; - } - end1 = string1 + size1; - end2 = string2 + size2; - - /* Compute where to stop matching, within the two strings. */ - if (mstop <= size1) - { - end_match_1 = string1 + mstop; - end_match_2 = string2; - } - else - { - end_match_1 = end1; - end_match_2 = string2 + mstop - size1; - } - - /* `p' scans through the pattern as `d' scans through the data. 
`dend' - is the end of the input string that `d' points within. `d' is - advanced into the following input string whenever necessary, but - this happens before fetching; therefore, at the beginning of the - loop, `d' can be pointing at the end of a string, but it cannot - equal string2. */ - - if (size1 != 0 && pos <= size1) - d = string1 + pos, dend = end_match_1; - else - d = string2 + pos - size1, dend = end_match_2; - - - /* This loops over pattern commands. It exits by returning from the - function if match is complete, or it drops through if match fails - at this starting point in the input data. */ - - while (1) - { - is_a_jump_n = 0; - /* End of pattern means we might have succeeded. */ - if (p == pend) - { - /* If not end of string, try backtracking. Otherwise done. */ - if (d != end_match_2) - { - if (stackp != stackb) - { - /* More failure points to try. */ - - unsigned in_same_string = - IS_IN_FIRST_STRING (best_regend[0]) - == MATCHING_IN_FIRST_STRING; - - /* If exceeds best match so far, save it. */ - if (! best_regs_set - || (in_same_string && d > best_regend[0]) - || (! in_same_string && ! MATCHING_IN_FIRST_STRING)) - { - best_regs_set = 1; - best_regend[0] = d; /* Never use regstart[0]. */ - - for (mcnt = 1; mcnt < RE_NREGS; mcnt++) - { - best_regstart[mcnt] = regstart[mcnt]; - best_regend[mcnt] = regend[mcnt]; - } - } - goto fail; - } - /* If no failure points, don't restore garbage. */ - else if (best_regs_set) - { - restore_best_regs: - /* Restore best match. */ - d = best_regend[0]; - - for (mcnt = 0; mcnt < RE_NREGS; mcnt++) - { - regstart[mcnt] = best_regstart[mcnt]; - regend[mcnt] = best_regend[mcnt]; - } - } - } - - /* If caller wants register contents data back, convert it - to indices. 
*/ - if (regs) - { - regs->start[0] = pos; - if (MATCHING_IN_FIRST_STRING) - regs->end[0] = d - string1; - else - regs->end[0] = d - string2 + size1; - for (mcnt = 1; mcnt < RE_NREGS; mcnt++) - { - if (regend[mcnt] == (unsigned char *) -1) - { - regs->start[mcnt] = -1; - regs->end[mcnt] = -1; - continue; - } - if (IS_IN_FIRST_STRING (regstart[mcnt])) - regs->start[mcnt] = regstart[mcnt] - string1; - else - regs->start[mcnt] = regstart[mcnt] - string2 + size1; - - if (IS_IN_FIRST_STRING (regend[mcnt])) - regs->end[mcnt] = regend[mcnt] - string1; - else - regs->end[mcnt] = regend[mcnt] - string2 + size1; - } - } - return d - pos - (MATCHING_IN_FIRST_STRING - ? string1 - : string2 - size1); - } - - /* Otherwise match next pattern command. */ -#ifdef SWITCH_ENUM_BUG - switch ((int) ((enum regexpcode) *p++)) -#else - switch ((enum regexpcode) *p++) -#endif - { - - /* \( [or `(', as appropriate] is represented by start_memory, - \) by stop_memory. Both of those commands are followed by - a register number in the next byte. The text matched - within the \( and \) is recorded under that number. */ - case start_memory: - regstart[*p] = d; - IS_ACTIVE (reg_info[*p]) = 1; - MATCHED_SOMETHING (reg_info[*p]) = 0; - p++; - break; - - case stop_memory: - regend[*p] = d; - IS_ACTIVE (reg_info[*p]) = 0; - - /* If just failed to match something this time around with a sub- - expression that's in a loop, try to force exit from the loop. */ - if ((! 
MATCHED_SOMETHING (reg_info[*p]) - || (enum regexpcode) p[-3] == start_memory) - && (p + 1) != pend) - { - register unsigned char *p2 = p + 1; - mcnt = 0; - switch (*p2++) - { - case jump_n: - is_a_jump_n = 1; - case finalize_jump: - case maybe_finalize_jump: - case jump: - case dummy_failure_jump: - EXTRACT_NUMBER_AND_INCR (mcnt, p2); - if (is_a_jump_n) - p2 += 2; - break; - } - p2 += mcnt; - - /* If the next operation is a jump backwards in the pattern - to an on_failure_jump, exit from the loop by forcing a - failure after pushing on the stack the on_failure_jump's - jump in the pattern, and d. */ - if (mcnt < 0 && (enum regexpcode) *p2++ == on_failure_jump) - { - EXTRACT_NUMBER_AND_INCR (mcnt, p2); - PUSH_FAILURE_POINT (p2 + mcnt, d); - goto fail; - } - } - p++; - break; - - /* \<digit> has been turned into a `duplicate' command which is - followed by the numeric value of <digit> as the register number. */ - case duplicate: - { - int regno = *p++; /* Get which register to match against */ - register unsigned char *d2, *dend2; - - /* Where in input to try to start matching. */ - d2 = regstart[regno]; - - /* Where to stop matching; if both the place to start and - the place to stop matching are in the same string, then - set to the place to stop, otherwise, for now have to use - the end of the first string. */ - - dend2 = ((IS_IN_FIRST_STRING (regstart[regno]) - == IS_IN_FIRST_STRING (regend[regno])) - ? regend[regno] : end_match_1); - while (1) - { - /* If necessary, advance to next segment in register - contents. */ - while (d2 == dend2) - { - if (dend2 == end_match_2) break; - if (dend2 == regend[regno]) break; - d2 = string2, dend2 = regend[regno]; /* end of string1 => advance to string2. */ - } - /* At end of register contents => success */ - if (d2 == dend2) break; - - /* If necessary, advance to next segment in data. */ - PREFETCH; - - /* How many characters left in this segment to match. 
*/ - mcnt = dend - d; - - /* Want how many consecutive characters we can match in - one shot, so, if necessary, adjust the count. */ - if (mcnt > dend2 - d2) - mcnt = dend2 - d2; - - /* Compare that many; failure if mismatch, else move - past them. */ - if (translate - ? bcmp_translate ((char*)d, (char*)d2, mcnt, translate) - : memcmp (d, d2, mcnt)) - goto fail; - d += mcnt, d2 += mcnt; - } - } - break; - - case anychar: - PREFETCH; /* Fetch a data character. */ - /* Match anything but a newline, maybe even a null. */ - if ((translate ? translate[*d] : *d) == '\n' - || ((obscure_syntax & RE_DOT_NOT_NULL) - && (translate ? translate[*d] : *d) == '\000')) - goto fail; - SET_REGS_MATCHED; - d++; - break; - - case charset: - case charset_not: - { - int not = 0; /* Nonzero for charset_not. */ - register int c; - if (*(p - 1) == (unsigned char) charset_not) - not = 1; - - PREFETCH; /* Fetch a data character. */ - - if (translate) - c = translate[*d]; - else - c = *d; - - if (c < *p * BYTEWIDTH - && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) - not = !not; - - p += 1 + *p; - - if (!not) goto fail; - SET_REGS_MATCHED; - d++; - break; - } - - case begline: - if ((size1 != 0 && d == string1) - || (size1 == 0 && size2 != 0 && d == string2) - || (d && d[-1] == '\n') - || (size1 == 0 && size2 == 0)) - break; - else - goto fail; - - case endline: - if (d == end2 - || (d == end1 ? (size2 == 0 || *string2 == '\n') : *d == '\n')) - break; - goto fail; - - /* `or' constructs are handled by starting each alternative with - an on_failure_jump that points to the start of the next - alternative. Each alternative except the last ends with a - jump to the joining point. (Actually, each jump except for - the last one really jumps to the following jump, because - tensioning the jumps is a hassle.) */ - - /* The start of a stupid repeat has an on_failure_jump that points - past the end of the repeat text. 
This makes a failure point so - that on failure to match a repetition, matching restarts past - as many repetitions have been found with no way to fail and - look for another one. */ - - /* A smart repeat is similar but loops back to the on_failure_jump - so that each repetition makes another failure point. */ - - case on_failure_jump: - on_failure: - EXTRACT_NUMBER_AND_INCR (mcnt, p); - PUSH_FAILURE_POINT (p + mcnt, d); - break; - - /* The end of a smart repeat has a maybe_finalize_jump back. - Change it either to a finalize_jump or an ordinary jump. */ - case maybe_finalize_jump: - EXTRACT_NUMBER_AND_INCR (mcnt, p); - { - register unsigned char *p2 = p; - /* Compare what follows with the beginning of the repeat. - If we can establish that there is nothing that they would - both match, we can change to finalize_jump. */ - while (p2 + 1 != pend - && (*p2 == (unsigned char) stop_memory - || *p2 == (unsigned char) start_memory)) - p2 += 2; /* Skip over reg number. */ - if (p2 == pend) - p[-3] = (unsigned char) finalize_jump; - else if (*p2 == (unsigned char) exactn - || *p2 == (unsigned char) endline) - { - register int c = *p2 == (unsigned char) endline ? '\n' : p2[2]; - register unsigned char *p1 = p + mcnt; - /* p1[0] ... p1[2] are an on_failure_jump. - Examine what follows that. */ - if (p1[3] == (unsigned char) exactn && p1[5] != c) - p[-3] = (unsigned char) finalize_jump; - else if (p1[3] == (unsigned char) charset - || p1[3] == (unsigned char) charset_not) - { - int not = p1[3] == (unsigned char) charset_not; - if (c < p1[4] * BYTEWIDTH - && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) - not = !not; - /* `not' is 1 if c would match. */ - /* That means it is not safe to finalize. */ - if (!not) - p[-3] = (unsigned char) finalize_jump; - } - } - } - p -= 2; /* Point at relative address again. */ - if (p[-1] != (unsigned char) finalize_jump) - { - p[-1] = (unsigned char) jump; - goto nofinalize; - } - /* Note fall through. 
*/ - - /* The end of a stupid repeat has a finalize_jump back to the - start, where another failure point will be made which will - point to after all the repetitions found so far. */ - - /* Take off failure points put on by matching on_failure_jump - because didn't fail. Also remove the register information - put on by the on_failure_jump. */ - case finalize_jump: - POP_FAILURE_POINT (); - /* Note fall through. */ - - /* Jump without taking off any failure points. */ - case jump: - nofinalize: - EXTRACT_NUMBER_AND_INCR (mcnt, p); - p += mcnt; - break; - - case dummy_failure_jump: - /* Normally, the on_failure_jump pushes a failure point, which - then gets popped at finalize_jump. We will end up at - finalize_jump, also, and with a pattern of, say, `a+', we - are skipping over the on_failure_jump, so we have to push - something meaningless for finalize_jump to pop. */ - PUSH_FAILURE_POINT (0, 0); - goto nofinalize; - - - /* Have to succeed matching what follows at least n times. Then - just handle like an on_failure_jump. */ - case succeed_n: - EXTRACT_NUMBER (mcnt, p + 2); - /* Originally, this is how many times we HAVE to succeed. */ - if (mcnt) - { - mcnt--; - p += 2; - STORE_NUMBER_AND_INCR (p, mcnt); - } - else if (mcnt == 0) - { - p[2] = unused; - p[3] = unused; - goto on_failure; - } - else - { - fprintf (stderr, "regex: the succeed_n's n is not set.\n"); - exit (1); - } - break; - - case jump_n: - EXTRACT_NUMBER (mcnt, p + 2); - /* Originally, this is how many times we CAN jump. */ - if (mcnt) - { - mcnt--; - STORE_NUMBER(p + 2, mcnt); - goto nofinalize; /* Do the jump without taking off - any failure points. */ - } - /* If don't have to jump any more, skip over the rest of command. */ - else - p += 4; - break; - - case set_number_at: - { - register unsigned char *p1; - - EXTRACT_NUMBER_AND_INCR (mcnt, p); - p1 = p + mcnt; - EXTRACT_NUMBER_AND_INCR (mcnt, p); - STORE_NUMBER (p1, mcnt); - break; - } - - /* Ignore these. 
Used to ignore the n of succeed_n's which - currently have n == 0. */ - case unused: - break; - - case wordbound: - if (AT_WORD_BOUNDARY) - break; - goto fail; - - case notwordbound: - if (AT_WORD_BOUNDARY) - goto fail; - break; - - case wordbeg: - /* Have to check if AT_STRINGS_BEG before looking at d - 1. */ - if (IS_A_LETTER (d) && (AT_STRINGS_BEG || !IS_A_LETTER (d - 1))) - break; - goto fail; - - case wordend: - /* Have to check if AT_STRINGS_BEG before looking at d - 1. */ - if (!AT_STRINGS_BEG && IS_A_LETTER (d - 1) - && (!IS_A_LETTER (d) || AT_STRINGS_END)) - break; - goto fail; - -#ifdef emacs - case before_dot: - if (PTR_CHAR_POS (d) >= point) - goto fail; - break; - - case at_dot: - if (PTR_CHAR_POS (d) != point) - goto fail; - break; - - case after_dot: - if (PTR_CHAR_POS (d) <= point) - goto fail; - break; - - case wordchar: - mcnt = (int) Sword; - goto matchsyntax; - - case syntaxspec: - mcnt = *p++; - matchsyntax: - PREFETCH; - if (SYNTAX (*d++) != (enum syntaxcode) mcnt) goto fail; - SET_REGS_MATCHED; - break; - - case notwordchar: - mcnt = (int) Sword; - goto matchnotsyntax; - - case notsyntaxspec: - mcnt = *p++; - matchnotsyntax: - PREFETCH; - if (SYNTAX (*d++) == (enum syntaxcode) mcnt) goto fail; - SET_REGS_MATCHED; - break; - -#else /* not emacs */ - - case wordchar: - PREFETCH; - if (!IS_A_LETTER (d)) - goto fail; - SET_REGS_MATCHED; - break; - - case notwordchar: - PREFETCH; - if (IS_A_LETTER (d)) - goto fail; - SET_REGS_MATCHED; - break; - -#endif /* not emacs */ - - case begbuf: - if (AT_STRINGS_BEG) - break; - goto fail; - - case endbuf: - if (AT_STRINGS_END) - break; - goto fail; - - case exactn: - /* Match the next few pattern characters exactly. - mcnt is how many characters to match. */ - mcnt = *p++; - /* This is written out as an if-else so we don't waste time - testing `translate' inside the loop. 
*/ - if (translate) - { - do - { - PREFETCH; - if (translate[*d++] != *p++) goto fail; - } - while (--mcnt); - } - else - { - do - { - PREFETCH; - if (*d++ != *p++) goto fail; - } - while (--mcnt); - } - SET_REGS_MATCHED; - break; - } - continue; /* Successfully executed one pattern command; keep going. */ - - /* Jump here if any matching operation fails. */ - fail: - if (stackp != stackb) - /* A restart point is known. Restart there and pop it. */ - { - short last_used_reg, this_reg; - - /* If this failure point is from a dummy_failure_point, just - skip it. */ - if (!stackp[-2]) - { - POP_FAILURE_POINT (); - goto fail; - } - - d = *--stackp; - p = *--stackp; - if (d >= string1 && d <= end1) - dend = end_match_1; - /* Restore register info. */ - last_used_reg = (short) (int) *--stackp; - - /* Make the ones that weren't saved -1 or 0 again. */ - for (this_reg = RE_NREGS - 1; this_reg > last_used_reg; this_reg--) - { - regend[this_reg] = (unsigned char *) -1; - regstart[this_reg] = (unsigned char *) -1; - IS_ACTIVE (reg_info[this_reg]) = 0; - MATCHED_SOMETHING (reg_info[this_reg]) = 0; - } - - /* And restore the rest from the stack. */ - for ( ; this_reg > 0; this_reg--) - { - reg_info[this_reg] = *(struct register_info *) *--stackp; - regend[this_reg] = *--stackp; - regstart[this_reg] = *--stackp; - } - } - else - break; /* Matching at this starting point really fails. */ - } - - if (best_regs_set) - goto restore_best_regs; - return -1; /* Failure to match. */ -} - - -/* Compare LEN bytes of S1 and S2 after mapping each byte through TRANSLATE. Returns 0 when every mapped pair is equal and 1 at the first pair that differs. TRANSLATE is indexed without a null check. NOTE(review): the loop only stops when LEN reaches zero, so LEN is assumed to be non-negative -- confirm at the callers. */ static int -bcmp_translate (char *s1, char *s2, int len, unsigned char *translate) -{ - register unsigned char *p1 = (unsigned char*)s1; - register unsigned char *p2 = (unsigned char*)s2; - while (len) - { - if (translate [*p1++] != translate [*p2++]) return 1; - len--; - } - return 0; -} - - - -/* Entry points compatible with 4.2 BSD regex library. 
*/ - -#if 0 - -static struct re_pattern_buffer re_comp_buf; /* Pattern state shared by re_comp and re_exec. */ - -/* Compile S into re_comp_buf. When no pattern buffer exists yet this allocates a 200-byte buffer and a fastmap of 1 << BYTEWIDTH bytes. A null S compiles nothing and only reports whether a pattern buffer already exists. Returns 0 or a static message string from the checks made here; otherwise returns the result of re_compile_pattern. NOTE(review): when the fastmap allocation fails the pattern buffer stays allocated, so the next call skips the allocation branch and fastmap is left unset -- worth fixing if this block is ever enabled. */ char * -re_comp (char *s) -{ - if (!s) - { - if (!re_comp_buf.buffer) - return "No previous regular expression"; - return 0; - } - - if (!re_comp_buf.buffer) - { - if (!(re_comp_buf.buffer = (char *) malloc (200))) - return "Memory exhausted"; - re_comp_buf.allocated = 200; - if (!(re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH))) - return "Memory exhausted"; - } - return re_compile_pattern (s, strlen (s), &re_comp_buf); -} - -/* Search S with the pattern held in re_comp_buf. Returns 1 when re_search yields a non-negative value and 0 otherwise. */ int -re_exec (char *s) -{ - int len = strlen (s); - return 0 <= re_search (&re_comp_buf, s, len, 0, len, - (struct re_registers *) 0); -} -#endif /* 0: the BSD-compatible entry points above are compiled out */ - - - -#ifdef test - -#include <stdio.h> - -/* Indexed by a character, gives the upper case equivalent of the - character. Only entries 0141 through 0172 differ from their own index; every other entry maps to itself. */ - -char upcase[0400] = - { 000, 001, 002, 003, 004, 005, 006, 007, - 010, 011, 012, 013, 014, 015, 016, 017, - 020, 021, 022, 023, 024, 025, 026, 027, - 030, 031, 032, 033, 034, 035, 036, 037, - 040, 041, 042, 043, 044, 045, 046, 047, - 050, 051, 052, 053, 054, 055, 056, 057, - 060, 061, 062, 063, 064, 065, 066, 067, - 070, 071, 072, 073, 074, 075, 076, 077, - 0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107, - 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117, - 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127, - 0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137, - 0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107, - 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117, - 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127, - 0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177, - 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207, - 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217, - 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227, - 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237, - 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247, - 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257, - 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267, - 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277, - 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307, 
- 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317, - 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327, - 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337, - 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347, - 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357, - 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367, - 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377 - }; - -#ifdef canned - -#include "tests.h" - -typedef enum { extended_test, basic_test } test_type; - -/* Use this to run the tests we've thought of. T is fixed to extended_test below, so only test_posix_extended runs as written. */ - -void -main () -{ - test_type t = extended_test; - - if (t == basic_test) - { - printf ("Running basic tests:\n\n"); - test_posix_basic (); - } - else if (t == extended_test) - { - printf ("Running extended tests:\n\n"); - test_posix_extended (); - } -} - -#else /* not canned */ - -/* Use this to run interactive tests. Loops without an exit condition, reading a pattern line and then a subject line; an empty pattern line skips compilation, so the previously compiled pattern is matched again. */ - -void -main (int argc, char **argv) -{ - char pat[80]; - struct re_pattern_buffer buf; - int i; - char c; /* Not referenced in this function. */ - char fastmap[(1 << BYTEWIDTH)]; - - /* Allow a command argument to specify the style of syntax. 
*/ - if (argc > 1) - obscure_syntax = atoi (argv[1]); - - buf.allocated = 40; - buf.buffer = (char *) malloc (buf.allocated); - buf.fastmap = fastmap; - buf.translate = upcase; - - while (1) - { - gets (pat); /* NOTE(review): gets cannot limit the read to the 80 bytes of pat, and its result is not checked, so end of input is never detected by this loop. */ - - if (*pat) - { - re_compile_pattern (pat, strlen(pat), &buf); - - for (i = 0; i < buf.used; i++) - printchar (buf.buffer[i]); - - putchar ('\n'); - - printf ("%d allocated, %d used.\n", buf.allocated, buf.used); - - re_compile_fastmap (&buf); - printf ("Allowed by fastmap: "); - for (i = 0; i < (1 << BYTEWIDTH); i++) - if (fastmap[i]) printchar (i); - putchar ('\n'); - } - - gets (pat); /* Now read the string to match against */ - - i = re_match (&buf, pat, strlen (pat), 0, 0); - printf ("Match value %d.\n", i); - } -} - -#endif - - -#ifdef NOTDEF -void -print_buf (struct re_pattern_buffer *bufpbufp) /* Dump the compiled bytes, the sizes, the fastmap and the translate table of a pattern buffer. NOTE(review): the parameter is spelled bufpbufp while the body reads bufp, so this would not compile if NOTDEF were defined. */ -{ - int i; - - printf ("buf is :\n----------------\n"); - for (i = 0; i < bufp->used; i++) - printchar (bufp->buffer[i]); - - printf ("\n%d allocated, %d used.\n", bufp->allocated, bufp->used); - - printf ("Allowed by fastmap: "); - for (i = 0; i < (1 << BYTEWIDTH); i++) - if (bufp->fastmap[i]) - printchar (i); - printf ("\nAllowed by translate: "); - if (bufp->translate) - for (i = 0; i < (1 << BYTEWIDTH); i++) - if (bufp->translate[i]) - printchar (i); - printf ("\nfastmap is%s accurate\n", bufp->fastmap_accurate ? "" : "n't"); - printf ("can %s be null\n----------", bufp->can_be_null ? "" : "not"); -} -#endif /* NOTDEF */ - -/* Write C with putchar: unchanged when 040 <= C < 0177, otherwise as a backslash followed by three octal digits taken from bits 7-6, 5-3 and 2-0 of C. */ void -printchar (char c) -{ - if (c < 040 || c >= 0177) - { - putchar ('\\'); - putchar (((c >> 6) & 3) + '0'); - putchar (((c >> 3) & 7) + '0'); - putchar ((c & 7) + '0'); - } - else - putchar (c); -} - -/* Print STRING with puts and terminate the program with exit status 1. */ void -error (char *string) -{ - puts (string); - exit (1); -} -#endif /* test */ diff --git a/gnu/lib/libmalloc/free.c b/gnu/lib/libmalloc/free.c deleted file mode 100644 index 7d2a77c8a2f4..000000000000 --- a/gnu/lib/libmalloc/free.c +++ /dev/null @@ -1,210 +0,0 @@ -/* Free a block of memory allocated by `malloc'. 
- Copyright 1990, 1991, 1992 Free Software Foundation - Written May 1989 by Mike Haertel. - -This library is free software; you can redistribute it and/or -modify it under the terms of the GNU Library General Public License as -published by the Free Software Foundation; either version 2 of the -License, or (at your option) any later version. - -This library is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Library General Public License for more details. - -You should have received a copy of the GNU Library General Public -License along with this library; see the file COPYING.LIB. If -not, write to the Free Software Foundation, Inc., 675 Mass Ave, -Cambridge, MA 02139, USA. - - The author may be reached (Email) at the address mike@ai.mit.edu, - or (US mail) as Mike Haertel c/o Free Software Foundation. */ - -#ifndef _MALLOC_INTERNAL -#define _MALLOC_INTERNAL -#include <malloc.h> -#endif - -/* Debugging hook for free. */ -void (*__free_hook) __P ((__ptr_t __ptr)); - -/* List of blocks allocated by memalign. */ -struct alignlist *_aligned_blocks = NULL; - -/* Return memory to the heap. - Like `free' but don't call a __free_hook if there is one. The block index of PTR comes from BLOCK (ptr), and _heapinfo[block].busy.type selects the path: 0 is handled as a run of whole blocks that is linked into the free list, merged with adjacent free runs and possibly handed back through __morecore; any other value is used as a shift count (1 << type) for the fragment size, and the fragment is linked into _fraghead[type]. PTR is not checked for NULL here. */ -void -_free_internal (ptr) - __ptr_t ptr; -{ - int type; - size_t block, blocks; - register size_t i; - struct list *prev, *next; - - block = BLOCK (ptr); - - type = _heapinfo[block].busy.type; - switch (type) - { - case 0: - /* Get as many statistics as early as we can. */ - --_chunks_used; - _bytes_used -= _heapinfo[block].busy.info.size * BLOCKSIZE; - _bytes_free += _heapinfo[block].busy.info.size * BLOCKSIZE; - - /* Find the free cluster previous to this one in the free list. - Start searching at the last block referenced; this may benefit - programs with locality of allocation. 
*/ - i = _heapindex; /* _heapindex is where the previous call left off; it is stored again at the end of this case. */ - if (i > block) - while (i > block) - i = _heapinfo[i].free.prev; - else - { - do - i = _heapinfo[i].free.next; - while (i > 0 && i < block); - i = _heapinfo[i].free.prev; - } - - /* Determine how to link this block into the free list. */ - if (block == i + _heapinfo[i].free.size) - { - /* Coalesce this block with its predecessor. */ - _heapinfo[i].free.size += _heapinfo[block].busy.info.size; - block = i; - } - else - { - /* Really link this block back into the free list. */ - _heapinfo[block].free.size = _heapinfo[block].busy.info.size; - _heapinfo[block].free.next = _heapinfo[i].free.next; - _heapinfo[block].free.prev = i; - _heapinfo[i].free.next = block; - _heapinfo[_heapinfo[block].free.next].free.prev = block; - ++_chunks_free; - } - - /* Now that the block is linked in, see if we can coalesce it - with its successor (by deleting its successor from the list - and adding in its size). */ - if (block + _heapinfo[block].free.size == _heapinfo[block].free.next) - { - _heapinfo[block].free.size - += _heapinfo[_heapinfo[block].free.next].free.size; - _heapinfo[block].free.next - = _heapinfo[_heapinfo[block].free.next].free.next; - _heapinfo[_heapinfo[block].free.next].free.prev = block; - --_chunks_free; - } - - /* Now see if we can return stuff to the system. This is only done when the free run is at least FINAL_FREE_BLOCKS long, ends exactly at _heaplimit, and ends at the address that (*__morecore) (0) returns. */ - blocks = _heapinfo[block].free.size; - if (blocks >= FINAL_FREE_BLOCKS && block + blocks == _heaplimit - && (*__morecore) (0) == ADDRESS (block + blocks)) - { - register size_t bytes = blocks * BLOCKSIZE; - _heaplimit -= blocks; - (*__morecore) (-bytes); /* NOTE(review): bytes is a size_t, so -bytes wraps before it is passed; presumably __morecore takes a signed amount -- confirm against its declaration. */ - _heapinfo[_heapinfo[block].free.prev].free.next - = _heapinfo[block].free.next; - _heapinfo[_heapinfo[block].free.next].free.prev - = _heapinfo[block].free.prev; - block = _heapinfo[block].free.prev; - --_chunks_free; - _bytes_free -= bytes; - } - - /* Set the next search to begin at this block. */ - _heapindex = block; - break; - - default: - /* Do some of the statistics. 
*/ - --_chunks_used; - _bytes_used -= 1 << type; - ++_chunks_free; - _bytes_free += 1 << type; - - /* Get the address of the first free fragment in this block. frag.first is kept in units of fragments, hence the shift by type. */ - prev = (struct list *) ((char *) ADDRESS (block) + - (_heapinfo[block].busy.info.frag.first << type)); - - if (_heapinfo[block].busy.info.frag.nfree == (BLOCKSIZE >> type) - 1) - { - /* If all fragments of this block are free, remove them - from the fragment list and free the whole block. The loop below follows next pointers from the first free fragment; presumably the free fragments of one block sit next to each other in the list -- TODO confirm against malloc. */ - next = prev; - for (i = 1; i < (size_t) (BLOCKSIZE >> type); ++i) - next = next->next; - prev->prev->next = next; - if (next != NULL) - next->prev = prev->prev; - _heapinfo[block].busy.type = 0; - _heapinfo[block].busy.info.size = 1; - - /* Keep the statistics accurate. The block is re-labelled above as a one-block type-0 allocation and the counters are pre-adjusted here, so that the type-0 path reached through free () below undoes them (unless a __free_hook intercepts that call). */ - ++_chunks_used; - _bytes_used += BLOCKSIZE; - _chunks_free -= BLOCKSIZE >> type; - _bytes_free -= BLOCKSIZE; - - free (ADDRESS (block)); - } - else if (_heapinfo[block].busy.info.frag.nfree != 0) - { - /* If some fragments of this block are free, link this - fragment into the fragment list after the first free - fragment of this block. */ - next = (struct list *) ptr; - next->next = prev->next; - next->prev = prev; - prev->next = next; - if (next->next != NULL) - next->next->prev = next; - ++_heapinfo[block].busy.info.frag.nfree; - } - else - { - /* No fragments of this block are free, so link this - fragment into the fragment list and announce that - it is the first free fragment of this block. NOTE(review): the offset within the block is obtained by subtracting a null char pointer from PTR, which is not strictly portable C. */ - prev = (struct list *) ptr; - _heapinfo[block].busy.info.frag.nfree = 1; - _heapinfo[block].busy.info.frag.first = (unsigned long int) - ((unsigned long int) ((char *) ptr - (char *) NULL) - % BLOCKSIZE >> type); - prev->next = _fraghead[type].next; - prev->prev = &_fraghead[type]; - prev->prev->next = prev; - if (prev->next != NULL) - prev->next->prev = prev; - } - break; - } -} - -/* Return memory to the heap. 
*/ -/* A null PTR is ignored. If PTR equals the aligned address of an entry in _aligned_blocks, that entry is marked unused and its exact address is released in place of PTR. The resulting pointer goes to __free_hook when one is installed, otherwise to _free_internal. */ void -free (ptr) - __ptr_t ptr; -{ - register struct alignlist *l; - - if (ptr == NULL) - return; - - for (l = _aligned_blocks; l != NULL; l = l->next) - if (l->aligned == ptr) - { - l->aligned = NULL; /* Mark the slot in the list as free. */ - ptr = l->exact; - break; - } - - if (__free_hook != NULL) - (*__free_hook) (ptr); - else - _free_internal (ptr); -} diff --git a/gnu/lib/libmalloc/realloc.c b/gnu/lib/libmalloc/realloc.c deleted file mode 100644 index 2d31766a5379..000000000000 --- a/gnu/lib/libmalloc/realloc.c +++ /dev/null @@ -1,146 +0,0 @@ -/* Change the size of a block allocated by `malloc'. - Copyright 1990, 1991, 1992, 1993 Free Software Foundation, Inc. - Written May 1989 by Mike Haertel. - -This library is free software; you can redistribute it and/or -modify it under the terms of the GNU Library General Public License as -published by the Free Software Foundation; either version 2 of the -License, or (at your option) any later version. - -This library is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Library General Public License for more details. - -You should have received a copy of the GNU Library General Public -License along with this library; see the file COPYING.LIB. If -not, write to the Free Software Foundation, Inc., 675 Mass Ave, -Cambridge, MA 02139, USA. - - The author may be reached (Email) at the address mike@ai.mit.edu, - or (US mail) as Mike Haertel c/o Free Software Foundation. */ - -#ifndef _MALLOC_INTERNAL -#define _MALLOC_INTERNAL -#include <malloc.h> -#endif - -#define min(A, B) ((A) < (B) ? (A) : (B)) - -/* Debugging hook for realloc. */ -__ptr_t (*__realloc_hook) __P ((__ptr_t __ptr, size_t __size)); - -/* Resize the given region to the new size, returning a pointer - to the (possibly moved) region. 
This is optimized for speed; - some benchmarks seem to indicate that greater compactness is - achieved by unconditionally allocating and copying to a - new region. This module has incestuous knowledge of the - internals of both free and malloc. */ -/* Two cases are settled before anything else: a SIZE of zero frees PTR and returns malloc (0), and a null PTR returns malloc (SIZE). Neither consults __realloc_hook, which is only tried afterwards. NULL is returned when a needed allocation fails; in the fragment path that return happens before PTR is freed, and in the large-block path the code first tries to re-allocate the region it had just freed. */ __ptr_t -realloc (ptr, size) - __ptr_t ptr; - size_t size; -{ - __ptr_t result; - int type; - size_t block, blocks, oldlimit; - - if (size == 0) - { - free (ptr); - return malloc (0); - } - else if (ptr == NULL) - return malloc (size); - - if (__realloc_hook != NULL) - return (*__realloc_hook) (ptr, size); - - block = BLOCK (ptr); - - type = _heapinfo[block].busy.type; - switch (type) - { - case 0: - /* Maybe reallocate a large block to a small fragment. If that malloc fails, control falls through to the in-place handling below. */ - if (size <= BLOCKSIZE / 2) - { - result = malloc (size); - if (result != NULL) - { - memcpy (result, ptr, size); - free (ptr); - return result; - } - } - - /* The new size is a large allocation as well; - see if we can hold it in place. */ - blocks = BLOCKIFY (size); - if (blocks < _heapinfo[block].busy.info.size) - { - /* The new size is smaller; return - excess memory to the free list. */ - _heapinfo[block + blocks].busy.type = 0; - _heapinfo[block + blocks].busy.info.size - = _heapinfo[block].busy.info.size - blocks; - _heapinfo[block].busy.info.size = blocks; - free (ADDRESS (block + blocks)); /* NOTE(review): freeing this tail goes through the type-0 path of _free_internal, which decrements _chunks_used although no separate chunk was counted for the tail -- confirm whether the counter should be incremented first. */ - result = ptr; - } - else if (blocks == _heapinfo[block].busy.info.size) - /* No size change necessary. */ - result = ptr; - else - { - /* Won't fit, so allocate a new region that will. - Free the old region first in case there is sufficient - adjacent free space to grow without moving. */ - blocks = _heapinfo[block].busy.info.size; - /* Prevent free from actually returning memory to the system. */ - oldlimit = _heaplimit; - _heaplimit = 0; - free (ptr); - _heaplimit = oldlimit; - result = malloc (size); - if (result == NULL) - { - /* Now we're really in trouble. We have to unfree - the thing we just freed. Unfortunately it might - have been coalesced with its neighbors. 
Presumably _heapindex == block means the freed region still starts its own free run, so one malloc of the old size takes it back; otherwise a filler allocation covers the blocks between _heapindex and block first -- TODO confirm against malloc. */ - if (_heapindex == block) - (void) malloc (blocks * BLOCKSIZE); - else - { - __ptr_t previous = malloc ((block - _heapindex) * BLOCKSIZE); - (void) malloc (blocks * BLOCKSIZE); - free (previous); - } - return NULL; - } - if (ptr != result) - memmove (result, ptr, blocks * BLOCKSIZE); /* PTR was already passed to free above; this copy relies on the old contents still being in place. */ - } - break; - - default: - /* Old size is a fragment; type is logarithm - to base two of the fragment size. */ - if (size > (size_t) (1 << (type - 1)) && size <= (size_t) (1 << type)) - /* The new size is the same kind of fragment. */ - result = ptr; - else - { - /* The new size is different; allocate a new space, - and copy the lesser of the new size and the old. */ - result = malloc (size); - if (result == NULL) - return NULL; - memcpy (result, ptr, min (size, (size_t) 1 << type)); - free (ptr); - } - break; - } - - return result; -} diff --git a/gnu/lib/libreadline/README.FreeBSD b/gnu/lib/libreadline/README.FreeBSD deleted file mode 100644 index 6af2775c2eb0..000000000000 --- a/gnu/lib/libreadline/README.FreeBSD +++ /dev/null @@ -1,21 +0,0 @@ -The GNU Readline library is a programming tool that provides a -consistent user interface for recalling lines of previously typed -input and performing editing tasks on input lines. - -paul@freefall.cdrom.com - -There was a bug with tcsh: when readline attempt to get tty -modes from background, it got no-echo editing tcsh mode. - -Workaround for this implemented via TIOCGWINSZ/TIOCSWINSZ -with same winsize structure: it does nothing expect polling -process from background. Look tcsh_hack.readme for details. - -This version is more ctype-oriented than original bash version. 
- -If you want an 8-bit clean version, put - set convert-meta off - set output-meta on -in your ~/.inputrc file - -ache@astral.msk.su diff --git a/gnu/lib/libreadline/doc/ChangeLog b/gnu/lib/libreadline/doc/ChangeLog deleted file mode 100644 index 5f1f506178c3..000000000000 --- a/gnu/lib/libreadline/doc/ChangeLog +++ /dev/null @@ -1,8 +0,0 @@ -Tue Feb 2 11:40:04 1993 Roland H. Pesch (pesch@fowanton.cygnus.com) - - * Makefile.in: configurable (and useable) Makefile template - * Makefile: removed, replaced with configurable Makefile.in - * texindex.c texinfo.tex: remove, replacing w/refs to tools - elsewhere in distribution tree - * configure.in: pro forma configure stub - * ChangeLog: new file diff --git a/gnu/lib/libreadline/doc/hist.texinfo b/gnu/lib/libreadline/doc/hist.texinfo deleted file mode 100644 index cc80efab2288..000000000000 --- a/gnu/lib/libreadline/doc/hist.texinfo +++ /dev/null @@ -1,113 +0,0 @@ -\input texinfo @c -*-texinfo-*- -@c %**start of header (This is for running Texinfo on a region.) -@setfilename history.info -@settitle GNU History Library -@c %**end of header (This is for running Texinfo on a region.) - -@setchapternewpage odd - -@ignore -last change: Wed Jul 20 09:57:17 EDT 1994 -@end ignore - -@set EDITION 2.0 -@set VERSION 2.0 -@set UPDATED 20 July 1994 -@set UPDATE-MONTH July 1994 - -@ifinfo -This document describes the GNU History library, a programming tool that -provides a consistent user interface for recalling lines of previously -typed input. - -Copyright (C) 1988, 1991 Free Software Foundation, Inc. - -Permission is granted to make and distribute verbatim copies of -this manual provided the copyright notice and this permission notice -are preserved on all copies. 
- -@ignore -Permission is granted to process this file through TeX and print the -results, provided the printed document carries copying permission -notice identical to this one except for the removal of this paragraph -(this paragraph not being relevant to the printed manual). -@end ignore - -Permission is granted to copy and distribute modified versions of this -manual under the conditions for verbatim copying, provided that the entire -resulting derived work is distributed under the terms of a permission -notice identical to this one. - -Permission is granted to copy and distribute translations of this manual -into another language, under the above conditions for modified versions, -except that this permission notice may be stated in a translation approved -by the Foundation. -@end ifinfo - -@titlepage -@sp 10 -@title GNU History Library -@subtitle Edition @value{EDITION}, for @code{History Library} Version @value{VERSION}. -@subtitle @value{UPDATE-MONTH} -@author Brian Fox, Free Software Foundation -@author Chet Ramey, Case Western Reserve University - -@page -This document describes the GNU History library, a programming tool that -provides a consistent user interface for recalling lines of previously -typed input. - -Published by the Free Software Foundation @* -675 Massachusetts Avenue, @* -Cambridge, MA 02139 USA - -Permission is granted to make and distribute verbatim copies of -this manual provided the copyright notice and this permission notice -are preserved on all copies. - -Permission is granted to copy and distribute modified versions of this -manual under the conditions for verbatim copying, provided that the entire -resulting derived work is distributed under the terms of a permission -notice identical to this one. 
- -Permission is granted to copy and distribute translations of this manual -into another language, under the above conditions for modified versions, -except that this permission notice may be stated in a translation approved -by the Foundation. - -@vskip 0pt plus 1filll -Copyright @copyright{} 1989, 1991 Free Software Foundation, Inc. -@end titlepage - -@ifinfo -@node Top -@top GNU History Library - -This document describes the GNU History library, a programming tool that -provides a consistent user interface for recalling lines of previously -typed input. - -@menu -* Using History Interactively:: GNU History User's Manual. -* Programming with GNU History:: GNU History Programmer's Manual. -* Concept Index:: Index of concepts described in this manual. -* Function and Variable Index:: Index of externally visible functions - and variables. -@end menu -@end ifinfo - -@syncodeindex fn vr - -@include hsuser.texinfo -@include hstech.texinfo - -@node Concept Index -@appendix Concept Index -@printindex cp - -@node Function and Variable Index -@appendix Function and Variable Index -@printindex vr - -@contents -@bye diff --git a/gnu/lib/libreadline/doc/history.info b/gnu/lib/libreadline/doc/history.info deleted file mode 100644 index 6df0bd9426df..000000000000 --- a/gnu/lib/libreadline/doc/history.info +++ /dev/null @@ -1,744 +0,0 @@ -This is Info file history.info, produced by Makeinfo-1.55 from the -input file hist.texinfo. - - This document describes the GNU History library, a programming tool -that provides a consistent user interface for recalling lines of -previously typed input. - - Copyright (C) 1988, 1991 Free Software Foundation, Inc. - - Permission is granted to make and distribute verbatim copies of this -manual provided the copyright notice and this permission notice are -preserved on all copies. 
- - Permission is granted to copy and distribute modified versions of -this manual under the conditions for verbatim copying, provided that -the entire resulting derived work is distributed under the terms of a -permission notice identical to this one. - - Permission is granted to copy and distribute translations of this -manual into another language, under the above conditions for modified -versions, except that this permission notice may be stated in a -translation approved by the Foundation. - - -File: history.info, Node: Top, Next: Using History Interactively, Prev: (DIR), Up: (DIR) - -GNU History Library -******************* - - This document describes the GNU History library, a programming tool -that provides a consistent user interface for recalling lines of -previously typed input. - -* Menu: - -* Using History Interactively:: GNU History User's Manual. -* Programming with GNU History:: GNU History Programmer's Manual. -* Concept Index:: Index of concepts described in this manual. -* Function and Variable Index:: Index of externally visible functions - and variables. - - -File: history.info, Node: Using History Interactively, Next: Programming with GNU History, Prev: Top, Up: Top - -Using History Interactively -*************************** - - This chapter describes how to use the GNU History Library -interactively, from a user's standpoint. It should be considered a -user's guide. For information on using the GNU History Library in your -own programs, *note Programming with GNU History::.. - -* Menu: - -* History Interaction:: What it feels like using History as a user. - - -File: history.info, Node: History Interaction, Up: Using History Interactively - -History Interaction -=================== - - The History library provides a history expansion feature that is -similar to the history expansion provided by `csh'. The following text -describes the syntax used to manipulate the history information. - - History expansion takes place in two parts. 
The first is to -determine which line from the previous history should be used during -substitution. The second is to select portions of that line for -inclusion into the current one. The line selected from the previous -history is called the "event", and the portions of that line that are -acted upon are called "words". The line is broken into words in the -same fashion that Bash does, so that several English (or Unix) words -surrounded by quotes are considered as one word. - -* Menu: - -* Event Designators:: How to specify which history line to use. -* Word Designators:: Specifying which words are of interest. -* Modifiers:: Modifying the results of substitution. - - -File: history.info, Node: Event Designators, Next: Word Designators, Up: History Interaction - -Event Designators ------------------ - - An event designator is a reference to a command line entry in the -history list. - -`!' - Start a history substitution, except when followed by a space, tab, - the end of the line, = or (. - -`!!' - Refer to the previous command. This is a synonym for `!-1'. - -`!n' - Refer to command line N. - -`!-n' - Refer to the command N lines back. - -`!string' - Refer to the most recent command starting with STRING. - -`!?string'[`?'] - Refer to the most recent command containing STRING. - -`!#' - The entire command line typed so far. - -`^string1^string2^' - Quick Substitution. Repeat the last command, replacing STRING1 - with STRING2. Equivalent to `!!:s/string1/string2/'. - - -File: history.info, Node: Word Designators, Next: Modifiers, Prev: Event Designators, Up: History Interaction - -Word Designators ----------------- - - A : separates the event specification from the word designator. It -can be omitted if the word designator begins with a ^, $, * or %. -Words are numbered from the beginning of the line, with the first word -being denoted by a 0 (zero). - -`0 (zero)' - The `0'th word. For many applications, this is the command word. - -`n' - The Nth word. 
- -`^' - The first argument; that is, word 1. - -`$' - The last argument. - -`%' - The word matched by the most recent `?string?' search. - -`x-y' - A range of words; `-Y' abbreviates `0-Y'. - -`*' - All of the words, except the `0'th. This is a synonym for `1-$'. - It is not an error to use * if there is just one word in the event; - the empty string is returned in that case. - -`x*' - Abbreviates `x-$' - -`x-' - Abbreviates `x-$' like `x*', but omits the last word. - - -File: history.info, Node: Modifiers, Prev: Word Designators, Up: History Interaction - -Modifiers ---------- - - After the optional word designator, you can add a sequence of one or -more of the following modifiers, each preceded by a :. - -`h' - Remove a trailing pathname component, leaving only the head. - -`r' - Remove a trailing suffix of the form `.'SUFFIX, leaving the - basename. - -`e' - Remove all but the trailing suffix. - -`t' - Remove all leading pathname components, leaving the tail. - -`p' - Print the new command but do not execute it. - -`s/old/new/' - Substitute NEW for the first occurrence of OLD in the event line. - Any delimiter may be used in place of /. The delimiter may be - quoted in OLD and NEW with a single backslash. If & appears in - NEW, it is replaced by OLD. A single backslash will quote the &. - The final delimiter is optional if it is the last character on the - input line. - -`&' - Repeat the previous substitution. - -`g' - Cause changes to be applied over the entire event line. Used in - conjunction with `s', as in `gs/old/new/', or with `&'. - - -File: history.info, Node: Programming with GNU History, Next: Concept Index, Prev: Using History Interactively, Up: Top - -Programming with GNU History -**************************** - - This chapter describes how to interface programs that you write with -the GNU History Library. It should be considered a technical guide. -For information on the interactive use of GNU History, *note Using -History Interactively::.. 
- -* Menu: - -* Introduction to History:: What is the GNU History library for? -* History Storage:: How information is stored. -* History Functions:: Functions that you can use. -* History Variables:: Variables that control behaviour. -* History Programming Example:: Example of using the GNU History Library. - - -File: history.info, Node: Introduction to History, Next: History Storage, Up: Programming with GNU History - -Introduction to History -======================= - - Many programs read input from the user a line at a time. The GNU -History library is able to keep track of those lines, associate -arbitrary data with each line, and utilize information from previous -lines in composing new ones. - - The programmer using the History library has available functions for -remembering lines on a history list, associating arbitrary data with a -line, removing lines from the list, searching through the list for a -line containing an arbitrary text string, and referencing any line in -the list directly. In addition, a history "expansion" function is -available which provides for a consistent user interface across -different programs. - - The user using programs written with the History library has the -benefit of a consistent user interface with a set of well-known -commands for manipulating the text of previous lines and using that text -in new commands. The basic history manipulation commands are similar to -the history substitution provided by `csh'. - - If the programmer desires, he can use the Readline library, which -includes some history manipulation by default, and has the added -advantage of command line editing. - - -File: history.info, Node: History Storage, Next: History Functions, Prev: Introduction to History, Up: Programming with GNU History - -History Storage -=============== - - The history list is an array of history entries. 
A history entry is -declared as follows: - - typedef struct _hist_entry { - char *line; - char *data; - } HIST_ENTRY; - - The history list itself might therefore be declared as - - HIST_ENTRY **the_history_list; - - The state of the History library is encapsulated into a single -structure: - - /* A structure used to pass the current state of the history stuff around. */ - typedef struct _hist_state { - HIST_ENTRY **entries; /* Pointer to the entries themselves. */ - int offset; /* The location pointer within this array. */ - int length; /* Number of elements within this array. */ - int size; /* Number of slots allocated to this array. */ - int flags; - } HISTORY_STATE; - - If the flags member includes `HS_STIFLED', the history has been -stifled. - - -File: history.info, Node: History Functions, Next: History Variables, Prev: History Storage, Up: Programming with GNU History - -History Functions -================= - - This section describes the calling sequence for the various functions -present in GNU History. - -* Menu: - -* Initializing History and State Management:: Functions to call when you - want to use history in a - program. -* History List Management:: Functions used to manage the list - of history entries. -* Information About the History List:: Functions returning information about - the history list. -* Moving Around the History List:: Functions used to change the position - in the history list. -* Searching the History List:: Functions to search the history list - for entries containing a string. -* Managing the History File:: Functions that read and write a file - containing the history list. -* History Expansion:: Functions to perform csh-like history - expansion. 
- - -File: history.info, Node: Initializing History and State Management, Next: History List Management, Up: History Functions - -Initializing History and State Management ------------------------------------------ - - This section describes functions used to initialize and manage the -state of the History library when you want to use the history functions -in your program. - - - Function: void using_history () - Begin a session in which the history functions might be used. This - initializes the interactive variables. - - - Function: HISTORY_STATE * history_get_history_state () - Return a structure describing the current state of the input - history. - - - Function: void history_set_history_state (HISTORY_STATE *state) - Set the state of the history list according to STATE. - - -File: history.info, Node: History List Management, Next: Information About the History List, Prev: Initializing History and State Management, Up: History Functions - -History List Management ------------------------ - - These functions manage individual entries on the history list, or set -parameters managing the list itself. - - - Function: void add_history (char *string) - Place STRING at the end of the history list. The associated data - field (if any) is set to `NULL'. - - - Function: HIST_ENTRY * remove_history (int which) - Remove history entry at offset WHICH from the history. The - removed element is returned so you can free the line, data, and - containing structure. - - - Function: HIST_ENTRY * replace_history_entry (int which, char *line, - char *data) - Make the history entry at offset WHICH have LINE and DATA. This - returns the old entry so you can dispose of the data. In the case - of an invalid WHICH, a `NULL' pointer is returned. - - - Function: void stifle_history (int max) - Stifle the history list, remembering only the last MAX entries. - - - Function: int unstifle_history () - Stop stifling the history. This returns the previous amount the - history was stifled. 
The value is positive if the history was - stifled, negative if it wasn't. - - - Function: int history_is_stifled () - Returns non-zero if the history is stifled, zero if it is not. - - -File: history.info, Node: Information About the History List, Next: Moving Around the History List, Prev: History List Management, Up: History Functions - -Information About the History List ----------------------------------- - - These functions return information about the entire history list or -individual list entries. - - - Function: HIST_ENTRY ** history_list () - Return a `NULL' terminated array of `HIST_ENTRY' which is the - current input history. Element 0 of this list is the beginning of - time. If there is no history, return `NULL'. - - - Function: int where_history () - Returns the offset of the current history element. - - - Function: HIST_ENTRY * current_history () - Return the history entry at the current position, as determined by - `where_history ()'. If there is no entry there, return a `NULL' - pointer. - - - Function: HIST_ENTRY * history_get (int offset) - Return the history entry at position OFFSET, starting from - `history_base'. If there is no entry there, or if OFFSET is - greater than the history length, return a `NULL' pointer. - - - Function: int history_total_bytes () - Return the number of bytes that the primary history entries are - using. This function returns the sum of the lengths of all the - lines in the history. - - -File: history.info, Node: Moving Around the History List, Next: Searching the History List, Prev: Information About the History List, Up: History Functions - -Moving Around the History List ------------------------------- - - These functions allow the current index into the history list to be -set or changed. - - - Function: int history_set_pos (int pos) - Set the position in the history list to POS, an absolute index - into the list. 
- - - Function: HIST_ENTRY * previous_history () - Back up the current history offset to the previous history entry, - and return a pointer to that entry. If there is no previous - entry, return a `NULL' pointer. - - - Function: HIST_ENTRY * next_history () - Move the current history offset forward to the next history entry, - and return the a pointer to that entry. If there is no next - entry, return a `NULL' pointer. - - -File: history.info, Node: Searching the History List, Next: Managing the History File, Prev: Moving Around the History List, Up: History Functions - -Searching the History List --------------------------- - - These functions allow searching of the history list for entries -containing a specific string. Searching may be performed both forward -and backward from the current history position. The search may be -"anchored", meaning that the string must match at the beginning of the -history entry. - - - Function: int history_search (char *string, int direction) - Search the history for STRING, starting at the current history - offset. If DIRECTION < 0, then the search is through previous - entries, else through subsequent. If STRING is found, then the - current history index is set to that history entry, and the value - returned is the offset in the line of the entry where STRING was - found. Otherwise, nothing is changed, and a -1 is returned. - - - Function: int history_search_prefix (char *string, int direction) - Search the history for STRING, starting at the current history - offset. The search is anchored: matching lines must begin with - STRING. If DIRECTION < 0, then the search is through previous - entries, else through subsequent. If STRING is found, then the - current history index is set to that entry, and the return value - is 0. Otherwise, nothing is changed, and a -1 is returned. 
- - - Function: int history_search_pos (char *string, int direction, int - pos) - Search for STRING in the history list, starting at POS, an - absolute index into the list. If DIRECTION is negative, the search - proceeds backward from POS, otherwise forward. Returns the - absolute index of the history element where STRING was found, or - -1 otherwise. - - -File: history.info, Node: Managing the History File, Next: History Expansion, Prev: Searching the History List, Up: History Functions - -Managing the History File -------------------------- - - The History library can read the history from and write it to a file. -This section documents the functions for managing a history file. - - - Function: int read_history (char *filename) - Add the contents of FILENAME to the history list, a line at a - time. If FILENAME is `NULL', then read from `~/.history'. - Returns 0 if successful, or errno if not. - - - Function: int read_history_range (char *filename, int from, int to) - Read a range of lines from FILENAME, adding them to the history - list. Start reading at line FROM and end at TO. If FROM is zero, - start at the beginning. If TO is less than FROM, then read until - the end of the file. If FILENAME is `NULL', then read from - `~/.history'. Returns 0 if successful, or `errno' if not. - - - Function: int write_history (char *filename) - Write the current history to FILENAME, overwriting FILENAME if - necessary. If FILENAME is `NULL', then write the history list to - `~/.history'. Values returned are as in `read_history ()'. - - - Function: int append_history (int nelements, char *filename) - Append the last NELEMENTS of the history list to FILENAME. - - - Function: int history_truncate_file (char *filename, int nlines) - Truncate the history file FILENAME, leaving only the last NLINES - lines. 
- - -File: history.info, Node: History Expansion, Prev: Managing the History File, Up: History Functions - -History Expansion ------------------ - - These functions implement `csh'-like history expansion. - - - Function: int history_expand (char *string, char **output) - Expand STRING, placing the result into OUTPUT, a pointer to a - string (*note History Interaction::.). Returns: - `0' - If no expansions took place (or, if the only change in the - text was the de-slashifying of the history expansion - character); - - `1' - if expansions did take place; - - `-1' - if there was an error in expansion; - - `2' - if the returned line should only be displayed, but not - executed, as with the `:p' modifier (*note Modifiers::.). - - If an error ocurred in expansion, then OUTPUT contains a - descriptive error message. - - - Function: char * history_arg_extract (int first, int last, char - *string) - Extract a string segment consisting of the FIRST through LAST - arguments present in STRING. Arguments are broken up as in Bash. - - - Function: char * get_history_event (char *string, int *cindex, int - qchar) - Returns the text of the history event beginning at STRING + - *CINDEX. *CINDEX is modified to point to after the event - specifier. At function entry, CINDEX points to the index into - STRING where the history event specification begins. QCHAR is a - character that is allowed to end the event specification in - addition to the "normal" terminating characters. - - - Function: char ** history_tokenize (char *string) - Return an array of tokens parsed out of STRING, much as the shell - might. The tokens are split on white space and on the characters - `()<>;&|$', and shell quoting conventions are obeyed. 
- - -File: history.info, Node: History Variables, Next: History Programming Example, Prev: History Functions, Up: Programming with GNU History - -History Variables -================= - - This section describes the externally visible variables exported by -the GNU History Library. - - - Variable: int history_base - The logical offset of the first entry in the history list. - - - Variable: int history_length - The number of entries currently stored in the history list. - - - Variable: int max_input_history - The maximum number of history entries. This must be changed using - `stifle_history ()'. - - - Variable: char history_expansion_char - The character that starts a history event. The default is `!'. - - - Variable: char history_subst_char - The character that invokes word substitution if found at the start - of a line. The default is `^'. - - - Variable: char history_comment_char - During tokenization, if this character is seen as the first - character of a word, then it and all subsequent characters up to a - newline are ignored, suppressing history expansion for the - remainder of the line. This is disabled by default. - - - Variable: char * history_no_expand_chars - The list of characters which inhibit history expansion if found - immediately following HISTORY_EXPANSION_CHAR. The default is - whitespace and `='. - - -File: history.info, Node: History Programming Example, Prev: History Variables, Up: Programming with GNU History - -History Programming Example -=========================== - - The following program demonstrates simple use of the GNU History -Library. 
- - main () - { - char line[1024], *t; - int len, done = 0; - - line[0] = 0; - - using_history (); - while (!done) - { - printf ("history$ "); - fflush (stdout); - t = fgets (line, sizeof (line) - 1, stdin); - if (t && *t) - { - len = strlen (t); - if (t[len - 1] == '\n') - t[len - 1] = '\0'; - } - - if (!t) - strcpy (line, "quit"); - - if (line[0]) - { - char *expansion; - int result; - - result = history_expand (line, &expansion); - if (result) - fprintf (stderr, "%s\n", expansion); - - if (result < 0 || result == 2) - { - free (expansion); - continue; - } - - add_history (expansion); - strncpy (line, expansion, sizeof (line) - 1); - free (expansion); - } - - if (strcmp (line, "quit") == 0) - done = 1; - else if (strcmp (line, "save") == 0) - write_history ("history_file"); - else if (strcmp (line, "read") == 0) - read_history ("history_file"); - else if (strcmp (line, "list") == 0) - { - register HIST_ENTRY **the_list; - register int i; - - the_list = history_list (); - if (the_list) - for (i = 0; the_list[i]; i++) - printf ("%d: %s\n", i + history_base, the_list[i]->line); - } - else if (strncmp (line, "delete", 6) == 0) - { - int which; - if ((sscanf (line + 6, "%d", &which)) == 1) - { - HIST_ENTRY *entry = remove_history (which); - if (!entry) - fprintf (stderr, "No such entry %d\n", which); - else - { - free (entry->line); - free (entry); - } - } - else - { - fprintf (stderr, "non-numeric arg given to `delete'\n"); - } - } - } - } - - -File: history.info, Node: Concept Index, Next: Function and Variable Index, Prev: Programming with GNU History, Up: Top - -Concept Index -************* - -* Menu: - -* anchored search: Searching the History List. -* event designators: Event Designators. -* expansion: History Interaction. -* history events: Event Designators. -* History Searching: Searching the History List. 
- - -File: history.info, Node: Function and Variable Index, Prev: Concept Index, Up: Top - -Function and Variable Index -*************************** - -* Menu: - -* add_history: History List Management. -* append_history: Managing the History File. -* current_history: Information About the History List. -* get_history_event: History Expansion. -* history_arg_extract: History Expansion. -* history_base: History Variables. -* history_comment_char: History Variables. -* history_expand: History Expansion. -* history_expansion_char: History Variables. -* history_get: Information About the History List. -* history_get_history_state: Initializing History and State Management. -* history_is_stifled: History List Management. -* history_length: History Variables. -* history_list: Information About the History List. -* history_no_expand_chars: History Variables. -* history_search: Searching the History List. -* history_search_pos: Searching the History List. -* history_search_prefix: Searching the History List. -* history_set_history_state: Initializing History and State Management. -* history_set_pos: Moving Around the History List. -* history_subst_char: History Variables. -* history_tokenize: History Expansion. -* history_total_bytes: Information About the History List. -* history_truncate_file: Managing the History File. -* max_input_history: History Variables. -* next_history: Moving Around the History List. -* previous_history: Moving Around the History List. -* read_history: Managing the History File. -* read_history_range: Managing the History File. -* remove_history: History List Management. -* replace_history_entry: History List Management. -* stifle_history: History List Management. -* unstifle_history: History List Management. -* using_history: Initializing History and State Management. -* where_history: Information About the History List. -* write_history: Managing the History File. 
- - - -Tag Table: -Node: Top975 -Node: Using History Interactively1569 -Node: History Interaction2077 -Node: Event Designators3122 -Node: Word Designators3952 -Node: Modifiers4936 -Node: Programming with GNU History6065 -Node: Introduction to History6791 -Node: History Storage8112 -Node: History Functions9205 -Node: Initializing History and State Management10176 -Node: History List Management10968 -Node: Information About the History List12396 -Node: Moving Around the History List13702 -Node: Searching the History List14587 -Node: Managing the History File16419 -Node: History Expansion17925 -Node: History Variables19769 -Node: History Programming Example21138 -Node: Concept Index23742 -Node: Function and Variable Index24223 - -End Tag Table diff --git a/gnu/lib/libreadline/doc/inc-hist.texi b/gnu/lib/libreadline/doc/inc-hist.texi deleted file mode 100644 index 539e372c5d46..000000000000 --- a/gnu/lib/libreadline/doc/inc-hist.texi +++ /dev/null @@ -1,155 +0,0 @@ -@ignore -This file is completely identical to hsuser.texinfo, except that it has the -reference to the programming manual removed. There are definately better ways -to do this! - -This file documents the user interface to the GNU History library. - -Copyright (C) 1988, 1991 Free Software Foundation, Inc. -Authored by Brian Fox. - -Permission is granted to make and distribute verbatim copies of this manual -provided the copyright notice and this permission notice are preserved on -all copies. - -Permission is granted to process this file through Tex and print the -results, provided the printed document carries copying permission notice -identical to this one except for the removal of this paragraph (this -paragraph not being relevant to the printed manual). 
- -Permission is granted to copy and distribute modified versions of this -manual under the conditions for verbatim copying, provided also that the -GNU Copyright statement is available to the distributee, and provided that -the entire resulting derived work is distributed under the terms of a -permission notice identical to this one. - -Permission is granted to copy and distribute translations of this manual -into another language, under the above conditions for modified versions. -@end ignore - -@node Using History Interactively -@appendix Using History Interactively - -This chapter describes how to use the GNU History Library interactively, -from a user's standpoint. - -@menu -* History Interaction:: What it feels like using History as a user. -@end menu - -@node History Interaction -@section History Interaction -@cindex expansion - -The History library provides a history expansion feature that is similar -to the history expansion in Csh. The following text describes the sytax -that you use to manipulate the history information. - -History expansion takes place in two parts. The first is to determine -which line from the previous history should be used during substitution. -The second is to select portions of that line for inclusion into the -current one. The line selected from the previous history is called the -@dfn{event}, and the portions of that line that are acted upon are -called @dfn{words}. The line is broken into words in the same fashion -that the Bash shell does, so that several English (or Unix) words -surrounded by quotes are considered as one word. - -@menu -* Event Designators:: How to specify which history line to use. -* Word Designators:: Specifying which words are of interest. -* Modifiers:: Modifying the results of susbstitution. -@end menu - -@node Event Designators -@subsection Event Designators -@cindex event designators - -An event designator is a reference to a command line entry in the -history list. 
- -@table @asis - -@item @code{!} -Start a history subsititution, except when followed by a space, tab, or -the end of the line... @key{=} or @key{(}. - -@item @code{!!} -Refer to the previous command. This is a synonym for @code{!-1}. - -@item @code{!n} -Refer to command line @var{n}. - -@item @code{!-n} -Refer to the command line @var{n} lines back. - -@item @code{!string} -Refer to the most recent command starting with @var{string}. - -@item @code{!?string}[@code{?}] -Refer to the most recent command containing @var{string}. - -@end table - -@node Word Designators -@subsection Word Designators - -A @key{:} separates the event specification from the word designator. It -can be omitted if the word designator begins with a @key{^}, @key{$}, -@key{*} or @key{%}. Words are numbered from the beginning of the line, -with the first word being denoted by a 0 (zero). - -@table @code - -@item 0 (zero) -The zero'th word. For many applications, this is the command word. - -@item n -The @var{n}'th word. - -@item ^ -The first argument. that is, word 1. - -@item $ -The last argument. - -@item % -The word matched by the most recent @code{?string?} search. - -@item x-y -A range of words; @code{-@var{y}} Abbreviates @code{0-@var{y}}. - -@item * -All of the words, excepting the zero'th. This is a synonym for @code{1-$}. -It is not an error to use @key{*} if there is just one word in the event. -The empty string is returned in that case. - -@end table - -@node Modifiers -@subsection Modifiers - -After the optional word designator, you can add a sequence of one or more -of the following modifiers, each preceded by a @key{:}. - -@table @code - -@item # -The entire command line typed so far. This means the current command, -not the previous command, so it really isn't a word designator, and doesn't -belong in this section. - -@item h -Remove a trailing pathname component, leaving only the head. - -@item r -Remove a trailing suffix of the form @samp{.}@var{suffix}, leaving the basename. 
- -@item e -Remove all but the suffix. - -@item t -Remove all leading pathname components, leaving the tail. - -@item p -Print the new command but do not execute it. -@end table diff --git a/gnu/lib/libreadline/doc/readline.info b/gnu/lib/libreadline/doc/readline.info deleted file mode 100644 index f4882e9a495b..000000000000 --- a/gnu/lib/libreadline/doc/readline.info +++ /dev/null @@ -1,74 +0,0 @@ -This is Info file readline.info, produced by Makeinfo-1.55 from the -input file rlman.texinfo. - - This document describes the GNU Readline Library, a utility which -aids in the consistency of user interface across discrete programs that -need to provide a command line interface. - - Copyright (C) 1988, 1991 Free Software Foundation, Inc. - - Permission is granted to make and distribute verbatim copies of this -manual provided the copyright notice and this permission notice pare -preserved on all copies. - - Permission is granted to copy and distribute modified versions of -this manual under the conditions for verbatim copying, provided that -the entire resulting derived work is distributed under the terms of a -permission notice identical to this one. - - Permission is granted to copy and distribute translations of this -manual into another language, under the above conditions for modified -versions, except that this permission notice may be stated in a -translation approved by the Foundation. 
- - -Indirect: -readline.info-1: 1000 -readline.info-2: 50467 - -Tag Table: -(Indirect) -Node: Top1000 -Node: Command Line Editing1613 -Node: Introduction and Notation2264 -Node: Readline Interaction3284 -Node: Readline Bare Essentials4423 -Node: Readline Movement Commands5953 -Node: Readline Killing Commands6844 -Node: Readline Arguments8547 -Node: Readline Init File9498 -Node: Readline Init Syntax10502 -Node: Conditional Init Constructs17435 -Node: Bindable Readline Commands19681 -Node: Commands For Moving20351 -Node: Commands For History21199 -Node: Commands For Text23783 -Node: Commands For Killing25522 -Node: Numeric Arguments26971 -Node: Commands For Completion27598 -Node: Keyboard Macros28525 -Node: Miscellaneous Commands29084 -Node: Readline vi Mode30372 -Node: Programming with GNU Readline32122 -Node: Basic Behavior32919 -Node: Custom Functions36232 -Node: The Function Type36845 -Node: Function Writing37690 -Node: Readline Convenience Functions40453 -Node: Function Naming41118 -Node: Keymaps42345 -Node: Binding Keys43856 -Node: Associating Function Names and Bindings45650 -Node: Allowing Undoing46812 -Node: Redisplay49397 -Node: Modifying Text50467 -Node: Utility Functions51378 -Node: Custom Completers54444 -Node: How Completing Works55165 -Node: Completion Functions58156 -Node: Completion Variables61171 -Node: A Short Completion Example64996 -Node: Concept Index77230 -Node: Function and Variable Index77717 - -End Tag Table diff --git a/gnu/lib/libreadline/doc/readline.info-1 b/gnu/lib/libreadline/doc/readline.info-1 deleted file mode 100644 index 78bbd057ad2f..000000000000 --- a/gnu/lib/libreadline/doc/readline.info-1 +++ /dev/null @@ -1,1322 +0,0 @@ -This is Info file readline.info, produced by Makeinfo-1.55 from the -input file rlman.texinfo. - - This document describes the GNU Readline Library, a utility which -aids in the consistency of user interface across discrete programs that -need to provide a command line interface. 
- - Copyright (C) 1988, 1991 Free Software Foundation, Inc. - - Permission is granted to make and distribute verbatim copies of this -manual provided the copyright notice and this permission notice pare -preserved on all copies. - - Permission is granted to copy and distribute modified versions of -this manual under the conditions for verbatim copying, provided that -the entire resulting derived work is distributed under the terms of a -permission notice identical to this one. - - Permission is granted to copy and distribute translations of this -manual into another language, under the above conditions for modified -versions, except that this permission notice may be stated in a -translation approved by the Foundation. - - -File: readline.info, Node: Top, Next: Command Line Editing, Prev: (DIR), Up: (DIR) - -GNU Readline Library -******************** - - This document describes the GNU Readline Library, a utility which -aids in the consistency of user interface across discrete programs that -need to provide a command line interface. - -* Menu: - -* Command Line Editing:: GNU Readline User's Manual. -* Programming with GNU Readline:: GNU Readline Programmer's Manual. -* Concept Index:: Index of concepts described in this manual. -* Function and Variable Index:: Index of externally visible functions - and variables. - - -File: readline.info, Node: Command Line Editing, Next: Programming with GNU Readline, Prev: Top, Up: Top - -Command Line Editing -******************** - - This chapter describes the basic features of the GNU command line -editing interface. - -* Menu: - -* Introduction and Notation:: Notation used in this text. -* Readline Interaction:: The minimum set of commands for editing a line. -* Readline Init File:: Customizing Readline from a user's view. -* Bindable Readline Commands:: A description of most of the Readline commands - available for binding -* Readline vi Mode:: A short description of how to make Readline - behave like the vi editor. 
- - -File: readline.info, Node: Introduction and Notation, Next: Readline Interaction, Up: Command Line Editing - -Introduction to Line Editing -============================ - - The following paragraphs describe the notation used to represent -keystrokes. - - The text C-k is read as `Control-K' and describes the character -produced when the Control key is depressed and the k key is struck. - - The text M-k is read as `Meta-K' and describes the character -produced when the meta key (if you have one) is depressed, and the k -key is struck. If you do not have a meta key, the identical keystroke -can be generated by typing ESC first, and then typing k. Either -process is known as "metafying" the k key. - - The text M-C-k is read as `Meta-Control-k' and describes the -character produced by "metafying" C-k. - - In addition, several keys have their own names. Specifically, DEL, -ESC, LFD, SPC, RET, and TAB all stand for themselves when seen in this -text, or in an init file (*note Readline Init File::., for more info). - - -File: readline.info, Node: Readline Interaction, Next: Readline Init File, Prev: Introduction and Notation, Up: Command Line Editing - -Readline Interaction -==================== - - Often during an interactive session you type in a long line of text, -only to notice that the first word on the line is misspelled. The -Readline library gives you a set of commands for manipulating the text -as you type it in, allowing you to just fix your typo, and not forcing -you to retype the majority of the line. Using these editing commands, -you move the cursor to the place that needs correction, and delete or -insert the text of the corrections. Then, when you are satisfied with -the line, you simply press RETURN. You do not have to be at the end of -the line to press RETURN; the entire line is accepted regardless of the -location of the cursor within the line. - -* Menu: - -* Readline Bare Essentials:: The least you need to know about Readline. 
-* Readline Movement Commands:: Moving about the input line. -* Readline Killing Commands:: How to delete text, and how to get it back! -* Readline Arguments:: Giving numeric arguments to commands. - - -File: readline.info, Node: Readline Bare Essentials, Next: Readline Movement Commands, Up: Readline Interaction - -Readline Bare Essentials ------------------------- - - In order to enter characters into the line, simply type them. The -typed character appears where the cursor was, and then the cursor moves -one space to the right. If you mistype a character, you can use your -erase character to back up and delete the mistyped character. - - Sometimes you may miss typing a character that you wanted to type, -and not notice your error until you have typed several other -characters. In that case, you can type C-b to move the cursor to the -left, and then correct your mistake. Afterwards, you can move the -cursor to the right with C-f. - - When you add text in the middle of a line, you will notice that -characters to the right of the cursor are `pushed over' to make room -for the text that you have inserted. Likewise, when you delete text -behind the cursor, characters to the right of the cursor are `pulled -back' to fill in the blank space created by the removal of the text. A -list of the basic bare essentials for editing the text of an input line -follows. - -C-b - Move back one character. - -C-f - Move forward one character. - -DEL - Delete the character to the left of the cursor. - -C-d - Delete the character underneath the cursor. - -Printing characters - Insert the character into the line at the cursor. - -C-_ - Undo the last thing that you did. You can undo all the way back - to an empty line. 
- - -File: readline.info, Node: Readline Movement Commands, Next: Readline Killing Commands, Prev: Readline Bare Essentials, Up: Readline Interaction - -Readline Movement Commands --------------------------- - - The above table describes the most basic possible keystrokes that -you need in order to do editing of the input line. For your -convenience, many other commands have been added in addition to C-b, -C-f, C-d, and DEL. Here are some commands for moving more rapidly -about the line. - -C-a - Move to the start of the line. - -C-e - Move to the end of the line. - -M-f - Move forward a word. - -M-b - Move backward a word. - -C-l - Clear the screen, reprinting the current line at the top. - - Notice how C-f moves forward a character, while M-f moves forward a -word. It is a loose convention that control keystrokes operate on -characters while meta keystrokes operate on words. - - -File: readline.info, Node: Readline Killing Commands, Next: Readline Arguments, Prev: Readline Movement Commands, Up: Readline Interaction - -Readline Killing Commands -------------------------- - - "Killing" text means to delete the text from the line, but to save -it away for later use, usually by "yanking" (re-inserting) it back into -the line. If the description for a command says that it `kills' text, -then you can be sure that you can get the text back in a different (or -the same) place later. - - When you use a kill command, the text is saved in a "kill-ring". -Any number of consecutive kills save all of the killed text together, so -that when you yank it back, you get it all. The kill ring is not line -specific; the text that you killed on a previously typed line is -available to be yanked back later, when you are typing another line. - - Here is the list of commands for killing text. - -C-k - Kill the text from the current cursor position to the end of the - line. - -M-d - Kill from the cursor to the end of the current word, or if between - words, to the end of the next word. 
- -M-DEL - Kill from the cursor the start of the previous word, or if between - words, to the start of the previous word. - -C-w - Kill from the cursor to the previous whitespace. This is - different than M-DEL because the word boundaries differ. - - And, here is how to "yank" the text back into the line. Yanking -means to copy the most-recently-killed text from the kill buffer. - -C-y - Yank the most recently killed text back into the buffer at the - cursor. - -M-y - Rotate the kill-ring, and yank the new top. You can only do this - if the prior command is C-y or M-y. - - -File: readline.info, Node: Readline Arguments, Prev: Readline Killing Commands, Up: Readline Interaction - -Readline Arguments ------------------- - - You can pass numeric arguments to Readline commands. Sometimes the -argument acts as a repeat count, other times it is the sign of the -argument that is significant. If you pass a negative argument to a -command which normally acts in a forward direction, that command will -act in a backward direction. For example, to kill text back to the -start of the line, you might type M- C-k. - - The general way to pass numeric arguments to a command is to type -meta digits before the command. If the first `digit' you type is a -minus sign (-), then the sign of the argument will be negative. Once -you have typed one meta digit to get the argument started, you can type -the remainder of the digits, and then the command. For example, to give -the C-d command an argument of 10, you could type M-1 0 C-d. - - -File: readline.info, Node: Readline Init File, Next: Bindable Readline Commands, Prev: Readline Interaction, Up: Command Line Editing - -Readline Init File -================== - - Although the Readline library comes with a set of Emacs-like -keybindings installed by default, it is possible that you would like to -use a different set of keybindings. You can customize programs that -use Readline by putting commands in an "init" file in your home -directory. 
The name of this file is taken from the value of the -environment variable `INPUTRC'. If that variable is unset, the default -is `~/.inputrc'. - - When a program which uses the Readline library starts up, the init -file is read, and the key bindings are set. - - In addition, the `C-x C-r' command re-reads this init file, thus -incorporating any changes that you might have made to it. - -* Menu: - -* Readline Init Syntax:: Syntax for the commands in the inputrc file. -* Conditional Init Constructs:: Conditional key bindings in the inputrc file. - - -File: readline.info, Node: Readline Init Syntax, Next: Conditional Init Constructs, Up: Readline Init File - -Readline Init Syntax --------------------- - - There are only a few basic constructs allowed in the Readline init -file. Blank lines are ignored. Lines beginning with a # are comments. -Lines beginning with a $ indicate conditional constructs (*note -Conditional Init Constructs::.). Other lines denote variable settings -and key bindings. - -Variable Settings - You can change the state of a few variables in Readline by using - the `set' command within the init file. Here is how you would - specify that you wish to use `vi' line editing commands: - - set editing-mode vi - - Right now, there are only a few variables which can be set; so - few, in fact, that we just list them here: - - `editing-mode' - The `editing-mode' variable controls which editing mode you - are using. By default, Readline starts up in Emacs editing - mode, where the keystrokes are most similar to Emacs. This - variable can be set to either `emacs' or `vi'. - - `horizontal-scroll-mode' - This variable can be set to either `On' or `Off'. Setting it - to `On' means that the text of the lines that you edit will - scroll horizontally on a single screen line when they are - longer than the width of the screen, instead of wrapping onto - a new screen line. By default, this variable is set to `Off'. 
- - `mark-modified-lines' - This variable, when set to `On', says to display an asterisk - (`*') at the start of history lines which have been modified. - This variable is `off' by default. - - `bell-style' - Controls what happens when Readline wants to ring the - terminal bell. If set to `none', Readline never rings the - bell. If set to `visible', Readline uses a visible bell if - one is available. If set to `audible' (the default), - Readline attempts to ring the terminal's bell. - - `comment-begin' - The string to insert at the beginning of the line when the - `vi-comment' command is executed. The default value is `"#"'. - - `meta-flag' - If set to `on', Readline will enable eight-bit input (it will - not strip the eighth bit from the characters it reads), - regardless of what the terminal claims it can support. The - default value is `off'. - - `convert-meta' - If set to `on', Readline will convert characters with the - eighth bit set to an ASCII key sequence by stripping the eighth - bit and prepending an ESC character, converting them to a - meta-prefixed key sequence. The default value is `on'. - - `output-meta' - If set to `on', Readline will display characters with the - eighth bit set directly rather than as a meta-prefixed escape - sequence. The default is `off'. - - `completion-query-items' - The number of possible completions that determines when the - user is asked whether he wants to see the list of - possibilities. If the number of possible completions is - greater than this value, Readline will ask the user whether - or not he wishes to view them; otherwise, they are simply - listed. The default limit is `100'. - - `keymap' - Sets Readline's idea of the current keymap for key binding - commands. Acceptable `keymap' names are `emacs', - `emacs-standard', `emacs-meta', `emacs-ctlx', `vi', `vi-move', - `vi-command', and `vi-insert'. `vi' is equivalent to - `vi-command'; `emacs' is equivalent to `emacs-standard'. The - default value is `emacs'.
The value of the `editing-mode' - variable also affects the default keymap. - - `show-all-if-ambiguous' - This alters the default behavior of the completion functions. - If set to `on', words which have more than one possible - completion cause the matches to be listed immediately instead - of ringing the bell. The default value is `off'. - - `expand-tilde' - If set to `on', tilde expansion is performed when Readline - attempts word completion. The default is `off'. - -Key Bindings - The syntax for controlling key bindings in the init file is - simple. First you have to know the name of the command that you - want to change. The following pages contain tables of the command - name, the default keybinding, and a short description of what the - command does. - - Once you know the name of the command, simply place the name of - the key you wish to bind the command to, a colon, and then the - name of the command on a line in the init file. The name of the - key can be expressed in different ways, depending on which is most - comfortable for you. - - KEYNAME: FUNCTION-NAME or MACRO - KEYNAME is the name of a key spelled out in English. For - example: - Control-u: universal-argument - Meta-Rubout: backward-kill-word - Control-o: ">&output" - - In the above example, `C-u' is bound to the function - `universal-argument', and `C-o' is bound to run the macro - expressed on the right hand side (that is, to insert the text - `>&output' into the line). - - "KEYSEQ": FUNCTION-NAME or MACRO - KEYSEQ differs from KEYNAME above in that strings denoting an - entire key sequence can be specified, by placing the key - sequence in double quotes. Some GNU Emacs style key escapes - can be used, as in the following example, but the special - character names are not recognized. 
- - "\C-u": universal-argument - "\C-x\C-r": re-read-init-file - "\e[11~": "Function Key 1" - - In the above example, `C-u' is bound to the function - `universal-argument' (just as it was in the first example), - `C-x C-r' is bound to the function `re-read-init-file', and - `ESC [ 1 1 ~' is bound to insert the text `Function Key 1'. - The following escape sequences are available when specifying - key sequences: - - ``\C-'' - control prefix - - ``\M-'' - meta prefix - - ``\e'' - an escape character - - ``\\'' - backslash - - ``\"'' - " - - ``\''' - ' - - When entering the text of a macro, single or double quotes - should be used to indicate a macro definition. Unquoted text - is assumed to be a function name. Backslash will quote any - character in the macro text, including " and '. For example, - the following binding will make `C-x \' insert a single \ - into the line: - "\C-x\\": "\\" - - -File: readline.info, Node: Conditional Init Constructs, Prev: Readline Init Syntax, Up: Readline Init File - -Conditional Init Constructs ---------------------------- - - Readline implements a facility similar in spirit to the conditional -compilation features of the C preprocessor which allows key bindings -and variable settings to be performed as the result of tests. There -are three parser directives used. - -`$if' - The `$if' construct allows bindings to be made based on the - editing mode, the terminal being used, or the application using - Readline. The text of the test extends to the end of the line; no - characters are required to isolate it. - - `mode' - The `mode=' form of the `$if' directive is used to test - whether Readline is in `emacs' or `vi' mode. This may be - used in conjunction with the `set keymap' command, for - instance, to set bindings in the `emacs-standard' and - `emacs-ctlx' keymaps only if Readline is starting out in - `emacs' mode. 
- - `term' - The `term=' form may be used to include terminal-specific key - bindings, perhaps to bind the key sequences output by the - terminal's function keys. The word on the right side of the - `=' is tested against the full name of the terminal and the - portion of the terminal name before the first `-'. This - allows SUN to match both SUN and SUN-CMD, for instance. - - `application' - The APPLICATION construct is used to include - application-specific settings. Each program using the - Readline library sets the APPLICATION NAME, and you can test - for it. This could be used to bind key sequences to - functions useful for a specific program. For instance, the - following command adds a key sequence that quotes the current - or previous word in Bash: - $if bash - # Quote the current or previous word - "\C-xq": "\eb\"\ef\"" - $endif - -`$endif' - This command, as you saw in the previous example, terminates an - `$if' command. - -`$else' - Commands in this branch of the `$if' directive are executed if the - test fails. - - -File: readline.info, Node: Bindable Readline Commands, Next: Readline vi Mode, Prev: Readline Init File, Up: Command Line Editing - -Bindable Readline Commands -========================== - -* Menu: - -* Commands For Moving:: Moving about the line. -* Commands For History:: Getting at previous lines. -* Commands For Text:: Commands for changing text. -* Commands For Killing:: Commands for killing and yanking. -* Numeric Arguments:: Specifying numeric arguments, repeat counts. -* Commands For Completion:: Getting Readline to do the typing for you. -* Keyboard Macros:: Saving and re-executing typed characters -* Miscellaneous Commands:: Other miscellaneous commands. - - -File: readline.info, Node: Commands For Moving, Next: Commands For History, Up: Bindable Readline Commands - -Commands For Moving -------------------- - -`beginning-of-line (C-a)' - Move to the start of the current line. - -`end-of-line (C-e)' - Move to the end of the line. 
- -`forward-char (C-f)' - Move forward a character. - -`backward-char (C-b)' - Move back a character. - -`forward-word (M-f)' - Move forward to the end of the next word. Words are composed of - letters and digits. - -`backward-word (M-b)' - Move back to the start of this, or the previous, word. Words are - composed of letters and digits. - -`clear-screen (C-l)' - Clear the screen and redraw the current line, leaving the current - line at the top of the screen. - -`redraw-current-line ()' - Refresh the current line. By default, this is unbound. - - -File: readline.info, Node: Commands For History, Next: Commands For Text, Prev: Commands For Moving, Up: Bindable Readline Commands - -Commands For Manipulating The History -------------------------------------- - -`accept-line (Newline, Return)' - Accept the line regardless of where the cursor is. If this line is - non-empty, add it to the history list. If this line was a history - line, then restore the history line to its original state. - -`previous-history (C-p)' - Move `up' through the history list. - -`next-history (C-n)' - Move `down' through the history list. - -`beginning-of-history (M-<)' - Move to the first line in the history. - -`end-of-history (M->)' - Move to the end of the input history, i.e., the line you are - entering. - -`reverse-search-history (C-r)' - Search backward starting at the current line and moving `up' - through the history as necessary. This is an incremental search. - -`forward-search-history (C-s)' - Search forward starting at the current line and moving `down' - through the history as necessary. This is an incremental - search. - -`non-incremental-reverse-search-history (M-p)' - Search backward starting at the current line and moving `up' - through the history as necessary using a non-incremental search - for a string supplied by the user.
- -`non-incremental-forward-search-history (M-n)' - Search forward starting at the current line and moving `down' - through the history as necessary using a non-incremental search - for a string supplied by the user. - -`history-search-forward ()' - Search forward through the history for the string of characters - between the start of the current line and the current point. This - is a non-incremental search. By default, this command is unbound. - -`history-search-backward ()' - Search backward through the history for the string of characters - between the start of the current line and the current point. This - is a non-incremental search. By default, this command is unbound. - -`yank-nth-arg (M-C-y)' - Insert the first argument to the previous command (usually the - second word on the previous line). With an argument N, insert the - Nth word from the previous command (the words in the previous - command begin with word 0). A negative argument inserts the Nth - word from the end of the previous command. - -`yank-last-arg (M-., M-_)' - Insert last argument to the previous command (the last word on the - previous line). With an argument, behave exactly like - `yank-nth-arg'. - - -File: readline.info, Node: Commands For Text, Next: Commands For Killing, Prev: Commands For History, Up: Bindable Readline Commands - -Commands For Changing Text --------------------------- - -`delete-char (C-d)' - Delete the character under the cursor. If the cursor is at the - beginning of the line, there are no characters in the line, and - the last character typed was not C-d, then return EOF. - -`backward-delete-char (Rubout)' - Delete the character behind the cursor. A numeric arg says to kill - the characters instead of deleting them. - -`quoted-insert (C-q, C-v)' - Add the next character that you type to the line verbatim. This is - how to insert key sequences like C-q, for example. - -`tab-insert (M-TAB)' - Insert a tab character.
- -`self-insert (a, b, A, 1, !, ...)' - Insert yourself. - -`transpose-chars (C-t)' - Drag the character before the cursor forward over the character at - the cursor, moving the cursor forward as well. If the insertion - point is at the end of the line, then this transposes the last two - characters of the line. Negative arguments don't work. - -`transpose-words (M-t)' - Drag the word behind the cursor past the word in front of the - cursor moving the cursor over that word as well. - -`upcase-word (M-u)' - Uppercase the current (or following) word. With a negative - argument, do the previous word, but do not move the cursor. - -`downcase-word (M-l)' - Lowercase the current (or following) word. With a negative - argument, do the previous word, but do not move the cursor. - -`capitalize-word (M-c)' - Capitalize the current (or following) word. With a negative - argument, do the previous word, but do not move the cursor. - - -File: readline.info, Node: Commands For Killing, Next: Numeric Arguments, Prev: Commands For Text, Up: Bindable Readline Commands - -Killing And Yanking -------------------- - -`kill-line (C-k)' - Kill the text from the current cursor position to the end of the - line. - -`backward-kill-line (C-x Rubout)' - Kill backward to the beginning of the line. - -`unix-line-discard (C-u)' - Kill backward from the cursor to the beginning of the current line. - Save the killed text on the kill-ring. - -`kill-whole-line ()' - Kill all characters on the current line, no matter where the - cursor is. By default, this is unbound. - -`kill-word (M-d)' - Kill from the cursor to the end of the current word, or if between - words, to the end of the next word. Word boundaries are the same - as `forward-word'. - -`backward-kill-word (M-DEL)' - Kill the word behind the cursor. Word boundaries are the same as - `backward-word'. - -`unix-word-rubout (C-w)' - Kill the word behind the cursor, using white space as a word - boundary.
The killed text is saved on the kill-ring. - -`delete-horizontal-space ()' - Delete all spaces and tabs around point. By default, this is - unbound. - -`yank (C-y)' - Yank the top of the kill ring into the buffer at the current - cursor position. - -`yank-pop (M-y)' - Rotate the kill-ring, and yank the new top. You can only do this - if the prior command is yank or yank-pop. - - -File: readline.info, Node: Numeric Arguments, Next: Commands For Completion, Prev: Commands For Killing, Up: Bindable Readline Commands - -Specifying Numeric Arguments ----------------------------- - -`digit-argument (M-0, M-1, ... M--)' - Add this digit to the argument already accumulating, or start a new - argument. M- starts a negative argument. - -`universal-argument ()' - Each time this is executed, the argument count is multiplied by - four. The argument count is initially one, so executing this - function the first time makes the argument count four. By - default, this is not bound to a key. - - -File: readline.info, Node: Commands For Completion, Next: Keyboard Macros, Prev: Numeric Arguments, Up: Bindable Readline Commands - -Letting Readline Type For You ------------------------------ - -`complete (TAB)' - Attempt to do completion on the text before the cursor. This is - application-specific. Generally, if you are typing a filename - argument, you can do filename completion; if you are typing a - command, you can do command completion, if you are typing in a - symbol to GDB, you can do symbol name completion, if you are - typing in a variable to Bash, you can do variable name completion, - and so on. - -`possible-completions (M-?)' - List the possible completions of the text before the cursor. - -`insert-completions ()' - Insert all completions of the text before point that would have - been generated by `possible-completions'. By default, this is not - bound to a key. 
- - -File: readline.info, Node: Keyboard Macros, Next: Miscellaneous Commands, Prev: Commands For Completion, Up: Bindable Readline Commands - -Keyboard Macros ---------------- - -`start-kbd-macro (C-x ()' - Begin saving the characters typed into the current keyboard macro. - -`end-kbd-macro (C-x ))' - Stop saving the characters typed into the current keyboard macro - and save the definition. - -`call-last-kbd-macro (C-x e)' - Re-execute the last keyboard macro defined, by making the - characters in the macro appear as if typed at the keyboard. - - -File: readline.info, Node: Miscellaneous Commands, Prev: Keyboard Macros, Up: Bindable Readline Commands - -Some Miscellaneous Commands ---------------------------- - -`re-read-init-file (C-x C-r)' - Read in the contents of your init file, and incorporate any - bindings or variable assignments found there. - -`abort (C-g)' - Abort the current editing command and ring the terminal's bell - (subject to the setting of `bell-style'). - -`do-uppercase-version (M-a, M-b, ...)' - Run the command that is bound to the corresponding uppercase - character. - -`prefix-meta (ESC)' - Make the next character that you type be metafied. This is for - people without a meta key. Typing `ESC f' is equivalent to typing - `M-f'. - -`undo (C-_, C-x C-u)' - Incremental undo, separately remembered for each line. - -`revert-line (M-r)' - Undo all changes made to this line. This is like typing the `undo' - command enough times to get back to the beginning. - -`tilde-expand (M-~)' - Perform tilde expansion on the current word. - -`dump-functions ()' - Print all of the functions and their key bindings to the readline - output stream. If a numeric argument is supplied, the output is - formatted in such a way that it can be made part of an INPUTRC - file.
- - -File: readline.info, Node: Readline vi Mode, Prev: Bindable Readline Commands, Up: Command Line Editing - -Readline vi Mode -================ - - While the Readline library does not have a full set of `vi' editing -functions, it does contain enough to allow simple editing of the line. -The Readline `vi' mode behaves as specified in the Posix 1003.2 -standard. - - In order to switch interactively between `Emacs' and `Vi' editing -modes, use the command M-C-j (toggle-editing-mode). The Readline -default is `emacs' mode. - - When you enter a line in `vi' mode, you are already placed in -`insertion' mode, as if you had typed an `i'. Pressing ESC switches -you into `command' mode, where you can edit the text of the line with -the standard `vi' movement keys, move to previous history lines with -`k', and following lines with `j', and so forth. - - This document describes the GNU Readline Library, a utility for -aiding in the consistency of user interface across discrete programs -that need to provide a command line interface. - - Copyright (C) 1988, 1994 Free Software Foundation, Inc. - - Permission is granted to make and distribute verbatim copies of this -manual provided the copyright notice and this permission notice are -preserved on all copies. - - Permission is granted to copy and distribute modified versions of -this manual under the conditions for verbatim copying, provided that -the entire resulting derived work is distributed under the terms of a -permission notice identical to this one. - - Permission is granted to copy and distribute translations of this -manual into another language, under the above conditions for modified -versions, except that this permission notice may be stated in a -translation approved by the Foundation.
- - -File: readline.info, Node: Programming with GNU Readline, Next: Concept Index, Prev: Command Line Editing, Up: Top - -Programming with GNU Readline -***************************** - - This chapter describes the interface between the GNU Readline -Library and other programs. If you are a programmer, and you wish to -include the features found in GNU Readline such as completion, line -editing, and interactive history manipulation in your own programs, -this section is for you. - -* Menu: - -* Basic Behavior:: Using the default behavior of Readline. -* Custom Functions:: Adding your own functions to Readline. -* Readline Convenience Functions:: Functions which Readline supplies to - aid in writing your own -* Custom Completers:: Supplanting or supplementing Readline's - completion functions. - - -File: readline.info, Node: Basic Behavior, Next: Custom Functions, Up: Programming with GNU Readline - -Basic Behavior -============== - - Many programs provide a command line interface, such as `mail', -`ftp', and `sh'. For such programs, the default behaviour of Readline -is sufficient. This section describes how to use Readline in the -simplest way possible, perhaps to replace calls in your code to -`gets()' or `fgets ()'. - - The function `readline ()' prints a prompt and then reads and returns -a single line of text from the user. The line `readline' returns is -allocated with `malloc ()'; you should `free ()' the line when you are -done with it. The declaration for `readline' in ANSI C is - - `char *readline (char *PROMPT);' - -So, one might say - `char *line = readline ("Enter a line: ");' - -in order to read a line of text from the user. The line returned has -the final newline removed, so only the text remains. - - If `readline' encounters an `EOF' while reading the line, and the -line is empty at that point, then `(char *)NULL' is returned. -Otherwise, the line is ended just as if a newline had been typed. 
- - If you want the user to be able to get at the line later, (with C-p -for example), you must call `add_history ()' to save the line away in a -"history" list of such lines. - - `add_history (line)'; - -For full details on the GNU History Library, see the associated manual. - - It is preferable to avoid saving empty lines on the history list, -since users rarely have a burning need to reuse a blank line. Here is -a function which usefully replaces the standard `gets ()' library -function, and has the advantage of no static buffer to overflow: - - /* A static variable for holding the line. */ - static char *line_read = (char *)NULL; - - /* Read a string, and return a pointer to it. Returns NULL on EOF. */ - char * - rl_gets () - { - /* If the buffer has already been allocated, return the memory - to the free pool. */ - if (line_read) - { - free (line_read); - line_read = (char *)NULL; - } - - /* Get a line from the user. */ - line_read = readline (""); - - /* If the line has any text in it, save it on the history. */ - if (line_read && *line_read) - add_history (line_read); - - return (line_read); - } - - This function gives the user the default behaviour of TAB -completion: completion on file names. If you do not want Readline to -complete on filenames, you can change the binding of the TAB key with -`rl_bind_key ()'. - - `int rl_bind_key (int KEY, int (*FUNCTION)());' - - `rl_bind_key ()' takes two arguments: KEY is the character that you -want to bind, and FUNCTION is the address of the function to call when -KEY is pressed. Binding TAB to `rl_insert ()' makes TAB insert itself. -`rl_bind_key ()' returns non-zero if KEY is not a valid ASCII character -code (between 0 and 255). 
- - Thus, to disable the default TAB behavior, the following suffices: - `rl_bind_key ('\t', rl_insert);' - - This code should be executed once at the start of your program; you -might write a function called `initialize_readline ()' which performs -this and other desired initializations, such as installing custom -completers (*note Custom Completers::.). - - -File: readline.info, Node: Custom Functions, Next: Readline Convenience Functions, Prev: Basic Behavior, Up: Programming with GNU Readline - -Custom Functions -================ - - Readline provides many functions for manipulating the text of the -line, but it isn't possible to anticipate the needs of all programs. -This section describes the various functions and variables defined -within the Readline library which allow a user program to add -customized functionality to Readline. - -* Menu: - -* The Function Type:: C declarations to make code readable. -* Function Writing:: Variables and calling conventions. - - -File: readline.info, Node: The Function Type, Next: Function Writing, Up: Custom Functions - -The Function Type ------------------ - - For readability, we declare a new type of object, called "Function". -A `Function' is a C function which returns an `int'. The type -declaration for `Function' is: - -`typedef int Function ();' - - The reason for declaring this new type is to make it easier to write -code describing pointers to C functions. Let us say we had a variable -called FUNC which was a pointer to a function. Instead of the classic -C declaration - - `int (*func)();' - -we may write - - `Function *func;' - -Similarly, there are - - typedef void VFunction (); - typedef char *CPFunction (); and - typedef char **CPPFunction (); - -for functions returning no value, `pointer to char', and `pointer to -pointer to char', respectively.
- - -File: readline.info, Node: Function Writing, Prev: The Function Type, Up: Custom Functions - -Writing a New Function ----------------------- - - In order to write new functions for Readline, you need to know the -calling conventions for keyboard-invoked functions, and the names of the -variables that describe the current state of the line read so far. - - The calling sequence for a command `foo' looks like - - `foo (int count, int key)' - -where COUNT is the numeric argument (or 1 if defaulted) and KEY is the -key that invoked this function. - - It is completely up to the function as to what should be done with -the numeric argument. Some functions use it as a repeat count, some as -a flag, and others to choose alternate behavior (refreshing the current -line as opposed to refreshing the screen, for example). Some choose to -ignore it. In general, if a function uses the numeric argument as a -repeat count, it should be able to do something useful with both -negative and positive arguments. At the very least, it should be aware -that it can be passed a negative argument. - - - Variable: char * rl_line_buffer - This is the line gathered so far. You are welcome to modify the - contents of the line, but see *Note Allowing Undoing::. - - - Variable: int rl_point - The offset of the current cursor position in `rl_line_buffer' (the - *point*). - - - Variable: int rl_end - The number of characters present in `rl_line_buffer'. When - `rl_point' is at the end of the line, `rl_point' and `rl_end' are - equal. - - - Variable: int rl_mark - The mark (saved position) in the current line. If set, the mark - and point define a *region*. - - - Variable: int rl_done - Setting this to a non-zero value causes Readline to return the - current line immediately. - - - Variable: int rl_pending_input - Setting this to a value makes it the next keystroke read. This is - a way to stuff a single character into the input stream. - - - Variable: char * rl_prompt - The prompt Readline uses. 
This is set from the argument to - `readline ()', and should not be assigned to directly. - - - Variable: char * rl_terminal_name - The terminal type, used for initialization. - - - Variable: char * rl_readline_name - This variable is set to a unique name by each application using - Readline. The value allows conditional parsing of the inputrc file - (*note Conditional Init Constructs::.). - - - Variable: FILE * rl_instream - The stdio stream from which Readline reads input. - - - Variable: FILE * rl_outstream - The stdio stream to which Readline performs output. - - - Variable: Function * rl_startup_hook - If non-zero, this is the address of a function to call just before - `readline' prints the first prompt. - - -File: readline.info, Node: Readline Convenience Functions, Next: Custom Completers, Prev: Custom Functions, Up: Programming with GNU Readline - -Readline Convenience Functions -============================== - -* Menu: - -* Function Naming:: How to give a function you write a name. -* Keymaps:: Making keymaps. -* Binding Keys:: Changing Keymaps. -* Associating Function Names and Bindings:: Translate function names to - key sequences. -* Allowing Undoing:: How to make your functions undoable. -* Redisplay:: Functions to control line display. -* Modifying Text:: Functions to modify `rl_line_buffer'. -* Utility Functions:: Generally useful functions and hooks. - - -File: readline.info, Node: Function Naming, Next: Keymaps, Up: Readline Convenience Functions - -Naming a Function ------------------ - - The user can dynamically change the bindings of keys while using -Readline. This is done by representing the function with a descriptive -name. The user is able to type the descriptive name when referring to -the function. Thus, in an init file, one might find - - Meta-Rubout: backward-kill-word - - This binds the keystroke Meta-Rubout to the function *descriptively* -named `backward-kill-word'. 
You, as the programmer, should bind the -functions you write to descriptive names as well. Readline provides a -function for doing that: - - - Function: int rl_add_defun (char *name, Function *function, int key) - Add NAME to the list of named functions. Make FUNCTION be the - function that gets called. If KEY is not -1, then bind it to - FUNCTION using `rl_bind_key ()'. - - Using this function alone is sufficient for most applications. It is -the recommended way to add a few functions to the default functions that -Readline has built in. If you need to do something other than adding a -function to Readline, you may need to use the underlying functions -described below. - - -File: readline.info, Node: Keymaps, Next: Binding Keys, Prev: Function Naming, Up: Readline Convenience Functions - -Selecting a Keymap ------------------- - - Key bindings take place on a "keymap". The keymap is the -association between the keys that the user types and the functions that -get run. You can make your own keymaps, copy existing keymaps, and tell -Readline which keymap to use. - - - Function: Keymap rl_make_bare_keymap () - Returns a new, empty keymap. The space for the keymap is - allocated with `malloc ()'; you should `free ()' it when you are - done. - - - Function: Keymap rl_copy_keymap (Keymap map) - Return a new keymap which is a copy of MAP. - - - Function: Keymap rl_make_keymap () - Return a new keymap with the printing characters bound to - rl_insert, the lowercase Meta characters bound to run their - equivalents, and the Meta digits bound to produce numeric - arguments. - - - Function: void rl_discard_keymap (Keymap keymap) - Free the storage associated with KEYMAP. - - Readline has several internal keymaps. These functions allow you to -change which keymap is active. - - - Function: Keymap rl_get_keymap () - Returns the currently active keymap. - - - Function: void rl_set_keymap (Keymap keymap) - Makes KEYMAP the currently active keymap. 
- - - Function: Keymap rl_get_keymap_by_name (char *name) - Return the keymap matching NAME. NAME is one which would be - supplied in a `set keymap' inputrc line (*note Readline Init - File::.). - - -File: readline.info, Node: Binding Keys, Next: Associating Function Names and Bindings, Prev: Keymaps, Up: Readline Convenience Functions - -Binding Keys ------------- - - You associate keys with functions through the keymap. Readline has -several internal keymaps: `emacs_standard_keymap', `emacs_meta_keymap', -`emacs_ctlx_keymap', `vi_movement_keymap', and `vi_insertion_keymap'. -`emacs_standard_keymap' is the default, and the examples in this manual -assume that. - - These functions manage key bindings. - - - Function: int rl_bind_key (int key, Function *function) - Binds KEY to FUNCTION in the currently active keymap. Returns - non-zero in the case of an invalid KEY. - - - Function: int rl_bind_key_in_map (int key, Function *function, - Keymap map) - Bind KEY to FUNCTION in MAP. Returns non-zero in the case of an - invalid KEY. - - - Function: int rl_unbind_key (int key) - Bind KEY to the null function in the currently active keymap. - Returns non-zero in case of error. - - - Function: int rl_unbind_key_in_map (int key, Keymap map) - Bind KEY to the null function in MAP. Returns non-zero in case of - error. - - - Function: int rl_generic_bind (int type, char *keyseq, char *data, - Keymap map) - Bind the key sequence represented by the string KEYSEQ to the - arbitrary pointer DATA. TYPE says what kind of data is pointed to - by DATA; this can be a function (`ISFUNC'), a macro (`ISMACR'), or - a keymap (`ISKMAP'). This makes new keymaps as necessary. The - initial keymap in which to do bindings is MAP. - - - Function: int rl_parse_and_bind (char *line) - Parse LINE as if it had been read from the `inputrc' file and - perform any key bindings and variable assignments found (*note - Readline Init File::.). 
- - -File: readline.info, Node: Associating Function Names and Bindings, Next: Allowing Undoing, Prev: Binding Keys, Up: Readline Convenience Functions - -Associating Function Names and Bindings ---------------------------------------- - - These functions allow you to find out what keys invoke named -functions and the functions invoked by a particular key sequence. - - - Function: Function * rl_named_function (char *name) - Return the function with name NAME. - - - Function: Function * rl_function_of_keyseq (char *keyseq, Keymap - map, int *type) - Return the function invoked by KEYSEQ in keymap MAP. If MAP is - NULL, the current keymap is used. If TYPE is not NULL, the type - of the object is returned in it (one of `ISFUNC', `ISKMAP', or - `ISMACR'). - - - Function: char ** rl_invoking_keyseqs (Function *function) - Return an array of strings representing the key sequences used to - invoke FUNCTION in the current keymap. - - - Function: char ** rl_invoking_keyseqs_in_map (Function *function, - Keymap map) - Return an array of strings representing the key sequences used to - invoke FUNCTION in the keymap MAP. - - -File: readline.info, Node: Allowing Undoing, Next: Redisplay, Prev: Associating Function Names and Bindings, Up: Readline Convenience Functions - -Allowing Undoing ----------------- - - Supporting the undo command is a painless thing, and makes your -functions much more useful. It is certainly easy to try something if -you know you can undo it. I could use an undo function for the stock -market. - - If your function simply inserts text once, or deletes text once, and -uses `rl_insert_text ()' or `rl_delete_text ()' to do it, then undoing -is already done for you automatically. - - If you do multiple insertions or multiple deletions, or any -combination of these operations, you should group them together into -one operation. This is done with `rl_begin_undo_group ()' and -`rl_end_undo_group ()'. 
- - The types of events that can be undone are: - - enum undo_code { UNDO_DELETE, UNDO_INSERT, UNDO_BEGIN, UNDO_END }; - - Notice that `UNDO_DELETE' means to insert some text, and -`UNDO_INSERT' means to delete some text. That is, the undo code tells -undo what to undo, not how to undo it. `UNDO_BEGIN' and `UNDO_END' are -tags added by `rl_begin_undo_group ()' and `rl_end_undo_group ()'. - - - Function: int rl_begin_undo_group () - Begins saving undo information in a group construct. The undo - information usually comes from calls to `rl_insert_text ()' and - `rl_delete_text ()', but could be the result of calls to - `rl_add_undo ()'. - - - Function: int rl_end_undo_group () - Closes the current undo group started with `rl_begin_undo_group - ()'. There should be one call to `rl_end_undo_group ()' for each - call to `rl_begin_undo_group ()'. - - - Function: void rl_add_undo (enum undo_code what, int start, int end, - char *text) - Remember how to undo an event (according to WHAT). The affected - text runs from START to END, and encompasses TEXT. - - - Function: void free_undo_list () - Free the existing undo list. - - - Function: int rl_do_undo () - Undo the first thing on the undo list. Returns `0' if there was - nothing to undo, non-zero if something was undone. - - Finally, if you neither insert nor delete text, but directly modify -the existing text (e.g., change its case), call `rl_modifying ()' once, -just before you modify the text. You must supply the indices of the -text range that you are going to modify. - - - Function: int rl_modifying (int start, int end) - Tell Readline to save the text between START and END as a single - undo unit. It is assumed that you will subsequently modify that - text. 
- - -File: readline.info, Node: Redisplay, Next: Modifying Text, Prev: Allowing Undoing, Up: Readline Convenience Functions - -Redisplay ---------- - - - Function: int rl_redisplay () - Change what's displayed on the screen to reflect the current - contents of `rl_line_buffer'. - - - Function: int rl_forced_update_display () - Force the line to be updated and redisplayed, whether or not - Readline thinks the screen display is correct. - - - Function: int rl_on_new_line () - Tell the update routines that we have moved onto a new (empty) - line, usually after outputting a newline. - - - Function: int rl_reset_line_state () - Reset the display state to a clean state and redisplay the current - line starting on a new line. - - - Function: int rl_message (va_alist) - The arguments are a string as would be supplied to `printf'. The - resulting string is displayed in the "echo area". The echo area - is also used to display numeric arguments and search strings. - - - Function: int rl_clear_message () - Clear the message in the echo area. - diff --git a/gnu/lib/libreadline/doc/readline.info-2 b/gnu/lib/libreadline/doc/readline.info-2 deleted file mode 100644 index 35681aa235d0..000000000000 --- a/gnu/lib/libreadline/doc/readline.info-2 +++ /dev/null @@ -1,978 +0,0 @@ -This is Info file readline.info, produced by Makeinfo-1.55 from the -input file rlman.texinfo. - - This document describes the GNU Readline Library, a utility which -aids in the consistency of user interface across discrete programs that -need to provide a command line interface. - - Copyright (C) 1988, 1991 Free Software Foundation, Inc. - - Permission is granted to make and distribute verbatim copies of this -manual provided the copyright notice and this permission notice are -preserved on all copies. 
- - Permission is granted to copy and distribute modified versions of -this manual under the conditions for verbatim copying, provided that -the entire resulting derived work is distributed under the terms of a -permission notice identical to this one. - - Permission is granted to copy and distribute translations of this -manual into another language, under the above conditions for modified -versions, except that this permission notice may be stated in a -translation approved by the Foundation. - - -File: readline.info, Node: Modifying Text, Next: Utility Functions, Prev: Redisplay, Up: Readline Convenience Functions - -Modifying Text --------------- - - - Function: int rl_insert_text (char *text) - Insert TEXT into the line at the current cursor position. - - - Function: int rl_delete_text (int start, int end) - Delete the text between START and END in the current line. - - - Function: char * rl_copy_text (int start, int end) - Return a copy of the text between START and END in the current - line. - - - Function: int rl_kill_text (int start, int end) - Copy the text between START and END in the current line to the - kill ring, appending or prepending to the last kill if the last - command was a kill command. The text is deleted. If START is - less than END, the text is appended, otherwise prepended. If the - last command was not a kill, a new kill ring slot is used. - - -File: readline.info, Node: Utility Functions, Prev: Modifying Text, Up: Readline Convenience Functions - -Utility Functions ------------------ - - - Function: int rl_reset_terminal (char *terminal_name) - Reinitialize Readline's idea of the terminal settings using - TERMINAL_NAME as the terminal type (e.g., `vt100'). - - - Function: int alphabetic (int c) - Return 1 if C is an alphabetic character. - - - Function: int numeric (int c) - Return 1 if C is a numeric character. - - - Function: int ding () - Ring the terminal bell, obeying the setting of `bell-style'. 
- - The following are implemented as macros, defined in `chartypes.h'. - - - Function: int uppercase_p (int c) - Return 1 if C is an uppercase alphabetic character. - - - Function: int lowercase_p (int c) - Return 1 if C is a lowercase alphabetic character. - - - Function: int digit_p (int c) - Return 1 if C is a numeric character. - - - Function: int to_upper (int c) - If C is a lowercase alphabetic character, return the corresponding - uppercase character. - - - Function: int to_lower (int c) - If C is an uppercase alphabetic character, return the corresponding - lowercase character. - - - Function: int digit_value (int c) - If C is a number, return the value it represents. - -An Example ----------- - - Here is a function which changes lowercase characters to their -uppercase equivalents, and uppercase characters to lowercase. If this -function was bound to `M-c', then typing `M-c' would change the case of -the character under point. Typing `M-1 0 M-c' would change the case of -the following 10 characters, leaving the cursor on the last character -changed. - - /* Invert the case of the COUNT following characters. */ - int - invert_case_line (count, key) - int count, key; - { - register int start, end, i; - - start = rl_point; - - if (rl_point >= rl_end) - return (0); - - if (count < 0) - { - direction = -1; - count = -count; - } - else - direction = 1; - - /* Find the end of the range to modify. */ - end = start + (count * direction); - - /* Force it to be within range. */ - if (end > rl_end) - end = rl_end; - else if (end < 0) - end = 0; - - if (start == end) - return (0); - - if (start > end) - { - int temp = start; - start = end; - end = temp; - } - - /* Tell readline that we are modifying the line, so it will save - the undo information. 
*/ - rl_modifying (start, end); - - for (i = start; i != end; i++) - { - if (uppercase_p (rl_line_buffer[i])) - rl_line_buffer[i] = to_lower (rl_line_buffer[i]); - else if (lowercase_p (rl_line_buffer[i])) - rl_line_buffer[i] = to_upper (rl_line_buffer[i]); - } - /* Move point to on top of the last character changed. */ - rl_point = (direction == 1) ? end - 1 : start; - return (0); - } - - -File: readline.info, Node: Custom Completers, Prev: Readline Convenience Functions, Up: Programming with GNU Readline - -Custom Completers -================= - - Typically, a program that reads commands from the user has a way of -disambiguating commands and data. If your program is one of these, then -it can provide completion for commands, data, or both. The following -sections describe how your program and Readline cooperate to provide -this service. - -* Menu: - -* How Completing Works:: The logic used to do completion. -* Completion Functions:: Functions provided by Readline. -* Completion Variables:: Variables which control completion. -* A Short Completion Example:: An example of writing completer subroutines. - - -File: readline.info, Node: How Completing Works, Next: Completion Functions, Up: Custom Completers - -How Completing Works --------------------- - - In order to complete some text, the full list of possible completions -must be available. That is, it is not possible to accurately expand a -partial word without knowing all of the possible words which make sense -in that context. The Readline library provides the user interface to -completion, and two of the most common completion functions: filename -and username. For completing other types of text, you must write your -own completion function. This section describes exactly what such -functions must do, and provides an example. - - There are three major functions used to perform completion: - - 1. The user-interface function `rl_complete ()'. 
This function is - called with the same arguments as other Readline functions - intended for interactive use: COUNT and INVOKING_KEY. It - isolates the word to be completed and calls `completion_matches - ()' to generate a list of possible completions. It then either - lists the possible completions, inserts the possible completions, - or actually performs the completion, depending on which behavior - is desired. - - 2. The internal function `completion_matches ()' uses your - "generator" function to generate the list of possible matches, and - then returns the array of these matches. You should place the - address of your generator function in - `rl_completion_entry_function'. - - 3. The generator function is called repeatedly from - `completion_matches ()', returning a string each time. The - arguments to the generator function are TEXT and STATE. TEXT is - the partial word to be completed. STATE is zero the first time - the function is called, allowing the generator to perform any - necessary initialization, and a positive non-zero integer for each - subsequent call. When the generator function returns `(char - *)NULL' this signals `completion_matches ()' that there are no - more possibilities left. Usually the generator function computes - the list of possible completions when STATE is zero, and returns - them one at a time on subsequent calls. Each string the generator - function returns as a match must be allocated with `malloc()'; - Readline frees the strings when it has finished with them. - - - - Function: int rl_complete (int ignore, int invoking_key) - Complete the word at or before point. You have supplied the - function that does the initial simple matching selection algorithm - (see `completion_matches ()'). The default is to do filename - completion. - - - Variable: Function * rl_completion_entry_function - This is a pointer to the generator function for `completion_matches - ()'. 
If the value of `rl_completion_entry_function' is `(Function - *)NULL' then the default filename generator function, - `filename_completion_function ()', is used. - - -File: readline.info, Node: Completion Functions, Next: Completion Variables, Prev: How Completing Works, Up: Custom Completers - -Completion Functions --------------------- - - Here is the complete list of callable completion functions present in -Readline. - - - Function: int rl_complete_internal (int what_to_do) - Complete the word at or before point. WHAT_TO_DO says what to do - with the completion. A value of `?' means list the possible - completions. `TAB' means do standard completion. `*' means - insert all of the possible completions. `!' means to display all - of the possible completions, if there is more than one, as well as - performing partial completion. - - - Function: int rl_complete (int ignore, int invoking_key) - Complete the word at or before point. You have supplied the - function that does the initial simple matching selection algorithm - (see `completion_matches ()' and `rl_completion_entry_function'). - The default is to do filename completion. This calls - `rl_complete_internal ()' with an argument depending on - INVOKING_KEY. - - - Function: int rl_possible_completions (int count, int invoking_key) - List the possible completions. See description of `rl_complete - ()'. This calls `rl_complete_internal ()' with an argument of `?'. - - - Function: int rl_insert_completions (int count, int invoking_key) - Insert the list of possible completions into the line, deleting the - partially-completed word. See description of `rl_complete ()'. - This calls `rl_complete_internal ()' with an argument of `*'. - - - Function: char ** completion_matches (char *text, CPFunction - *entry_func) - Returns an array of `(char *)' which is a list of completions for - TEXT. If there are no completions, returns `(char **)NULL'. The - first entry in the returned array is the substitution for TEXT. 
- The remaining entries are the possible completions. The array is - terminated with a `NULL' pointer. - - ENTRY_FUNC is a function of two args, and returns a `(char *)'. - The first argument is TEXT. The second is a state argument; it is - zero on the first call, and non-zero on subsequent calls. - ENTRY_FUNC returns a `NULL' pointer to the caller when there are - no more matches. - - - Function: char * filename_completion_function (char *text, int state) - A generator function for filename completion in the general case. - Note that completion in Bash is a little different because of all - the pathnames that must be followed when looking up completions - for a command. The Bash source is a useful reference for writing - custom completion functions. - - - Function: char * username_completion_function (char *text, int state) - A completion generator for usernames. TEXT contains a partial - username preceded by a random character (usually `~'). As with all - completion generators, STATE is zero on the first call and non-zero - for subsequent calls. - - -File: readline.info, Node: Completion Variables, Next: A Short Completion Example, Prev: Completion Functions, Up: Custom Completers - -Completion Variables --------------------- - - - Variable: Function * rl_completion_entry_function - A pointer to the generator function for `completion_matches ()'. - `NULL' means to use `filename_completion_function ()', the default - filename completer. - - - Variable: CPPFunction * rl_attempted_completion_function - A pointer to an alternative function to create matches. The - function is called with TEXT, START, and END. START and END are - indices in `rl_line_buffer' saying what the boundaries of TEXT - are. If this function exists and returns `NULL', or if this - variable is set to `NULL', then `rl_complete ()' will call the - value of `rl_completion_entry_function' to generate matches, - otherwise the array of strings returned will be used. 
- - - Variable: int rl_completion_query_items - Up to this many items will be displayed in response to a - possible-completions call. After that, we ask the user if she is - sure she wants to see them all. The default value is 100. - - - Variable: char * rl_basic_word_break_characters - The basic list of characters that signal a break between words for - the completer routine. The default value of this variable is the - characters which break words for completion in Bash, i.e., `" - \t\n\"\\'`@$><=;|&{("'. - - - Variable: char * rl_completer_word_break_characters - The list of characters that signal a break between words for - `rl_complete_internal ()'. The default list is the value of - `rl_basic_word_break_characters'. - - - Variable: char * rl_special_prefixes - The list of characters that are word break characters, but should - be left in TEXT when it is passed to the completion function. - Programs can use this to help determine what kind of completing to - do. For instance, Bash sets this variable to "$@" so that it can - complete shell variables and hostnames. - - - Variable: int rl_ignore_completion_duplicates - If non-zero, then disallow duplicates in the matches. Default is - 1. - - - Variable: int rl_filename_completion_desired - Non-zero means that the results of the matches are to be treated as - filenames. This is *always* zero on entry, and can only be changed - within a completion entry generator function. If it is set to a - non-zero value, directory names have a slash appended and Readline - attempts to quote completed filenames if they contain any embedded - word break characters. - - - Variable: int rl_filename_quoting_desired - Non-zero means that the results of the matches are to be quoted - using double quotes (or an application-specific quoting mechanism) - if the completed filename contains any characters in - `rl_completer_word_break_characters'. 
This is *always* non-zero on - entry, and can only be changed within a completion entry generator - function. - - - Variable: Function * rl_ignore_some_completions_function - This function, if defined, is called by the completer when real - filename completion is done, after all the matching names have - been generated. It is passed a `NULL' terminated array of matches. - The first element (`matches[0]') is the maximal substring common - to all matches. This function can re-arrange the list of matches - as required, but each element deleted from the array must be freed. - - - Variable: char * rl_completer_quote_characters - List of characters which can be used to quote a substring of the - line. Completion occurs on the entire substring, and within the - substring `rl_completer_word_break_characters' are treated as any - other character, unless they also appear within this list. - - -File: readline.info, Node: A Short Completion Example, Prev: Completion Variables, Up: Custom Completers - -A Short Completion Example --------------------------- - - Here is a small application demonstrating the use of the GNU Readline -library. It is called `fileman', and the source code resides in -`examples/fileman.c'. This sample application provides completion of -command names, line editing features, and access to the history list. - - /* fileman.c -- A tiny application which demonstrates how to use the - GNU Readline library. This application interactively allows users - to manipulate files and their modes. */ - - #include <stdio.h> - #include <sys/types.h> - #include <sys/file.h> - #include <sys/stat.h> - #include <sys/errno.h> - - #include <readline/readline.h> - #include <readline/history.h> - - extern char *getwd (); - extern char *xmalloc (); - - /* The names of functions that actually do the manipulation. 
*/ - int com_list (), com_view (), com_rename (), com_stat (), com_pwd (); - int com_delete (), com_help (), com_cd (), com_quit (); - - /* A structure which contains information on the commands this program - can understand. */ - - typedef struct { - char *name; /* User printable name of the function. */ - Function *func; /* Function to call to do the job. */ - char *doc; /* Documentation for this function. */ - } COMMAND; - - COMMAND commands[] = { - { "cd", com_cd, "Change to directory DIR" }, - { "delete", com_delete, "Delete FILE" }, - { "help", com_help, "Display this text" }, - { "?", com_help, "Synonym for `help'" }, - { "list", com_list, "List files in DIR" }, - { "ls", com_list, "Synonym for `list'" }, - { "pwd", com_pwd, "Print the current working directory" }, - { "quit", com_quit, "Quit using Fileman" }, - { "rename", com_rename, "Rename FILE to NEWNAME" }, - { "stat", com_stat, "Print out statistics on FILE" }, - { "view", com_view, "View the contents of FILE" }, - { (char *)NULL, (Function *)NULL, (char *)NULL } - }; - - /* Forward declarations. */ - char *stripwhite (); - COMMAND *find_command (); - - /* The name of this program, as taken from argv[0]. */ - char *progname; - - /* When non-zero, this global means the user is done using this program. */ - int done; - - char * - dupstr (s) - int s; - { - char *r; - - r = xmalloc (strlen (s) + 1); - strcpy (r, s); - return (r); - } - - main (argc, argv) - int argc; - char **argv; - { - char *line, *s; - - progname = argv[0]; - - initialize_readline (); /* Bind our completer. */ - - /* Loop reading and executing lines until the user quits. */ - for ( ; done == 0; ) - { - line = readline ("FileMan: "); - - if (!line) - break; - - /* Remove leading and trailing whitespace from the line. - Then, if there is anything left, add it to the history list - and execute it. 
*/ - s = stripwhite (line); - - if (*s) - { - add_history (s); - execute_line (s); - } - - free (line); - } - exit (0); - } - - /* Execute a command line. */ - int - execute_line (line) - char *line; - { - register int i; - COMMAND *command; - char *word; - - /* Isolate the command word. */ - i = 0; - while (line[i] && whitespace (line[i])) - i++; - word = line + i; - - while (line[i] && !whitespace (line[i])) - i++; - - if (line[i]) - line[i++] = '\0'; - - command = find_command (word); - - if (!command) - { - fprintf (stderr, "%s: No such command for FileMan.\n", word); - return (-1); - } - - /* Get argument to command, if any. */ - while (whitespace (line[i])) - i++; - - word = line + i; - - /* Call the function. */ - return ((*(command->func)) (word)); - } - - /* Look up NAME as the name of a command, and return a pointer to that - command. Return a NULL pointer if NAME isn't a command name. */ - COMMAND * - find_command (name) - char *name; - { - register int i; - - for (i = 0; commands[i].name; i++) - if (strcmp (name, commands[i].name) == 0) - return (&commands[i]); - - return ((COMMAND *)NULL); - } - - /* Strip whitespace from the start and end of STRING. Return a pointer - into STRING. */ - char * - stripwhite (string) - char *string; - { - register char *s, *t; - - for (s = string; whitespace (*s); s++) - ; - - if (*s == 0) - return (s); - - t = s + strlen (s) - 1; - while (t > s && whitespace (*t)) - t--; - *++t = '\0'; - - return s; - } - - /* **************************************************************** */ - /* */ - /* Interface to Readline Completion */ - /* */ - /* **************************************************************** */ - - char *command_generator (); - char **fileman_completion (); - - /* Tell the GNU Readline library how to complete. We want to try to complete - on command names if this is the first word in the line, or on filenames - if not. */ - initialize_readline () - { - /* Allow conditional parsing of the ~/.inputrc file. 
*/ - rl_readline_name = "FileMan"; - - /* Tell the completer that we want a crack first. */ - rl_attempted_completion_function = (CPPFunction *)fileman_completion; - } - - /* Attempt to complete on the contents of TEXT. START and END show the - region of TEXT that contains the word to complete. We can use the - entire line in case we want to do some simple parsing. Return the - array of matches, or NULL if there aren't any. */ - char ** - fileman_completion (text, start, end) - char *text; - int start, end; - { - char **matches; - - matches = (char **)NULL; - - /* If this word is at the start of the line, then it is a command - to complete. Otherwise it is the name of a file in the current - directory. */ - if (start == 0) - matches = completion_matches (text, command_generator); - - return (matches); - } - - /* Generator function for command completion. STATE lets us know whether - to start from scratch; without any state (i.e. STATE == 0), then we - start at the top of the list. */ - char * - command_generator (text, state) - char *text; - int state; - { - static int list_index, len; - char *name; - - /* If this is a new word to complete, initialize now. This includes - saving the length of TEXT for efficiency, and initializing the index - variable to 0. */ - if (!state) - { - list_index = 0; - len = strlen (text); - } - - /* Return the next name which partially matches from the command list. */ - while (name = commands[list_index].name) - { - list_index++; - - if (strncmp (name, text, len) == 0) - return (dupstr(name)); - } - - /* If no names matched, then return NULL. */ - return ((char *)NULL); - } - - /* **************************************************************** */ - /* */ - /* FileMan Commands */ - /* */ - /* **************************************************************** */ - - /* String to pass to system (). This is for the LIST, VIEW and RENAME - commands. */ - static char syscom[1024]; - - /* List the file(s) named in arg. 
*/ - com_list (arg) - char *arg; - { - if (!arg) - arg = ""; - - sprintf (syscom, "ls -FClg %s", arg); - return (system (syscom)); - } - - com_view (arg) - char *arg; - { - if (!valid_argument ("view", arg)) - return 1; - - sprintf (syscom, "more %s", arg); - return (system (syscom)); - } - - com_rename (arg) - char *arg; - { - too_dangerous ("rename"); - return (1); - } - - com_stat (arg) - char *arg; - { - struct stat finfo; - - if (!valid_argument ("stat", arg)) - return (1); - - if (stat (arg, &finfo) == -1) - { - perror (arg); - return (1); - } - - printf ("Statistics for `%s':\n", arg); - - printf ("%s has %d link%s, and is %d byte%s in length.\n", arg, - finfo.st_nlink, - (finfo.st_nlink == 1) ? "" : "s", - finfo.st_size, - (finfo.st_size == 1) ? "" : "s"); - printf ("Inode Last Change at: %s", ctime (&finfo.st_ctime)); - printf (" Last access at: %s", ctime (&finfo.st_atime)); - printf (" Last modified at: %s", ctime (&finfo.st_mtime)); - return (0); - } - - com_delete (arg) - char *arg; - { - too_dangerous ("delete"); - return (1); - } - - /* Print out help for ARG, or for all of the commands if ARG is - not present. */ - com_help (arg) - char *arg; - { - register int i; - int printed = 0; - - for (i = 0; commands[i].name; i++) - { - if (!*arg || (strcmp (arg, commands[i].name) == 0)) - { - printf ("%s\t\t%s.\n", commands[i].name, commands[i].doc); - printed++; - } - } - - if (!printed) - { - printf ("No commands match `%s'. Possibilties are:\n", arg); - - for (i = 0; commands[i].name; i++) - { - /* Print in six columns. */ - if (printed == 6) - { - printed = 0; - printf ("\n"); - } - - printf ("%s\t", commands[i].name); - printed++; - } - - if (printed) - printf ("\n"); - } - return (0); - } - - /* Change to the directory ARG. */ - com_cd (arg) - char *arg; - { - if (chdir (arg) == -1) - { - perror (arg); - return 1; - } - - com_pwd (""); - return (0); - } - - /* Print out the current working directory. 
*/ - com_pwd (ignore) - char *ignore; - { - char dir[1024], *s; - - s = getwd (dir); - if (s == 0) - { - printf ("Error getting pwd: %s\n", dir); - return 1; - } - - printf ("Current directory is %s\n", dir); - return 0; - } - - /* The user wishes to quit using this program. Just set DONE non-zero. */ - com_quit (arg) - char *arg; - { - done = 1; - return (0); - } - - /* Function which tells you that you can't do this. */ - too_dangerous (caller) - char *caller; - { - fprintf (stderr, - "%s: Too dangerous for me to distribute. Write it yourself.\n", - caller); - } - - /* Return non-zero if ARG is a valid argument for CALLER, else print - an error message and return zero. */ - int - valid_argument (caller, arg) - char *caller, *arg; - { - if (!arg || !*arg) - { - fprintf (stderr, "%s: Argument required.\n", caller); - return (0); - } - - return (1); - } - - -File: readline.info, Node: Concept Index, Next: Function and Variable Index, Prev: Programming with GNU Readline, Up: Top - -Concept Index -************* - -* Menu: - -* interaction, readline: Readline Interaction. -* Kill ring: Readline Killing Commands. -* Killing text: Readline Killing Commands. -* readline, function: Basic Behavior. -* Yanking text: Readline Killing Commands. - - -File: readline.info, Node: Function and Variable Index, Prev: Concept Index, Up: Top - -Function and Variable Index -*************************** - -* Menu: - -* $else: Conditional Init Constructs. -* $endif: Conditional Init Constructs. -* $if: Conditional Init Constructs. -* abort (C-g): Miscellaneous Commands. -* accept-line (Newline, Return): Commands For History. -* alphabetic: Utility Functions. -* backward-char (C-b): Commands For Moving. -* backward-delete-char (Rubout): Commands For Text. -* backward-kill-line (C-x Rubout): Commands For Killing. -* backward-kill-word (M-DEL): Commands For Killing. -* backward-word (M-b): Commands For Moving. -* beginning-of-history (M-<): Commands For History. 
-* beginning-of-line (C-a): Commands For Moving. -* bell-style: Readline Init Syntax. -* call-last-kbd-macro (C-x e): Keyboard Macros. -* capitalize-word (M-c): Commands For Text. -* clear-screen (C-l): Commands For Moving. -* comment-begin: Readline Init Syntax. -* complete (TAB): Commands For Completion. -* completion-query-items: Readline Init Syntax. -* completion_matches: Completion Functions. -* convert-meta: Readline Init Syntax. -* delete-char (C-d): Commands For Text. -* delete-horizontal-space (): Commands For Killing. -* digit-argument (M-0, M-1, ... M-): Numeric Arguments. -* digit_p: Utility Functions. -* digit_value: Utility Functions. -* ding: Utility Functions. -* do-uppercase-version (M-a, M-b, ...): Miscellaneous Commands. -* downcase-word (M-l): Commands For Text. -* dump-functions (): Miscellaneous Commands. -* editing-mode: Readline Init Syntax. -* end-kbd-macro (C-x )): Keyboard Macros. -* end-of-history (M->): Commands For History. -* end-of-line (C-e): Commands For Moving. -* expand-tilde: Readline Init Syntax. -* filename_completion_function: Completion Functions. -* forward-char (C-f): Commands For Moving. -* forward-search-history (C-s): Commands For History. -* forward-word (M-f): Commands For Moving. -* free_undo_list: Allowing Undoing. -* history-search-backward (): Commands For History. -* history-search-forward (): Commands For History. -* horizontal-scroll-mode: Readline Init Syntax. -* insert-completions (): Commands For Completion. -* keymap: Readline Init Syntax. -* kill-line (C-k): Commands For Killing. -* kill-whole-line (): Commands For Killing. -* kill-word (M-d): Commands For Killing. -* lowercase_p: Utility Functions. -* mark-modified-lines: Readline Init Syntax. -* meta-flag: Readline Init Syntax. -* next-history (C-n): Commands For History. -* non-incremental-forward-search-history (M-n): Commands For History. -* non-incremental-reverse-search-history (M-p): Commands For History. -* numeric: Utility Functions. 
-* output-meta: Readline Init Syntax. -* possible-completions (M-?): Commands For Completion. -* prefix-meta (ESC): Miscellaneous Commands. -* previous-history (C-p): Commands For History. -* quoted-insert (C-q, C-v): Commands For Text. -* re-read-init-file (C-x C-r): Miscellaneous Commands. -* readline: Basic Behavior. -* redraw-current-line (): Commands For Moving. -* reverse-search-history (C-r): Commands For History. -* revert-line (M-r): Miscellaneous Commands. -* rl_add_defun: Function Naming. -* rl_add_undo: Allowing Undoing. -* rl_attempted_completion_function: Completion Variables. -* rl_basic_word_break_characters: Completion Variables. -* rl_begin_undo_group: Allowing Undoing. -* rl_bind_key: Binding Keys. -* rl_bind_key_in_map: Binding Keys. -* rl_clear_message: Redisplay. -* rl_complete: How Completing Works. -* rl_complete: Completion Functions. -* rl_completer_quote_characters: Completion Variables. -* rl_completer_word_break_characters: Completion Variables. -* rl_complete_internal: Completion Functions. -* rl_completion_entry_function: Completion Variables. -* rl_completion_entry_function: How Completing Works. -* rl_completion_query_items: Completion Variables. -* rl_copy_keymap: Keymaps. -* rl_copy_text: Modifying Text. -* rl_delete_text: Modifying Text. -* rl_discard_keymap: Keymaps. -* rl_done: Function Writing. -* rl_do_undo: Allowing Undoing. -* rl_end: Function Writing. -* rl_end_undo_group: Allowing Undoing. -* rl_filename_completion_desired: Completion Variables. -* rl_filename_quoting_desired: Completion Variables. -* rl_forced_update_display: Redisplay. -* rl_function_of_keyseq: Associating Function Names and Bindings. -* rl_generic_bind: Binding Keys. -* rl_get_keymap: Keymaps. -* rl_get_keymap_by_name: Keymaps. -* rl_ignore_completion_duplicates: Completion Variables. -* rl_ignore_some_completions_function: Completion Variables. -* rl_insert_completions: Completion Functions. -* rl_insert_text: Modifying Text. 
-* rl_instream: Function Writing. -* rl_invoking_keyseqs: Associating Function Names and Bindings. -* rl_invoking_keyseqs_in_map: Associating Function Names and Bindings. -* rl_kill_text: Modifying Text. -* rl_line_buffer: Function Writing. -* rl_make_bare_keymap: Keymaps. -* rl_make_keymap: Keymaps. -* rl_mark: Function Writing. -* rl_message: Redisplay. -* rl_modifying: Allowing Undoing. -* rl_named_function: Associating Function Names and Bindings. -* rl_on_new_line: Redisplay. -* rl_outstream: Function Writing. -* rl_parse_and_bind: Binding Keys. -* rl_pending_input: Function Writing. -* rl_point: Function Writing. -* rl_possible_completions: Completion Functions. -* rl_prompt: Function Writing. -* rl_readline_name: Function Writing. -* rl_redisplay: Redisplay. -* rl_reset_line_state: Redisplay. -* rl_reset_terminal: Utility Functions. -* rl_set_keymap: Keymaps. -* rl_special_prefixes: Completion Variables. -* rl_startup_hook: Function Writing. -* rl_terminal_name: Function Writing. -* rl_unbind_key: Binding Keys. -* rl_unbind_key_in_map: Binding Keys. -* self-insert (a, b, A, 1, !, ...): Commands For Text. -* show-all-if-ambiguous: Readline Init Syntax. -* start-kbd-macro (C-x (): Keyboard Macros. -* tab-insert (M-TAB): Commands For Text. -* tilde-expand (M-~): Miscellaneous Commands. -* to_lower: Utility Functions. -* to_upper: Utility Functions. -* transpose-chars (C-t): Commands For Text. -* transpose-words (M-t): Commands For Text. -* undo (C-_, C-x C-u): Miscellaneous Commands. -* universal-argument (): Numeric Arguments. -* unix-line-discard (C-u): Commands For Killing. -* unix-word-rubout (C-w): Commands For Killing. -* upcase-word (M-u): Commands For Text. -* uppercase_p: Utility Functions. -* username_completion_function: Completion Functions. -* yank (C-y): Commands For Killing. -* yank-last-arg (M-., M-_): Commands For History. -* yank-nth-arg (M-C-y): Commands For History. -* yank-pop (M-y): Commands For Killing. 
- - diff --git a/gnu/lib/libreadline/doc/rlman.texinfo b/gnu/lib/libreadline/doc/rlman.texinfo deleted file mode 100644 index ec1406670a35..000000000000 --- a/gnu/lib/libreadline/doc/rlman.texinfo +++ /dev/null @@ -1,111 +0,0 @@ -\input texinfo @c -*-texinfo-*- -@comment %**start of header (This is for running Texinfo on a region.) -@setfilename readline.info -@settitle GNU Readline Library -@comment %**end of header (This is for running Texinfo on a region.) -@synindex vr fn -@setchapternewpage odd - -@ignore -last change: Thu Jul 21 16:02:40 EDT 1994 -@end ignore - -@set EDITION 2.0 -@set VERSION 2.0 -@set UPDATED 21 July 1994 -@set UPDATE-MONTH July 1994 - -@ifinfo -This document describes the GNU Readline Library, a utility which aids -in the consistency of user interface across discrete programs that need -to provide a command line interface. - -Copyright (C) 1988, 1991 Free Software Foundation, Inc. - -Permission is granted to make and distribute verbatim copies of -this manual provided the copyright notice and this permission notice -are preserved on all copies. - -@ignore -Permission is granted to process this file through TeX and print the -results, provided the printed document carries copying permission -notice identical to this one except for the removal of this paragraph -(this paragraph not being relevant to the printed manual). -@end ignore - -Permission is granted to copy and distribute modified versions of this -manual under the conditions for verbatim copying, provided that the entire -resulting derived work is distributed under the terms of a permission -notice identical to this one. - -Permission is granted to copy and distribute translations of this manual -into another language, under the above conditions for modified versions, -except that this permission notice may be stated in a translation approved -by the Foundation.
-@end ifinfo - -@titlepage -@sp 10 -@title GNU Readline Library -@subtitle Edition @value{EDITION}, for @code{Readline Library} Version @value{VERSION}. -@subtitle @value{UPDATE-MONTH} -@author Brian Fox, Free Software Foundation -@author Chet Ramey, Case Western Reserve University - -@page -This document describes the GNU Readline Library, a utility which aids -in the consistency of user interface across discrete programs that need -to provide a command line interface. - -Published by the Free Software Foundation @* -675 Massachusetts Avenue, @* -Cambridge, MA 02139 USA - -Permission is granted to make and distribute verbatim copies of -this manual provided the copyright notice and this permission notice -are preserved on all copies. - -Permission is granted to copy and distribute modified versions of this -manual under the conditions for verbatim copying, provided that the entire -resulting derived work is distributed under the terms of a permission -notice identical to this one. - -Permission is granted to copy and distribute translations of this manual -into another language, under the above conditions for modified versions, -except that this permission notice may be stated in a translation approved -by the Foundation. - -@vskip 0pt plus 1filll -Copyright @copyright{} 1989, 1991 Free Software Foundation, Inc. -@end titlepage - -@ifinfo -@node Top -@top GNU Readline Library - -This document describes the GNU Readline Library, a utility which aids -in the consistency of user interface across discrete programs that need -to provide a command line interface. - -@menu -* Command Line Editing:: GNU Readline User's Manual. -* Programming with GNU Readline:: GNU Readline Programmer's Manual. -* Concept Index:: Index of concepts described in this manual. -* Function and Variable Index:: Index of externally visible functions - and variables. 
-@end menu -@end ifinfo - -@include rluser.texinfo -@include rltech.texinfo - -@node Concept Index -@unnumbered Concept Index -@printindex cp - -@node Function and Variable Index -@unnumbered Function and Variable Index -@printindex fn - -@contents -@bye diff --git a/gnu/lib/libreadline/doc/texindex.c b/gnu/lib/libreadline/doc/texindex.c deleted file mode 100644 index 9233bab12690..000000000000 --- a/gnu/lib/libreadline/doc/texindex.c +++ /dev/null @@ -1,1666 +0,0 @@ -/* Prepare TeX index dribble output into an actual index. - - Version 1.45 - - Copyright (C) 1987, 1991, 1992 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ - - -#include <stdio.h> -#include <ctype.h> -#include <errno.h> -#include "getopt.h" -#include "bashansi.h" - -#if !defined (errno) -extern int errno; -#endif - -#if defined (HAVE_UNISTD_H) -# include <unistd.h> -#else /* !HAVE_UNISTD_H */ -extern long lseek (); -#endif /* !HAVE_UNISTD_H */ - -extern char *mktemp (); - -#if !defined (HAVE_STRERROR) -extern int sys_nerr; -extern char *sys_errlist[]; -#endif - -#include <sys/types.h> - -#if defined (_AIX) || !defined (_POSIX_VERSION) -# include <sys/file.h> -#endif - -#include <fcntl.h> - -#define TI_NO_ERROR 0 -#define TI_FATAL_ERROR 1 - -#if !defined (SEEK_SET) -# define SEEK_SET 0 -# define SEEK_CUR 1 -# define SEEK_END 2 -#endif /* !SEEK_SET */ - -/* When sorting in core, this structure describes one line - and the position and length of its first keyfield. */ -struct lineinfo -{ - char *text; /* The actual text of the line. */ - union { - char *text; /* The start of the key (for textual comparison). */ - long number; /* The numeric value (for numeric comparison). */ - } key; - long keylen; /* Length of KEY field. */ -}; - -/* This structure describes a field to use as a sort key. */ -struct keyfield -{ - int startwords; /* Number of words to skip. */ - int startchars; /* Number of additional chars to skip. */ - int endwords; /* Number of words to ignore at end. */ - int endchars; /* Ditto for characters of last word. */ - char ignore_blanks; /* Non-zero means ignore spaces and tabs. */ - char fold_case; /* Non-zero means case doesn't matter. */ - char reverse; /* Non-zero means compare in reverse order. */ - char numeric; /* Non-zeros means field is ASCII numeric. */ - char positional; /* Sort according to file position. */ - char braced; /* Count balanced-braced groupings as fields. */ -}; - -/* Vector of keyfields to use. */ -struct keyfield keyfields[3]; - -/* Number of keyfields stored in that vector. */ -int num_keyfields = 3; - -/* Vector of input file names, terminated with a null pointer. 
*/ -char **infiles; - -/* Vector of corresponding output file names, or NULL, meaning default it - (add an `s' to the end). */ -char **outfiles; - -/* Length of `infiles'. */ -int num_infiles; - -/* Pointer to the array of pointers to lines being sorted. */ -char **linearray; - -/* The allocated length of `linearray'. */ -long nlines; - -/* Directory to use for temporary files. On Unix, it ends with a slash. */ -char *tempdir; - -/* Start of filename to use for temporary files. */ -char *tempbase; - -/* Number of last temporary file. */ -int tempcount; - -/* Number of last temporary file already deleted. - Temporary files are deleted by `flush_tempfiles' in order of creation. */ -int last_deleted_tempcount; - -/* During in-core sort, this points to the base of the data block - which contains all the lines of data. */ -char *text_base; - -/* Additional command switches .*/ - -/* Nonzero means do not delete tempfiles -- for debugging. */ -int keep_tempfiles; - -/* The name this program was run with. */ -char *program_name; - -/* Forward declarations of functions in this file. */ - -void decode_command (); -void sort_in_core (); -void sort_offline (); -char **parsefile (); -char *find_field (); -char *find_pos (); -long find_value (); -char *find_braced_pos (); -char *find_braced_end (); -void writelines (); -int compare_field (); -int compare_full (); -long readline (); -int merge_files (); -int merge_direct (); -void pfatal_with_name (); -void fatal (); -void error (); -void *xmalloc (), *xrealloc (); -char *concat (); -char *maketempname (); -void flush_tempfiles (); -char *tempcopy (); - -#define MAX_IN_CORE_SORT 500000 - -void -main (argc, argv) - int argc; - char **argv; -{ - int i; - - tempcount = 0; - last_deleted_tempcount = 0; - program_name = argv[0]; - - /* Describe the kind of sorting to do. */ - /* The first keyfield uses the first braced field and folds case. 
*/ - keyfields[0].braced = 1; - keyfields[0].fold_case = 1; - keyfields[0].endwords = -1; - keyfields[0].endchars = -1; - - /* The second keyfield uses the second braced field, numerically. */ - keyfields[1].braced = 1; - keyfields[1].numeric = 1; - keyfields[1].startwords = 1; - keyfields[1].endwords = -1; - keyfields[1].endchars = -1; - - /* The third keyfield (which is ignored while discarding duplicates) - compares the whole line. */ - keyfields[2].endwords = -1; - keyfields[2].endchars = -1; - - decode_command (argc, argv); - - tempbase = mktemp (concat ("txiXXXXXX", "", "")); - - /* Process input files completely, one by one. */ - - for (i = 0; i < num_infiles; i++) - { - int desc; - long ptr; - char *outfile; - - desc = open (infiles[i], O_RDONLY, 0); - if (desc < 0) - pfatal_with_name (infiles[i]); - lseek (desc, 0L, SEEK_END); - ptr = lseek (desc, 0L, SEEK_CUR); - - close (desc); - - outfile = outfiles[i]; - if (!outfile) - { - outfile = concat (infiles[i], "s", ""); - } - - if (ptr < MAX_IN_CORE_SORT) - /* Sort a small amount of data. */ - sort_in_core (infiles[i], ptr, outfile); - else - sort_offline (infiles[i], ptr, outfile); - } - - flush_tempfiles (tempcount); - exit (TI_NO_ERROR); -} - -void -usage () -{ - fprintf (stderr, "\ -Usage: %s [-k] infile [-o outfile] ...\n", program_name); - exit (1); -} - -/* Decode the command line arguments to set the parameter variables - and set up the vector of keyfields and the vector of input files. */ - -void -decode_command (argc, argv) - int argc; - char **argv; -{ - int optc; - char **ip; - char **op; - - /* Store default values into parameter variables. */ - - tempdir = getenv ("TMPDIR"); - if (tempdir == NULL) - tempdir = "/tmp/"; - else - tempdir = concat (tempdir, "/", ""); - - keep_tempfiles = 0; - - /* Allocate ARGC input files, which must be enough. 
*/ - - infiles = (char **) xmalloc (argc * sizeof (char *)); - outfiles = (char **) xmalloc (argc * sizeof (char *)); - ip = infiles; - op = outfiles; - - while ((optc = getopt (argc, argv, "-ko:")) != EOF) - { - switch (optc) - { - case 1: /* Non-option filename. */ - *ip++ = optarg; - *op++ = NULL; - break; - - case 'k': - keep_tempfiles = 1; - break; - - case 'o': - if (op > outfiles) - *(op - 1) = optarg; - break; - - default: - usage (); - } - } - - /* Record number of keyfields and terminate list of filenames. */ - num_infiles = ip - infiles; - *ip = 0; - if (num_infiles == 0) - usage (); -} - -/* Return a name for a temporary file. */ - -char * -maketempname (count) - int count; -{ - char tempsuffix[10]; - sprintf (tempsuffix, "%d", count); - return concat (tempdir, tempbase, tempsuffix); -} - -/* Delete all temporary files up to TO_COUNT. */ - -void -flush_tempfiles (to_count) - int to_count; -{ - if (keep_tempfiles) - return; - while (last_deleted_tempcount < to_count) - unlink (maketempname (++last_deleted_tempcount)); -} - -/* Copy the input file open on IDESC into a temporary file - and return the temporary file name. */ - -#define BUFSIZE 1024 - -char * -tempcopy (idesc) - int idesc; -{ - char *outfile = maketempname (++tempcount); - int odesc; - char buffer[BUFSIZE]; - - odesc = open (outfile, O_WRONLY | O_CREAT, 0666); - - if (odesc < 0) - pfatal_with_name (outfile); - - while (1) - { - int nread = read (idesc, buffer, BUFSIZE); - write (odesc, buffer, nread); - if (!nread) - break; - } - - close (odesc); - - return outfile; -} - -/* Compare LINE1 and LINE2 according to the specified set of keyfields. */ - -int -compare_full (line1, line2) - char **line1, **line2; -{ - int i; - - /* Compare using the first keyfield; - if that does not distinguish the lines, try the second keyfield; - and so on. 
*/ - - for (i = 0; i < num_keyfields; i++) - { - long length1, length2; - char *start1 = find_field (&keyfields[i], *line1, &length1); - char *start2 = find_field (&keyfields[i], *line2, &length2); - int tem = compare_field (&keyfields[i], start1, length1, *line1 - text_base, - start2, length2, *line2 - text_base); - if (tem) - { - if (keyfields[i].reverse) - return -tem; - return tem; - } - } - - return 0; /* Lines match exactly. */ -} - -/* Compare LINE1 and LINE2, described by structures - in which the first keyfield is identified in advance. - For positional sorting, assumes that the order of the lines in core - reflects their nominal order. */ - -int -compare_prepared (line1, line2) - struct lineinfo *line1, *line2; -{ - int i; - int tem; - char *text1, *text2; - - /* Compare using the first keyfield, which has been found for us already. */ - if (keyfields->positional) - { - if (line1->text - text_base > line2->text - text_base) - tem = 1; - else - tem = -1; - } - else if (keyfields->numeric) - tem = line1->key.number - line2->key.number; - else - tem = compare_field (keyfields, line1->key.text, line1->keylen, 0, - line2->key.text, line2->keylen, 0); - if (tem) - { - if (keyfields->reverse) - return -tem; - return tem; - } - - text1 = line1->text; - text2 = line2->text; - - /* Compare using the second keyfield; - if that does not distinguish the lines, try the third keyfield; - and so on. */ - - for (i = 1; i < num_keyfields; i++) - { - long length1, length2; - char *start1 = find_field (&keyfields[i], text1, &length1); - char *start2 = find_field (&keyfields[i], text2, &length2); - int tem = compare_field (&keyfields[i], start1, length1, text1 - text_base, - start2, length2, text2 - text_base); - if (tem) - { - if (keyfields[i].reverse) - return -tem; - return tem; - } - } - - return 0; /* Lines match exactly. */ -} - -/* Like compare_full but more general. - You can pass any strings, and you can say how many keyfields to use. 
- POS1 and POS2 should indicate the nominal positional ordering of - the two lines in the input. */ - -int -compare_general (str1, str2, pos1, pos2, use_keyfields) - char *str1, *str2; - long pos1, pos2; - int use_keyfields; -{ - int i; - - /* Compare using the first keyfield; - if that does not distinguish the lines, try the second keyfield; - and so on. */ - - for (i = 0; i < use_keyfields; i++) - { - long length1, length2; - char *start1 = find_field (&keyfields[i], str1, &length1); - char *start2 = find_field (&keyfields[i], str2, &length2); - int tem = compare_field (&keyfields[i], start1, length1, pos1, - start2, length2, pos2); - if (tem) - { - if (keyfields[i].reverse) - return -tem; - return tem; - } - } - - return 0; /* Lines match exactly. */ -} - -/* Find the start and length of a field in STR according to KEYFIELD. - A pointer to the starting character is returned, and the length - is stored into the int that LENGTHPTR points to. */ - -char * -find_field (keyfield, str, lengthptr) - struct keyfield *keyfield; - char *str; - long *lengthptr; -{ - char *start; - char *end; - char *(*fun) (); - - if (keyfield->braced) - fun = find_braced_pos; - else - fun = find_pos; - - start = (*fun) (str, keyfield->startwords, keyfield->startchars, - keyfield->ignore_blanks); - if (keyfield->endwords < 0) - { - if (keyfield->braced) - end = find_braced_end (start); - else - { - end = start; - while (*end && *end != '\n') - end++; - } - } - else - { - end = (*fun) (str, keyfield->endwords, keyfield->endchars, 0); - if (end - str < start - str) - end = start; - } - *lengthptr = end - start; - return start; -} - -/* Return a pointer to a specified place within STR, - skipping (from the beginning) WORDS words and then CHARS chars. - If IGNORE_BLANKS is nonzero, we skip all blanks - after finding the specified word. 
*/ - -char * -find_pos (str, words, chars, ignore_blanks) - char *str; - int words, chars; - int ignore_blanks; -{ - int i; - char *p = str; - - for (i = 0; i < words; i++) - { - char c; - /* Find next bunch of nonblanks and skip them. */ - while ((c = *p) == ' ' || c == '\t') - p++; - while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t')) - p++; - if (!*p || *p == '\n') - return p; - } - - while (*p == ' ' || *p == '\t') - p++; - - for (i = 0; i < chars; i++) - { - if (!*p || *p == '\n') - break; - p++; - } - return p; -} - -/* Like find_pos but assumes that each field is surrounded by braces - and that braces within fields are balanced. */ - -char * -find_braced_pos (str, words, chars, ignore_blanks) - char *str; - int words, chars; - int ignore_blanks; -{ - int i; - int bracelevel; - char *p = str; - char c; - - for (i = 0; i < words; i++) - { - bracelevel = 1; - while ((c = *p++) != '{' && c != '\n' && c) - /* Do nothing. */ ; - if (c != '{') - return p - 1; - while (bracelevel) - { - c = *p++; - if (c == '{') - bracelevel++; - if (c == '}') - bracelevel--; - if (c == 0 || c == '\n') - return p - 1; - } - } - - while ((c = *p++) != '{' && c != '\n' && c) - /* Do nothing. */ ; - - if (c != '{') - return p - 1; - - if (ignore_blanks) - while ((c = *p) == ' ' || c == '\t') - p++; - - for (i = 0; i < chars; i++) - { - if (!*p || *p == '\n') - break; - p++; - } - return p; -} - -/* Find the end of the balanced-brace field which starts at STR. - The position returned is just before the closing brace. 
*/ - -char * -find_braced_end (str) - char *str; -{ - int bracelevel; - char *p = str; - char c; - - bracelevel = 1; - while (bracelevel) - { - c = *p++; - if (c == '{') - bracelevel++; - if (c == '}') - bracelevel--; - if (c == 0 || c == '\n') - return p - 1; - } - return p - 1; -} - -long -find_value (start, length) - char *start; - long length; -{ - while (length != 0L) - { - if (isdigit (*start)) - return atol (start); - length--; - start++; - } - return 0l; -} - -/* Vector used to translate characters for comparison. - This is how we make all alphanumerics follow all else, - and ignore case in the first sorting. */ -int char_order[256]; - -void -init_char_order () -{ - int i; - for (i = 1; i < 256; i++) - char_order[i] = i; - - for (i = '0'; i <= '9'; i++) - char_order[i] += 512; - - for (i = 'a'; i <= 'z'; i++) - { - char_order[i] = 512 + i; - char_order[i + 'A' - 'a'] = 512 + i; - } -} - -/* Compare two fields (each specified as a start pointer and a character count) - according to KEYFIELD. - The sign of the value reports the relation between the fields. */ - -int -compare_field (keyfield, start1, length1, pos1, start2, length2, pos2) - struct keyfield *keyfield; - char *start1; - long length1; - long pos1; - char *start2; - long length2; - long pos2; -{ - if (keyfields->positional) - { - if (pos1 > pos2) - return 1; - else - return -1; - } - if (keyfield->numeric) - { - long value = find_value (start1, length1) - find_value (start2, length2); - if (value > 0) - return 1; - if (value < 0) - return -1; - return 0; - } - else - { - char *p1 = start1; - char *p2 = start2; - char *e1 = start1 + length1; - char *e2 = start2 + length2; - - while (1) - { - int c1, c2; - - if (p1 == e1) - c1 = 0; - else - c1 = *p1++; - if (p2 == e2) - c2 = 0; - else - c2 = *p2++; - - if (char_order[c1] != char_order[c2]) - return char_order[c1] - char_order[c2]; - if (!c1) - break; - } - - /* Strings are equal except possibly for case. 
*/ - p1 = start1; - p2 = start2; - while (1) - { - int c1, c2; - - if (p1 == e1) - c1 = 0; - else - c1 = *p1++; - if (p2 == e2) - c2 = 0; - else - c2 = *p2++; - - if (c1 != c2) - /* Reverse sign here so upper case comes out last. */ - return c2 - c1; - if (!c1) - break; - } - - return 0; - } -} - -/* A `struct linebuffer' is a structure which holds a line of text. - `readline' reads a line from a stream into a linebuffer - and works regardless of the length of the line. */ - -struct linebuffer -{ - long size; - char *buffer; -}; - -/* Initialize LINEBUFFER for use. */ - -void -initbuffer (linebuffer) - struct linebuffer *linebuffer; -{ - linebuffer->size = 200; - linebuffer->buffer = (char *) xmalloc (200); -} - -/* Read a line of text from STREAM into LINEBUFFER. - Return the length of the line. */ - -long -readline (linebuffer, stream) - struct linebuffer *linebuffer; - FILE *stream; -{ - char *buffer = linebuffer->buffer; - char *p = linebuffer->buffer; - char *end = p + linebuffer->size; - - while (1) - { - int c = getc (stream); - if (p == end) - { - buffer = (char *) xrealloc (buffer, linebuffer->size *= 2); - p += buffer - linebuffer->buffer; - end += buffer - linebuffer->buffer; - linebuffer->buffer = buffer; - } - if (c < 0 || c == '\n') - { - *p = 0; - break; - } - *p++ = c; - } - - return p - buffer; -} - -/* Sort an input file too big to sort in core. */ - -void -sort_offline (infile, nfiles, total, outfile) - char *infile; - int nfiles; - long total; - char *outfile; -{ - /* More than enough. */ - int ntemps = 2 * (total + MAX_IN_CORE_SORT - 1) / MAX_IN_CORE_SORT; - char **tempfiles = (char **) xmalloc (ntemps * sizeof (char *)); - FILE *istream = fopen (infile, "r"); - int i; - struct linebuffer lb; - long linelength; - int failure = 0; - - initbuffer (&lb); - - /* Read in one line of input data. 
*/ - - linelength = readline (&lb, istream); - - if (lb.buffer[0] != '\\' && lb.buffer[0] != '@') - { - error ("%s: not a texinfo index file", infile); - return; - } - - /* Split up the input into `ntemps' temporary files, or maybe fewer, - and put the new files' names into `tempfiles' */ - - for (i = 0; i < ntemps; i++) - { - char *outname = maketempname (++tempcount); - FILE *ostream = fopen (outname, "w"); - long tempsize = 0; - - if (!ostream) - pfatal_with_name (outname); - tempfiles[i] = outname; - - /* Copy lines into this temp file as long as it does not make file - "too big" or until there are no more lines. */ - - while (tempsize + linelength + 1 <= MAX_IN_CORE_SORT) - { - tempsize += linelength + 1; - fputs (lb.buffer, ostream); - putc ('\n', ostream); - - /* Read another line of input data. */ - - linelength = readline (&lb, istream); - if (!linelength && feof (istream)) - break; - - if (lb.buffer[0] != '\\' && lb.buffer[0] != '@') - { - error ("%s: not a texinfo index file", infile); - failure = 1; - goto fail; - } - } - fclose (ostream); - if (feof (istream)) - break; - } - - free (lb.buffer); - -fail: - /* Record number of temp files we actually needed. */ - - ntemps = i; - - /* Sort each tempfile into another tempfile. - Delete the first set of tempfiles and put the names of the second - into `tempfiles'. */ - - for (i = 0; i < ntemps; i++) - { - char *newtemp = maketempname (++tempcount); - sort_in_core (&tempfiles[i], MAX_IN_CORE_SORT, newtemp); - if (!keep_tempfiles) - unlink (tempfiles[i]); - tempfiles[i] = newtemp; - } - - if (failure) - return; - - /* Merge the tempfiles together and indexify. */ - - merge_files (tempfiles, ntemps, outfile); -} - -/* Sort INFILE, whose size is TOTAL, - assuming that is small enough to be done in-core, - then indexify it and send the output to OUTFILE (or to stdout). 
*/ - -void -sort_in_core (infile, total, outfile) - char *infile; - long total; - char *outfile; -{ - char **nextline; - char *data = (char *) xmalloc (total + 1); - char *file_data; - long file_size; - int i; - FILE *ostream = stdout; - struct lineinfo *lineinfo; - - /* Read the contents of the file into the moby array `data'. */ - - int desc = open (infile, O_RDONLY, 0); - - if (desc < 0) - fatal ("failure reopening %s", infile); - for (file_size = 0;;) - { - i = read (desc, data + file_size, total - file_size); - if (i <= 0) - break; - file_size += i; - } - file_data = data; - data[file_size] = 0; - - close (desc); - - if (file_size > 0 && data[0] != '\\' && data[0] != '@') - { - error ("%s: not a texinfo index file", infile); - return; - } - - init_char_order (); - - /* Sort routines want to know this address. */ - - text_base = data; - - /* Create the array of pointers to lines, with a default size - frequently enough. */ - - nlines = total / 50; - if (!nlines) - nlines = 2; - linearray = (char **) xmalloc (nlines * sizeof (char *)); - - /* `nextline' points to the next free slot in this array. - `nlines' is the allocated size. */ - - nextline = linearray; - - /* Parse the input file's data, and make entries for the lines. */ - - nextline = parsefile (infile, nextline, file_data, file_size); - if (nextline == 0) - { - error ("%s: not a texinfo index file", infile); - return; - } - - /* Sort the lines. */ - - /* If we have enough space, find the first keyfield of each line in advance. - Make a `struct lineinfo' for each line, which records the keyfield - as well as the line, and sort them. 
*/ - - lineinfo = (struct lineinfo *) malloc ((nextline - linearray) * sizeof (struct lineinfo)); - - if (lineinfo) - { - struct lineinfo *lp; - char **p; - - for (lp = lineinfo, p = linearray; p != nextline; lp++, p++) - { - lp->text = *p; - lp->key.text = find_field (keyfields, *p, &lp->keylen); - if (keyfields->numeric) - lp->key.number = find_value (lp->key.text, lp->keylen); - } - - qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo), compare_prepared); - - for (lp = lineinfo, p = linearray; p != nextline; lp++, p++) - *p = lp->text; - - free (lineinfo); - } - else - qsort (linearray, nextline - linearray, sizeof (char *), compare_full); - - /* Open the output file. */ - - if (outfile) - { - ostream = fopen (outfile, "w"); - if (!ostream) - pfatal_with_name (outfile); - } - - writelines (linearray, nextline - linearray, ostream); - if (outfile) - fclose (ostream); - - free (linearray); - free (data); -} - -/* Parse an input string in core into lines. - DATA is the input string, and SIZE is its length. - Data goes in LINEARRAY starting at NEXTLINE. - The value returned is the first entry in LINEARRAY still unused. - Value 0 means input file contents are invalid. */ - -char ** -parsefile (filename, nextline, data, size) - char *filename; - char **nextline; - char *data; - long size; -{ - char *p, *end; - char **line = nextline; - - p = data; - end = p + size; - *end = 0; - - while (p != end) - { - if (p[0] != '\\' && p[0] != '@') - return 0; - - *line = p; - while (*p && *p != '\n') - p++; - if (p != end) - p++; - - line++; - if (line == linearray + nlines) - { - char **old = linearray; - linearray = (char **) xrealloc (linearray, sizeof (char *) * (nlines *= 4)); - line += linearray - old; - } - } - - return line; -} - -/* Indexification is a filter applied to the sorted lines - as they are being written to the output file. 
- Multiple entries for the same name, with different page numbers, - get combined into a single entry with multiple page numbers. - The first braced field, which is used for sorting, is discarded. - However, its first character is examined, folded to lower case, - and if it is different from that in the previous line fed to us - a \initial line is written with one argument, the new initial. - - If an entry has four braced fields, then the second and third - constitute primary and secondary names. - In this case, each change of primary name - generates a \primary line which contains only the primary name, - and in between these are \secondary lines which contain - just a secondary name and page numbers. */ - -/* The last primary name we wrote a \primary entry for. - If only one level of indexing is being done, this is the last name seen. */ -char *lastprimary; -/* Length of storage allocated for lastprimary. */ -int lastprimarylength; - -/* Similar, for the secondary name. */ -char *lastsecondary; -int lastsecondarylength; - -/* Zero if we are not in the middle of writing an entry. - One if we have written the beginning of an entry but have not - yet written any page numbers into it. - Greater than one if we have written the beginning of an entry - plus at least one page number. */ -int pending; - -/* The initial (for sorting purposes) of the last primary entry written. - When this changes, a \initial {c} line is written */ - -char *lastinitial; - -int lastinitiallength; - -/* When we need a string of length 1 for the value of lastinitial, - store it here. */ - -char lastinitial1[2]; - -/* Initialize static storage for writing an index. 
*/ - -static void -xbzero(s, n) - char *s; - int n; -{ - register char *p; - for (p = s; n--; ) - *p++ = '\0'; -} - -void -init_index () -{ - pending = 0; - lastinitial = lastinitial1; - lastinitial1[0] = 0; - lastinitial1[1] = 0; - lastinitiallength = 0; - lastprimarylength = 100; - lastprimary = (char *) xmalloc (lastprimarylength + 1); - xbzero (lastprimary, lastprimarylength + 1); - lastsecondarylength = 100; - lastsecondary = (char *) xmalloc (lastsecondarylength + 1); - xbzero (lastsecondary, lastsecondarylength + 1); -} - -/* Indexify. Merge entries for the same name, - insert headers for each initial character, etc. */ - -void -indexify (line, ostream) - char *line; - FILE *ostream; -{ - char *primary, *secondary, *pagenumber; - int primarylength, secondarylength = 0, pagelength; - int nosecondary; - int initiallength; - char *initial; - char initial1[2]; - register char *p; - - /* First, analyze the parts of the entry fed to us this time. */ - - p = find_braced_pos (line, 0, 0, 0); - if (*p == '{') - { - initial = p; - /* Get length of inner pair of braces starting at `p', - including that inner pair of braces. */ - initiallength = find_braced_end (p + 1) + 1 - p; - } - else - { - initial = initial1; - initial1[0] = *p; - initial1[1] = 0; - initiallength = 1; - - if (initial1[0] >= 'a' && initial1[0] <= 'z') - initial1[0] -= 040; - } - - pagenumber = find_braced_pos (line, 1, 0, 0); - pagelength = find_braced_end (pagenumber) - pagenumber; - if (pagelength == 0) - abort (); - - primary = find_braced_pos (line, 2, 0, 0); - primarylength = find_braced_end (primary) - primary; - - secondary = find_braced_pos (line, 3, 0, 0); - nosecondary = !*secondary; - if (!nosecondary) - secondarylength = find_braced_end (secondary) - secondary; - - /* If the primary is different from before, make a new primary entry. */ - if (strncmp (primary, lastprimary, primarylength)) - { - /* Close off current secondary entry first, if one is open. 
*/ - if (pending) - { - fputs ("}\n", ostream); - pending = 0; - } - - /* If this primary has a different initial, include an entry for - the initial. */ - if (initiallength != lastinitiallength || - strncmp (initial, lastinitial, initiallength)) - { - fprintf (ostream, "\\initial {"); - fwrite (initial, 1, initiallength, ostream); - fprintf (ostream, "}\n", initial); - if (initial == initial1) - { - lastinitial = lastinitial1; - *lastinitial1 = *initial1; - } - else - { - lastinitial = initial; - } - lastinitiallength = initiallength; - } - - /* Make the entry for the primary. */ - if (nosecondary) - fputs ("\\entry {", ostream); - else - fputs ("\\primary {", ostream); - fwrite (primary, primarylength, 1, ostream); - if (nosecondary) - { - fputs ("}{", ostream); - pending = 1; - } - else - fputs ("}\n", ostream); - - /* Record name of most recent primary. */ - if (lastprimarylength < primarylength) - { - lastprimarylength = primarylength + 100; - lastprimary = (char *) xrealloc (lastprimary, - 1 + lastprimarylength); - } - strncpy (lastprimary, primary, primarylength); - lastprimary[primarylength] = 0; - - /* There is no current secondary within this primary, now. */ - lastsecondary[0] = 0; - } - - /* Should not have an entry with no subtopic following one with a subtopic. */ - - if (nosecondary && *lastsecondary) - error ("entry %s follows an entry with a secondary name", line); - - /* Start a new secondary entry if necessary. */ - if (!nosecondary && strncmp (secondary, lastsecondary, secondarylength)) - { - if (pending) - { - fputs ("}\n", ostream); - pending = 0; - } - - /* Write the entry for the secondary. */ - fputs ("\\secondary {", ostream); - fwrite (secondary, secondarylength, 1, ostream); - fputs ("}{", ostream); - pending = 1; - - /* Record name of most recent secondary. 
*/ - if (lastsecondarylength < secondarylength) - { - lastsecondarylength = secondarylength + 100; - lastsecondary = (char *) xrealloc (lastsecondary, - 1 + lastsecondarylength); - } - strncpy (lastsecondary, secondary, secondarylength); - lastsecondary[secondarylength] = 0; - } - - /* Here to add one more page number to the current entry. */ - if (pending++ != 1) - fputs (", ", ostream); /* Punctuate first, if this is not the first. */ - fwrite (pagenumber, pagelength, 1, ostream); -} - -/* Close out any unfinished output entry. */ - -void -finish_index (ostream) - FILE *ostream; -{ - if (pending) - fputs ("}\n", ostream); - free (lastprimary); - free (lastsecondary); -} - -/* Copy the lines in the sorted order. - Each line is copied out of the input file it was found in. */ - -void -writelines (linearray, nlines, ostream) - char **linearray; - int nlines; - FILE *ostream; -{ - char **stop_line = linearray + nlines; - char **next_line; - - init_index (); - - /* Output the text of the lines, and free the buffer space. */ - - for (next_line = linearray; next_line != stop_line; next_line++) - { - /* If -u was specified, output the line only if distinct from previous one. */ - if (next_line == linearray - /* Compare previous line with this one, using only the - explicitly specd keyfields. */ - || compare_general (*(next_line - 1), *next_line, 0L, 0L, num_keyfields - 1)) - { - char *p = *next_line; - char c; - - while ((c = *p++) && c != '\n') - /* Do nothing. */ ; - *(p - 1) = 0; - indexify (*next_line, ostream); - } - } - - finish_index (ostream); -} - -/* Assume (and optionally verify) that each input file is sorted; - merge them and output the result. - Returns nonzero if any input file fails to be sorted. - - This is the high-level interface that can handle an unlimited - number of files. 
*/ - -#define MAX_DIRECT_MERGE 10 - -int -merge_files (infiles, nfiles, outfile) - char **infiles; - int nfiles; - char *outfile; -{ - char **tempfiles; - int ntemps; - int i; - int value = 0; - int start_tempcount = tempcount; - - if (nfiles <= MAX_DIRECT_MERGE) - return merge_direct (infiles, nfiles, outfile); - - /* Merge groups of MAX_DIRECT_MERGE input files at a time, - making a temporary file to hold each group's result. */ - - ntemps = (nfiles + MAX_DIRECT_MERGE - 1) / MAX_DIRECT_MERGE; - tempfiles = (char **) xmalloc (ntemps * sizeof (char *)); - for (i = 0; i < ntemps; i++) - { - int nf = MAX_DIRECT_MERGE; - if (i + 1 == ntemps) - nf = nfiles - i * MAX_DIRECT_MERGE; - tempfiles[i] = maketempname (++tempcount); - value |= merge_direct (&infiles[i * MAX_DIRECT_MERGE], nf, tempfiles[i]); - } - - /* All temporary files that existed before are no longer needed - since their contents have been merged into our new tempfiles. - So delete them. */ - flush_tempfiles (start_tempcount); - - /* Now merge the temporary files we created. */ - - merge_files (tempfiles, ntemps, outfile); - - free (tempfiles); - - return value; -} - -/* Assume (and optionally verify) that each input file is sorted; - merge them and output the result. - Returns nonzero if any input file fails to be sorted. - - This version of merging will not work if the number of - input files gets too high. Higher level functions - use it only with a bounded number of input files. 
*/ - -int -merge_direct (infiles, nfiles, outfile) - char **infiles; - int nfiles; - char *outfile; -{ - struct linebuffer *lb1, *lb2; - struct linebuffer **thisline, **prevline; - FILE **streams; - int i; - int nleft; - int lossage = 0; - int *file_lossage; - struct linebuffer *prev_out = 0; - FILE *ostream = stdout; - - if (outfile) - { - ostream = fopen (outfile, "w"); - } - if (!ostream) - pfatal_with_name (outfile); - - init_index (); - - if (nfiles == 0) - { - if (outfile) - fclose (ostream); - return 0; - } - - /* For each file, make two line buffers. - Also, for each file, there is an element of `thisline' - which points at any time to one of the file's two buffers, - and an element of `prevline' which points to the other buffer. - `thisline' is supposed to point to the next available line from the file, - while `prevline' holds the last file line used, - which is remembered so that we can verify that the file is properly sorted. */ - - /* lb1 and lb2 contain one buffer each per file. */ - lb1 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer)); - lb2 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer)); - - /* thisline[i] points to the linebuffer holding the next available line in file i, - or is zero if there are no lines left in that file. */ - thisline = (struct linebuffer **) - xmalloc (nfiles * sizeof (struct linebuffer *)); - /* prevline[i] points to the linebuffer holding the last used line - from file i. This is just for verifying that file i is properly - sorted. */ - prevline = (struct linebuffer **) - xmalloc (nfiles * sizeof (struct linebuffer *)); - /* streams[i] holds the input stream for file i. */ - streams = (FILE **) xmalloc (nfiles * sizeof (FILE *)); - /* file_lossage[i] is nonzero if we already know file i is not - properly sorted. */ - file_lossage = (int *) xmalloc (nfiles * sizeof (int)); - - /* Allocate and initialize all that storage. 
*/ - - for (i = 0; i < nfiles; i++) - { - initbuffer (&lb1[i]); - initbuffer (&lb2[i]); - thisline[i] = &lb1[i]; - prevline[i] = &lb2[i]; - file_lossage[i] = 0; - streams[i] = fopen (infiles[i], "r"); - if (!streams[i]) - pfatal_with_name (infiles[i]); - - readline (thisline[i], streams[i]); - } - - /* Keep count of number of files not at eof. */ - nleft = nfiles; - - while (nleft) - { - struct linebuffer *best = 0; - struct linebuffer *exch; - int bestfile = -1; - int i; - - /* Look at the next avail line of each file; choose the least one. */ - - for (i = 0; i < nfiles; i++) - { - if (thisline[i] && - (!best || - 0 < compare_general (best->buffer, thisline[i]->buffer, - (long) bestfile, (long) i, num_keyfields))) - { - best = thisline[i]; - bestfile = i; - } - } - - /* Output that line, unless it matches the previous one and we - don't want duplicates. */ - - if (!(prev_out && - !compare_general (prev_out->buffer, - best->buffer, 0L, 1L, num_keyfields - 1))) - indexify (best->buffer, ostream); - prev_out = best; - - /* Now make the line the previous of its file, and fetch a new - line from that file. */ - - exch = prevline[bestfile]; - prevline[bestfile] = thisline[bestfile]; - thisline[bestfile] = exch; - - while (1) - { - /* If the file has no more, mark it empty. */ - - if (feof (streams[bestfile])) - { - thisline[bestfile] = 0; - /* Update the number of files still not empty. */ - nleft--; - break; - } - readline (thisline[bestfile], streams[bestfile]); - if (thisline[bestfile]->buffer[0] || !feof (streams[bestfile])) - break; - } - } - - finish_index (ostream); - - /* Free all storage and close all input streams. */ - - for (i = 0; i < nfiles; i++) - { - fclose (streams[i]); - free (lb1[i].buffer); - free (lb2[i].buffer); - } - free (file_lossage); - free (lb1); - free (lb2); - free (thisline); - free (prevline); - free (streams); - - if (outfile) - fclose (ostream); - - return lossage; -} - -/* Print error message and exit. 
*/ - -void -fatal (s1, s2) - char *s1, *s2; -{ - error (s1, s2); - exit (TI_FATAL_ERROR); -} - -/* Print error message. S1 is printf control string, S2 is arg for it. */ - -void -error (s1, s2) - char *s1, *s2; -{ - printf ("%s: ", program_name); - printf (s1, s2); - printf ("\n"); -} - -#if !defined (HAVE_STRERROR) -static char * -strerror (n) - int n; -{ - static char ebuf[40]; - - if (n < sys_nerr) - return sys_errlist[n]; - else - { - sprintf (ebuf, "Unknown error %d", n); - return ebuf; - } -} -#endif - -void -perror_with_name (name) - char *name; -{ - char *s; - - s = concat ("", strerror (errno), " for %s"); - error (s, name); -} - -void -pfatal_with_name (name) - char *name; -{ - char *s; - - s = concat ("", strerror (errno), " for %s"); - fatal (s, name); -} - -/* Return a newly-allocated string whose contents concatenate those of - S1, S2, S3. */ - -char * -concat (s1, s2, s3) - char *s1, *s2, *s3; -{ - int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3); - char *result = (char *) xmalloc (len1 + len2 + len3 + 1); - - strcpy (result, s1); - strcpy (result + len1, s2); - strcpy (result + len1 + len2, s3); - *(result + len1 + len2 + len3) = 0; - - return result; -} - -/* Just like malloc, but kills the program in case of fatal error. */ -void * -xmalloc (nbytes) - int nbytes; -{ - void *temp = (void *) malloc (nbytes); - - if (nbytes && temp == (void *)NULL) - memory_error ("xmalloc", nbytes); - - return (temp); -} - -/* Like realloc (), but barfs if there isn't enough memory. */ -void * -xrealloc (pointer, nbytes) - void *pointer; - int nbytes; -{ - void *temp; - - if (!pointer) - temp = (void *)xmalloc (nbytes); - else - temp = (void *)realloc (pointer, nbytes); - - if (nbytes && !temp) - memory_error ("xrealloc", nbytes); - - return (temp); -} - -memory_error (callers_name, bytes_wanted) - char *callers_name; - int bytes_wanted; -{ - char printable_string[80]; - - sprintf (printable_string, - "Virtual memory exhausted in %s ()! 
Needed %d bytes.", - callers_name, bytes_wanted); - - error (printable_string, ""); - abort (); -} diff --git a/gnu/lib/libreadline/readline/chardefs.h b/gnu/lib/libreadline/readline/chardefs.h deleted file mode 100644 index aa63da61da5c..000000000000 --- a/gnu/lib/libreadline/readline/chardefs.h +++ /dev/null @@ -1,89 +0,0 @@ -/* chardefs.h -- Character definitions for readline. */ -#ifndef _CHARDEFS_ -#define _CHARDEFS_ - -#include <ctype.h> -#include <string.h> - -#ifndef savestring -extern char *xmalloc (); -#define savestring(x) strcpy (xmalloc (1 + strlen (x)), (x)) -#endif - -#ifndef whitespace -#define whitespace(c) (((c) == ' ') || ((c) == '\t')) -#endif - -#ifdef CTRL -#undef CTRL -#endif - -/* Some character stuff. */ -#define control_character_threshold 0x020 /* Smaller than this is control. */ -#define meta_character_threshold 0x07f /* Larger than this is Meta. */ -#define control_character_bit 0x40 /* 0x000000, must be off. */ -#define meta_character_bit 0x080 /* x0000000, must be on. */ -#define largest_char 255 /* Largest character value. 
*/ - -#define META_CHAR(c) ((c) > meta_character_threshold && (c) <= largest_char) -#define CTRL(c) ((c) & (~control_character_bit)) -#define META(c) ((c) | meta_character_bit) - -#define UNMETA(c) ((c) & (~meta_character_bit)) -#define UNCTRL(c) to_upper(((c)|control_character_bit)) - -#define lowercase_p(c) islower(c) -#define uppercase_p(c) isupper(c) - -#define pure_alphabetic(c) isalpha(c) - -#ifndef to_upper -#define to_upper(c) toupper(c) -#define to_lower(c) tolower(c) -#endif - -#define CTRL_P(c) ((c) < control_character_threshold) -#define META_P(c) ((c) > meta_character_threshold) - -#ifndef digit_value -#define digit_value(x) ((x) - '0') -#endif - -#ifndef NEWLINE -#define NEWLINE '\n' -#endif - -#ifndef RETURN -#define RETURN CTRL('M') -#endif - -#ifndef RUBOUT -#define RUBOUT 0x7f -#endif - -#ifndef TAB -#define TAB '\t' -#endif - -#ifdef ABORT_CHAR -#undef ABORT_CHAR -#endif -#define ABORT_CHAR CTRL('G') - -#ifdef PAGE -#undef PAGE -#endif -#define PAGE CTRL('L') - -#ifdef SPACE -#undef SPACE -#endif -#define SPACE 0x20 - -#ifdef ESC -#undef ESC -#endif - -#define ESC CTRL('[') - -#endif /* _CHARDEFS_ */ diff --git a/gnu/lib/libreadline/readline/history.h b/gnu/lib/libreadline/readline/history.h deleted file mode 100644 index 2ef5424cb184..000000000000 --- a/gnu/lib/libreadline/readline/history.h +++ /dev/null @@ -1,149 +0,0 @@ -/* History.h -- the names of functions that you can call in history. */ - -/* The structure used to store a history entry. */ -typedef struct _hist_entry { - char *line; - char *data; -} HIST_ENTRY; - -/* A structure used to pass the current state of the history stuff around. */ -typedef struct _hist_state { - HIST_ENTRY **entries; /* Pointer to the entries themselves. */ - int offset; /* The location pointer within this array. */ - int length; /* Number of elements within this array. */ - int size; /* Number of slots allocated to this array. */ -} HISTORY_STATE; - -/* For convenience only. 
You set this when interpreting history commands. - It is the logical offset of the first history element. */ -extern int history_base; - -/* Begin a session in which the history functions might be used. This - just initializes the interactive variables. */ -extern void using_history (); - -/* Return the current HISTORY_STATE of the history. */ -extern HISTORY_STATE *history_get_history_state (); - -/* Set the state of the current history array to STATE. */ -extern void history_set_history_state (); - -/* Place STRING at the end of the history list. - The associated data field (if any) is set to NULL. */ -extern void add_history (); - -/* Returns the number which says what history element we are now - looking at. */ -extern int where_history (); - -/* Set the position in the history list to POS. */ -int history_set_pos (); - -/* Search for STRING in the history list, starting at POS, an - absolute index into the list. DIR, if negative, says to search - backwards from POS, else forwards. - Returns the absolute index of the history element where STRING - was found, or -1 otherwise. */ -extern int history_search_pos (); - -/* A reasonably useless function, only here for completeness. WHICH - is the magic number that tells us which element to delete. The - elements are numbered from 0. */ -extern HIST_ENTRY *remove_history (); - -/* Stifle the history list, remembering only MAX number of entries. */ -extern void stifle_history (); - -/* Stop stifling the history. This returns the previous amount the - history was stifled by. The value is positive if the history was - stifled, negative if it wasn't. */ -extern int unstifle_history (); - -/* Add the contents of FILENAME to the history list, a line at a time. - If FILENAME is NULL, then read from ~/.history. Returns 0 if - successful, or errno if not. */ -extern int read_history (); - -/* Read a range of lines from FILENAME, adding them to the history list. - Start reading at the FROM'th line and end at the TO'th. 
If FROM - is zero, start at the beginning. If TO is less than FROM, read - until the end of the file. If FILENAME is NULL, then read from - ~/.history. Returns 0 if successful, or errno if not. */ -extern int read_history_range (); - -/* Append the current history to FILENAME. If FILENAME is NULL, - then append the history list to ~/.history. Values returned - are as in read_history (). */ -extern int write_history (); - -/* Append NELEMENT entries to FILENAME. The entries appended are from - the end of the list minus NELEMENTs up to the end of the list. */ -int append_history (); - -/* Make the history entry at WHICH have LINE and DATA. This returns - the old entry so you can dispose of the data. In the case of an - invalid WHICH, a NULL pointer is returned. */ -extern HIST_ENTRY *replace_history_entry (); - -/* Return the history entry at the current position, as determined by - history_offset. If there is no entry there, return a NULL pointer. */ -HIST_ENTRY *current_history (); - -/* Back up history_offset to the previous history entry, and return - a pointer to that entry. If there is no previous entry, return - a NULL pointer. */ -extern HIST_ENTRY *previous_history (); - -/* Move history_offset forward to the next item in the input_history, - and return the a pointer to that entry. If there is no next entry, - return a NULL pointer. */ -extern HIST_ENTRY *next_history (); - -/* Return a NULL terminated array of HIST_ENTRY which is the current input - history. Element 0 of this list is the beginning of time. If there - is no history, return NULL. */ -extern HIST_ENTRY **history_list (); - -/* Search the history for STRING, starting at history_offset. - If DIRECTION < 0, then the search is through previous entries, - else through subsequent. If the string is found, then - current_history () is the history entry, and the value of this function - is the offset in the line of that history entry that the string was - found in. 
Otherwise, nothing is changed, and a -1 is returned. */ -extern int history_search (); - -/* Expand the string STRING, placing the result into OUTPUT, a pointer - to a string. Returns: - - 0) If no expansions took place (or, if the only change in - the text was the de-slashifying of the history expansion - character) - 1) If expansions did take place - -1) If there was an error in expansion. - - If an error ocurred in expansion, then OUTPUT contains a descriptive - error message. */ -extern int history_expand (); - -/* Return an array of tokens, much as the shell might. The tokens are - parsed out of STRING. */ -extern char **history_tokenize (); - -/* Extract a string segment consisting of the FIRST through LAST - arguments present in STRING. Arguments are broken up as in - the shell. */ -extern char *history_arg_extract (); - -/* Return the number of bytes that the primary history entries are using. - This just adds up the lengths of the_history->lines. */ -extern int history_total_bytes (); - -/* Exported history variables. */ -extern int history_stifled; -extern int history_length; -extern int max_input_history; -extern char history_expansion_char; -extern char history_subst_char; -extern char history_comment_char; -extern char *history_no_expand_chars; -extern int history_base; diff --git a/gnu/lib/libreadline/readline/keymaps.h b/gnu/lib/libreadline/readline/keymaps.h deleted file mode 100644 index f7e9f6f9689b..000000000000 --- a/gnu/lib/libreadline/readline/keymaps.h +++ /dev/null @@ -1,91 +0,0 @@ -/* keymaps.h -- Manipulation of readline keymaps. */ - -/* Copyright (C) 1987, 1989, 1992 Free Software Foundation, Inc. - - This file is part of the GNU Readline Library, a library for - reading lines of text with interactive input and history editing. 
- - The GNU Readline Library is free software; you can redistribute it - and/or modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 1, or - (at your option) any later version. - - The GNU Readline Library is distributed in the hope that it will be - useful, but WITHOUT ANY WARRANTY; without even the implied warranty - of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - The GNU General Public License is often shipped with GNU software, and - is generally kept in a file called COPYING or LICENSE. If you do not - have a copy of the license, write to the Free Software Foundation, - 675 Mass Ave, Cambridge, MA 02139, USA. */ - -#ifndef _KEYMAPS_H_ -#define _KEYMAPS_H_ - -#include <readline/chardefs.h> - -#if !defined (__FUNCTION_DEF) -# define __FUNCTION_DEF -typedef int Function (); -typedef void VFunction (); -typedef char *CPFunction (); -typedef char **CPPFunction (); -#endif - -/* A keymap contains one entry for each key in the ASCII set. - Each entry consists of a type and a pointer. - POINTER is the address of a function to run, or the - address of a keymap to indirect through. - TYPE says which kind of thing POINTER is. */ -typedef struct _keymap_entry { - char type; - Function *function; -} KEYMAP_ENTRY; - -/* This must be large enough to hold bindings for all of the characters - in a desired character set (e.g, 128 for ASCII, 256 for ISO Latin-x, - and so on). */ -#define KEYMAP_SIZE 256 - -/* I wanted to make the above structure contain a union of: - union { Function *function; struct _keymap_entry *keymap; } value; - but this made it impossible for me to create a static array. - Maybe I need C lessons. */ - -typedef KEYMAP_ENTRY KEYMAP_ENTRY_ARRAY[KEYMAP_SIZE]; -typedef KEYMAP_ENTRY *Keymap; - -/* The values that TYPE can have in a keymap entry. 
*/ -#define ISFUNC 0 -#define ISKMAP 1 -#define ISMACR 2 - -extern KEYMAP_ENTRY_ARRAY emacs_standard_keymap, emacs_meta_keymap, emacs_ctlx_keymap; -extern KEYMAP_ENTRY_ARRAY vi_insertion_keymap, vi_movement_keymap; - -/* Return a new, empty keymap. - Free it with free() when you are done. */ -extern Keymap rl_make_bare_keymap (); - -/* Return a new keymap which is a copy of MAP. */ -extern Keymap rl_copy_keymap (); - -/* Return a new keymap with the printing characters bound to rl_insert, - the lowercase Meta characters bound to run their equivalents, and - the Meta digits bound to produce numeric arguments. */ -extern Keymap rl_make_keymap (); - -extern void rl_discard_keymap (); - -/* Return the keymap corresponding to a given name. Names look like - `emacs' or `emacs-meta' or `vi-insert'. */ -extern Keymap rl_get_keymap_by_name (); - -/* Return the current keymap. */ -extern Keymap rl_get_keymap (); - -/* Set the current keymap to MAP. */ -extern void rl_set_keymap (); - -#endif /* _KEYMAPS_H_ */ diff --git a/gnu/lib/libreadline/readline/readline.h b/gnu/lib/libreadline/readline/readline.h deleted file mode 100644 index bbc8a0f02ac7..000000000000 --- a/gnu/lib/libreadline/readline/readline.h +++ /dev/null @@ -1,267 +0,0 @@ -/* Readline.h -- the names of functions callable from within readline. */ - -/* Copyright (C) 1987, 1989, 1992 Free Software Foundation, Inc. - - This file is part of the GNU Readline Library, a library for - reading lines of text with interactive input and history editing. - - The GNU Readline Library is free software; you can redistribute it - and/or modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 1, or - (at your option) any later version. - - The GNU Readline Library is distributed in the hope that it will be - useful, but WITHOUT ANY WARRANTY; without even the implied warranty - of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - GNU General Public License for more details. - - The GNU General Public License is often shipped with GNU software, and - is generally kept in a file called COPYING or LICENSE. If you do not - have a copy of the license, write to the Free Software Foundation, - 675 Mass Ave, Cambridge, MA 02139, USA. */ - -#if !defined (_READLINE_H_) -#define _READLINE_H_ - -#include <readline/keymaps.h> -#include <readline/tilde.h> - -/* The functions for manipulating the text of the line within readline. -Most of these functions are bound to keys by default. */ -extern int - rl_tilde_expand (), - rl_beg_of_line (), rl_backward (), rl_delete (), rl_end_of_line (), - rl_forward (), ding (), rl_backward (), rl_newline (), rl_kill_line (), - rl_clear_screen (), rl_get_next_history (), rl_get_previous_history (), - rl_quoted_insert (), rl_reverse_search_history (), rl_transpose_chars (), - rl_unix_line_discard (), rl_quoted_insert (), rl_unix_word_rubout (), - rl_yank (), rl_rubout (), rl_backward_word (), rl_kill_word (), - rl_forward_word (), rl_tab_insert (), rl_yank_pop (), rl_yank_nth_arg (), - rl_backward_kill_word (), rl_backward_kill_line (), rl_transpose_words (), - rl_complete (), rl_possible_completions (), rl_insert_completions (), - rl_do_lowercase_version (), rl_kill_full_line (), - rl_digit_argument (), rl_universal_argument (), rl_abort (), - rl_undo_command (), rl_revert_line (), rl_beginning_of_history (), - rl_end_of_history (), rl_forward_search_history (), rl_insert (), - rl_upcase_word (), rl_downcase_word (), rl_capitalize_word (), - rl_restart_output (), rl_re_read_init_file (), rl_dump_functions (), - rl_delete_horizontal_space (), rl_history_search_forward (), - rl_history_search_backward (); - -/* `Public' utility functions. 
*/ -extern int rl_insert_text (), rl_delete_text (), rl_kill_text (); -extern int rl_complete_internal (); -extern int rl_expand_prompt (); -extern int rl_initialize (); -extern int rl_set_signals (), rl_clear_signals (); -extern int rl_init_argument (), rl_digit_argument (); -extern int rl_read_key (), rl_getc (), rl_stuff_char (); -extern int maybe_save_line (), maybe_unsave_line (), maybe_replace_line (); -extern int rl_modifying (); - -extern int rl_begin_undo_group (), rl_end_undo_group (); -extern void rl_add_undo (), free_undo_list (); -extern int rl_do_undo (); - -extern int rl_insert_close (); - -/* These are *both* defined even when VI_MODE is not. */ -extern int rl_vi_editing_mode (), rl_emacs_editing_mode (); - -/* Non incremental history searching. */ -extern int - rl_noninc_forward_search (), rl_noninc_reverse_search (), - rl_noninc_forward_search_again (), rl_noninc_reverse_search_again (); - -/* Things for vi mode. */ -extern int rl_vi_check (), rl_vi_textmod_command (); -extern int - rl_vi_redo (), rl_vi_tilde_expand (), - rl_vi_movement_mode (), rl_vi_insertion_mode (), rl_vi_arg_digit (), - rl_vi_prev_word (), rl_vi_next_word (), rl_vi_char_search (), - rl_vi_eof_maybe (), rl_vi_append_mode (), rl_vi_put (), - rl_vi_append_eol (), rl_vi_insert_beg (), rl_vi_delete (), rl_vi_comment (), - rl_vi_first_print (), rl_vi_fword (), rl_vi_fWord (), rl_vi_bword (), - rl_vi_bWord (), rl_vi_eword (), rl_vi_eWord (), rl_vi_end_word (), - rl_vi_change_case (), rl_vi_match (), rl_vi_bracktype (), - rl_vi_change_char (), rl_vi_yank_arg (), rl_vi_search (), - rl_vi_search_again (), rl_vi_subst (), rl_vi_overstrike (), - rl_vi_overstrike_delete (), rl_vi_replace(), rl_vi_column (), - rl_vi_delete_to (), rl_vi_change_to (), rl_vi_yank_to (), - rl_vi_complete (), rl_vi_fetch_history (); - -/* Keyboard macro commands. 
*/ -extern int rl_start_kbd_macro (), rl_end_kbd_macro (); -extern int rl_call_last_kbd_macro (); - -extern int rl_arrow_keys(), rl_refresh_line (); - -/* Maintaining the state of undo. We remember individual deletes and inserts - on a chain of things to do. */ - -/* The actions that undo knows how to undo. Notice that UNDO_DELETE means - to insert some text, and UNDO_INSERT means to delete some text. I.e., - the code tells undo what to undo, not how to undo it. */ -enum undo_code { UNDO_DELETE, UNDO_INSERT, UNDO_BEGIN, UNDO_END }; - -/* What an element of THE_UNDO_LIST looks like. */ -typedef struct undo_list { - struct undo_list *next; - int start, end; /* Where the change took place. */ - char *text; /* The text to insert, if undoing a delete. */ - enum undo_code what; /* Delete, Insert, Begin, End. */ -} UNDO_LIST; - -/* The current undo list for RL_LINE_BUFFER. */ -extern UNDO_LIST *rl_undo_list; - -/* The data structure for mapping textual names to code addresses. */ -typedef struct { - char *name; - Function *function; -} FUNMAP; - -extern FUNMAP **funmap; - -/* **************************************************************** */ -/* */ -/* Well Published Variables */ -/* */ -/* **************************************************************** */ - -/* The name of the calling program. You should initialize this to - whatever was in argv[0]. It is used when parsing conditionals. */ -extern char *rl_readline_name; - -/* The line buffer that is in use. */ -extern char *rl_line_buffer; - -/* The location of point, and end. */ -extern int rl_point, rl_end; - -/* The name of the terminal to use. */ -extern char *rl_terminal_name; - -/* The input and output streams. */ -extern FILE *rl_instream, *rl_outstream; - -/* The basic list of characters that signal a break between words for the - completer routine. The initial contents of this variable is what - breaks words in the shell, i.e. "n\"\\'`@$>". 
*/ -extern char *rl_basic_word_break_characters; - -/* The list of characters that signal a break between words for - rl_complete_internal. The default list is the contents of - rl_basic_word_break_characters. */ -extern char *rl_completer_word_break_characters; - -/* List of characters which can be used to quote a substring of the line. - Completion occurs on the entire substring, and within the substring - rl_completer_word_break_characters are treated as any other character, - unless they also appear within this list. */ -extern char *rl_completer_quote_characters; - -/* List of characters that are word break characters, but should be left - in TEXT when it is passed to the completion function. The shell uses - this to help determine what kind of completing to do. */ -extern char *rl_special_prefixes; - -/* Pointer to the generator function for completion_matches (). - NULL means to use filename_entry_function (), the default filename - completer. */ -extern Function *rl_completion_entry_function; - -/* If rl_ignore_some_completions_function is non-NULL it is the address - of a function to call after all of the possible matches have been - generated, but before the actual completion is done to the input line. - The function is called with one argument; a NULL terminated array - of (char *). If your function removes any of the elements, they - must be free()'ed. */ -extern Function *rl_ignore_some_completions_function; - -/* Pointer to alternative function to create matches. - Function is called with TEXT, START, and END. - START and END are indices in RL_LINE_BUFFER saying what the boundaries - of TEXT are. - If this function exists and returns NULL then call the value of - rl_completion_entry_function to try to match, otherwise use the - array of strings returned. */ -extern CPPFunction *rl_attempted_completion_function; - -/* If non-zero, then this is the address of a function to call just - before readline_internal () prints the first prompt. 
*/ -extern Function *rl_startup_hook; - -/* If non-zero, then this is the address of a function to call when - completing on a directory name. The function is called with - the address of a string (the current directory name) as an arg. */ -extern Function *rl_directory_completion_hook; - -/* Backwards compatibility with previous versions of readline. */ -#define rl_symbolic_link_hook rl_directory_completion_hook - -/* The address of a function to call periodically while Readline is - awaiting character input, or NULL, for no event handling. */ -extern Function *rl_event_hook; - -/* Non-zero means that modified history lines are preceded - with an asterisk. */ -extern int rl_show_star; - -/* Non-zero means to suppress normal filename completion after the - user-specified completion function has been called. */ -extern int rl_attempted_completion_over; - -/* **************************************************************** */ -/* */ -/* Well Published Functions */ -/* */ -/* **************************************************************** */ - -/* Read a line of input. Prompt with PROMPT. A NULL PROMPT means none. */ -extern char *readline (); - -/* These functions are from complete.c. */ -/* Return an array of strings which are the result of repeatedly calling - FUNC with TEXT. */ -extern char **completion_matches (); -extern char *username_completion_function (); -extern char *filename_completion_function (); - -/* These functions are from bind.c. */ -/* rl_add_defun (char *name, Function *function, int key) - Add NAME to the list of named functions. Make FUNCTION - be the function that gets called. - If KEY is not -1, then bind it. 
*/ -extern int rl_add_defun (); -extern int rl_bind_key (), rl_bind_key_in_map (); -extern int rl_unbind_key (), rl_unbind_key_in_map (); -extern int rl_set_key (); -extern int rl_macro_bind (), rl_generic_bind (), rl_variable_bind (); -extern int rl_translate_keyseq (); -extern Function *rl_named_function (), *rl_function_of_keyseq (); -extern int rl_parse_and_bind (); -extern Keymap rl_get_keymap (), rl_get_keymap_by_name (); -extern void rl_set_keymap (); -extern char **rl_invoking_keyseqs (), **rl_invoking_keyseqs_in_map (); -extern void rl_function_dumper (); -extern int rl_read_init_file (); - -/* Functions in funmap.c */ -extern void rl_list_funmap_names (); -extern void rl_initialize_funmap (); - -/* Functions in display.c */ -extern void rl_redisplay (); -extern int rl_message (), rl_clear_message (); -extern int rl_reset_line_state (); -extern int rl_character_len (); -extern int rl_show_char (); -extern int crlf (), rl_on_new_line (); -extern int rl_forced_update_display (); - -/* Definitions available for use by readline clients. */ -#define RL_PROMPT_START_IGNORE '\001' -#define RL_PROMPT_END_IGNORE '\002' - -#endif /* _READLINE_H_ */ diff --git a/gnu/lib/libreadline/readline/tilde.h b/gnu/lib/libreadline/readline/tilde.h deleted file mode 100644 index 726d081ba9cb..000000000000 --- a/gnu/lib/libreadline/readline/tilde.h +++ /dev/null @@ -1,38 +0,0 @@ -/* tilde.h: Externally available variables and function in libtilde.a. */ - -#if !defined (__TILDE_H__) -# define __TILDE_H__ - -/* Function pointers can be declared as (Function *)foo. */ -#if !defined (__FUNCTION_DEF) -# define __FUNCTION_DEF -typedef int Function (); -typedef void VFunction (); -typedef char *CPFunction (); -typedef char **CPPFunction (); -#endif /* _FUNCTION_DEF */ - -/* If non-null, this contains the address of a function to call if the - standard meaning for expanding a tilde fails. 
The function is called - with the text (sans tilde, as in "foo"), and returns a malloc()'ed string - which is the expansion, or a NULL pointer if there is no expansion. */ -extern CPFunction *tilde_expansion_failure_hook; - -/* When non-null, this is a NULL terminated array of strings which - are duplicates for a tilde prefix. Bash uses this to expand - `=~' and `:~'. */ -extern char **tilde_additional_prefixes; - -/* When non-null, this is a NULL terminated array of strings which match - the end of a username, instead of just "/". Bash sets this to - `:' and `=~'. */ -extern char **tilde_additional_suffixes; - -/* Return a new string which is the result of tilde expanding STRING. */ -extern char *tilde_expand (); - -/* Do the work of tilde expansion on FILENAME. FILENAME starts with a - tilde. If there is no expansion, call tilde_expansion_failure_hook. */ -extern char *tilde_expand_word (); - -#endif /* __TILDE_H__ */ diff --git a/gnu/lib/libreadline/sysdep.h b/gnu/lib/libreadline/sysdep.h deleted file mode 100644 index 007a56193482..000000000000 --- a/gnu/lib/libreadline/sysdep.h +++ /dev/null @@ -1,37 +0,0 @@ -/* System-dependent stuff, for ``normal'' systems */ -/* If you think you need to change this file, then you are wrong. In order to - avoid a huge ugly mass of nested #ifdefs, you should create a new file just - for your system, which contains exactly those #includes and definitions that - your system needs, AND NOTHING MORE! Then, add that file to the appropriate - place in configure.in, and voila, you are done. sysdep-sunos4.h is a good - example of how to do this. */ - -#ifdef __GNUC__ -#define alloca __builtin_alloca -#else -#if defined (sparc) && defined (sun) -#include <alloca.h> -#endif -#ifndef alloca /* May be a macro, with args. 
*/ -extern char *alloca (); -#endif -#endif - -#include <sys/types.h> /* Needed by dirent.h */ -#include <sys/ioctl.h> /* Needed for TIOC?WINSZ */ - -#if defined (USG) && defined (TIOCGWINSZ) -#include <sys/stream.h> -#if defined (USGr4) || defined (USGr3) -#include <sys/ptem.h> -#endif /* USGr4 */ -#endif /* USG && TIOCGWINSZ */ - -#include <dirent.h> -typedef struct dirent dirent; - -/* SVR4 systems should use <termios.h> rather than <termio.h>. */ - -#if defined (USGr4) -#define _POSIX_VERSION -#endif diff --git a/gnu/lib/libreadline/tcsh_hack.readme b/gnu/lib/libreadline/tcsh_hack.readme deleted file mode 100644 index 6fd5da173688..000000000000 --- a/gnu/lib/libreadline/tcsh_hack.readme +++ /dev/null @@ -1,27 +0,0 @@ -*** rltty.c.orig Thu May 12 19:02:50 1994 ---- rltty.c Thu May 12 19:03:06 1994 -*************** -*** 21,26 **** ---- 21,27 ---- - have a copy of the license, write to the Free Software Foundation, - 675 Mass Ave, Cambridge, MA 02139, USA. */ - #include <sys/types.h> -+ #include <sys/ioctl.h> - #include <signal.h> - #include <errno.h> - #include <stdio.h> -*************** -*** 359,364 **** ---- 360,371 ---- - int tty; - TIOTYPE *tiop; - { -+ /* XXX this prevents to got editing mode from tcsh. Ache */ -+ struct winsize w; -+ -+ if (ioctl (tty, TIOCGWINSZ, &w) == 0) -+ (void) ioctl (tty, TIOCSWINSZ, &w); -+ - while (GETATTR (tty, tiop) < 0) - { - if (errno != EINTR) diff --git a/gnu/lib/libregex/doc/Makefile.in b/gnu/lib/libregex/doc/Makefile.in deleted file mode 100644 index 2f5d382c06e5..000000000000 --- a/gnu/lib/libregex/doc/Makefile.in +++ /dev/null @@ -1,92 +0,0 @@ -# Makefile for regex documentation. -# -# Copyright (C) 1992 Free Software Foundation, Inc. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2, or (at your option) -# any later version. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - -# Installation directories. -prefix = /usr/local -infodir = $(prefix)/info - -srcdir = @srcdir@ -VPATH = @srcdir@:../@srcdir@ - -INSTALL = @INSTALL@ -INSTALL_DATA = @INSTALL_DATA@ - -MAKEINFO = makeinfo --no-split -SHELL = /bin/sh -TEX = tex -TEXINDEX = texindex - -default all: regex.info regex.dvi -.PHONY: default all - -# We need to include some code from regex.h. -regex.texi: xregex.texi - rm -f $@ - gawk -f include.awk -vsource=../$(srcdir)/regex.h \ - <../$(srcdir)/doc/xregex.texi \ - | expand >$@ - chmod a-w $@ - -regex.dvi: regex.cps - $(TEX) regex.texi -regex.cps: regex.cp - $(TEXINDEX) regex.?? -regex.cp: regex.texi - $(TEX) ../$(srcdir)/doc/regex.texi - -regex.info: regex.texi - $(MAKEINFO) ../$(srcdir)/doc/regex.texi - -# I know of no way to make a good TAGS file from Texinfo source. -TAGS: - -check: -.PHONY: check - -install: regex.info - -mkdir $(prefix) $(infodir) - for i in *.info*; do $(INSTALL_DATA) $$i $(infodir)/$$i; done -.PHONY: install - -clean mostlyclean: - rm -f regex.?? *.dvi *.log *.toc - -distclean: clean - rm -f Makefile - for f in regex.??s; do if test -z "`cat $$f`"; then rm -f $$f; fi; done - -realclean: distclean - rm -f *.info* regex.??? regex.texi TAGS - -extraclean: distclean - rm -f patch* *~* *\#* *.orig *.rej *.bak core a.out -.PHONY: mostlyclean clean distclean realclean extraclean - -Makefile: Makefile.in ../config.status - (cd ..; sh config.status) - -# Prevent GNU make 3 from overflowing arg limit on system V. -.NOEXPORT: - -# Assumes $(distdir) is the place to put our files. 
-distfiles = Makefile.in *.texi texinfo.tex include.awk \ - regex.info* regex.aux regex.cps -dist: Makefile regex.info regex.cps - mkdir $(distdir) - ln $(distfiles) $(distdir) -.PHONY: dist diff --git a/gnu/lib/libregex/doc/regex.aux b/gnu/lib/libregex/doc/regex.aux deleted file mode 100644 index fd6a245eb111..000000000000 --- a/gnu/lib/libregex/doc/regex.aux +++ /dev/null @@ -1,136 +0,0 @@ -'xrdef {Overview-pg}{1} -'xrdef {Overview-snt}{Chapter'tie1} -'xrdef {Regular Expression Syntax-pg}{2} -'xrdef {Regular Expression Syntax-snt}{Chapter'tie2} -'xrdef {Syntax Bits-pg}{2} -'xrdef {Syntax Bits-snt}{Section'tie2.1} -'xrdef {Predefined Syntaxes-pg}{5} -'xrdef {Predefined Syntaxes-snt}{Section'tie2.2} -'xrdef {Collating Elements vs. Characters-pg}{6} -'xrdef {Collating Elements vs. Characters-snt}{Section'tie2.3} -'xrdef {The Backslash Character-pg}{7} -'xrdef {The Backslash Character-snt}{Section'tie2.4} -'xrdef {Common Operators-pg}{9} -'xrdef {Common Operators-snt}{Chapter'tie3} -'xrdef {Match-self Operator-pg}{9} -'xrdef {Match-self Operator-snt}{Section'tie3.1} -'xrdef {Match-any-character Operator-pg}{9} -'xrdef {Match-any-character Operator-snt}{Section'tie3.2} -'xrdef {Concatenation Operator-pg}{10} -'xrdef {Concatenation Operator-snt}{Section'tie3.3} -'xrdef {Repetition Operators-pg}{10} -'xrdef {Repetition Operators-snt}{Section'tie3.4} -'xrdef {Match-zero-or-more Operator-pg}{10} -'xrdef {Match-zero-or-more Operator-snt}{Section'tie3.4.1} -'xrdef {Match-one-or-more Operator-pg}{11} -'xrdef {Match-one-or-more Operator-snt}{Section'tie3.4.2} -'xrdef {Match-zero-or-one Operator-pg}{11} -'xrdef {Match-zero-or-one Operator-snt}{Section'tie3.4.3} -'xrdef {Interval Operators-pg}{12} -'xrdef {Interval Operators-snt}{Section'tie3.4.4} -'xrdef {Alternation Operator-pg}{13} -'xrdef {Alternation Operator-snt}{Section'tie3.5} -'xrdef {List Operators-pg}{13} -'xrdef {List Operators-snt}{Section'tie3.6} -'xrdef {Character Class Operators-pg}{14} -'xrdef {Character Class 
Operators-snt}{Section'tie3.6.1} -'xrdef {Range Operator-pg}{15} -'xrdef {Range Operator-snt}{Section'tie3.6.2} -'xrdef {Grouping Operators-pg}{16} -'xrdef {Grouping Operators-snt}{Section'tie3.7} -'xrdef {Back-reference Operator-pg}{17} -'xrdef {Back-reference Operator-snt}{Section'tie3.8} -'xrdef {Anchoring Operators-pg}{18} -'xrdef {Anchoring Operators-snt}{Section'tie3.9} -'xrdef {Match-beginning-of-line Operator-pg}{18} -'xrdef {Match-beginning-of-line Operator-snt}{Section'tie3.9.1} -'xrdef {Match-end-of-line Operator-pg}{18} -'xrdef {Match-end-of-line Operator-snt}{Section'tie3.9.2} -'xrdef {GNU Operators-pg}{20} -'xrdef {GNU Operators-snt}{Chapter'tie4} -'xrdef {Word Operators-pg}{20} -'xrdef {Word Operators-snt}{Section'tie4.1} -'xrdef {Non-Emacs Syntax Tables-pg}{20} -'xrdef {Non-Emacs Syntax Tables-snt}{Section'tie4.1.1} -'xrdef {Match-word-boundary Operator-pg}{20} -'xrdef {Match-word-boundary Operator-snt}{Section'tie4.1.2} -'xrdef {Match-within-word Operator-pg}{20} -'xrdef {Match-within-word Operator-snt}{Section'tie4.1.3} -'xrdef {Match-beginning-of-word Operator-pg}{21} -'xrdef {Match-beginning-of-word Operator-snt}{Section'tie4.1.4} -'xrdef {Match-end-of-word Operator-pg}{21} -'xrdef {Match-end-of-word Operator-snt}{Section'tie4.1.5} -'xrdef {Match-word-constituent Operator-pg}{21} -'xrdef {Match-word-constituent Operator-snt}{Section'tie4.1.6} -'xrdef {Match-non-word-constituent Operator-pg}{21} -'xrdef {Match-non-word-constituent Operator-snt}{Section'tie4.1.7} -'xrdef {Buffer Operators-pg}{21} -'xrdef {Buffer Operators-snt}{Section'tie4.2} -'xrdef {Match-beginning-of-buffer Operator-pg}{21} -'xrdef {Match-beginning-of-buffer Operator-snt}{Section'tie4.2.1} -'xrdef {Match-end-of-buffer Operator-pg}{21} -'xrdef {Match-end-of-buffer Operator-snt}{Section'tie4.2.2} -'xrdef {GNU Emacs Operators-pg}{22} -'xrdef {GNU Emacs Operators-snt}{Chapter'tie5} -'xrdef {Syntactic Class Operators-pg}{22} -'xrdef {Syntactic Class Operators-snt}{Section'tie5.1} 
-'xrdef {Emacs Syntax Tables-pg}{22} -'xrdef {Emacs Syntax Tables-snt}{Section'tie5.1.1} -'xrdef {Match-syntactic-class Operator-pg}{22} -'xrdef {Match-syntactic-class Operator-snt}{Section'tie5.1.2} -'xrdef {Match-not-syntactic-class Operator-pg}{22} -'xrdef {Match-not-syntactic-class Operator-snt}{Section'tie5.1.3} -'xrdef {What Gets Matched?-pg}{23} -'xrdef {What Gets Matched?-snt}{Chapter'tie6} -'xrdef {Programming with Regex-pg}{24} -'xrdef {Programming with Regex-snt}{Chapter'tie7} -'xrdef {GNU Regex Functions-pg}{24} -'xrdef {GNU Regex Functions-snt}{Section'tie7.1} -'xrdef {GNU Pattern Buffers-pg}{24} -'xrdef {GNU Pattern Buffers-snt}{Section'tie7.1.1} -'xrdef {GNU Regular Expression Compiling-pg}{26} -'xrdef {GNU Regular Expression Compiling-snt}{Section'tie7.1.2} -'xrdef {GNU Matching-pg}{27} -'xrdef {GNU Matching-snt}{Section'tie7.1.3} -'xrdef {GNU Searching-pg}{28} -'xrdef {GNU Searching-snt}{Section'tie7.1.4} -'xrdef {Matching/Searching with Split Data-pg}{29} -'xrdef {Matching/Searching with Split Data-snt}{Section'tie7.1.5} -'xrdef {Searching with Fastmaps-pg}{30} -'xrdef {Searching with Fastmaps-snt}{Section'tie7.1.6} -'xrdef {GNU Translate Tables-pg}{31} -'xrdef {GNU Translate Tables-snt}{Section'tie7.1.7} -'xrdef {Using Registers-pg}{32} -'xrdef {Using Registers-snt}{Section'tie7.1.8} -'xrdef {Freeing GNU Pattern Buffers-pg}{34} -'xrdef {Freeing GNU Pattern Buffers-snt}{Section'tie7.1.9} -'xrdef {POSIX Regex Functions-pg}{35} -'xrdef {POSIX Regex Functions-snt}{Section'tie7.2} -'xrdef {POSIX Pattern Buffers-pg}{35} -'xrdef {POSIX Pattern Buffers-snt}{Section'tie7.2.1} -'xrdef {POSIX Regular Expression Compiling-pg}{35} -'xrdef {POSIX Regular Expression Compiling-snt}{Section'tie7.2.2} -'xrdef {POSIX Matching-pg}{37} -'xrdef {POSIX Matching-snt}{Section'tie7.2.3} -'xrdef {Reporting Errors-pg}{38} -'xrdef {Reporting Errors-snt}{Section'tie7.2.4} -'xrdef {Using Byte Offsets-pg}{39} -'xrdef {Using Byte Offsets-snt}{Section'tie7.2.5} -'xrdef {Freeing 
POSIX Pattern Buffers-pg}{39} -'xrdef {Freeing POSIX Pattern Buffers-snt}{Section'tie7.2.6} -'xrdef {BSD Regex Functions-pg}{40} -'xrdef {BSD Regex Functions-snt}{Section'tie7.3} -'xrdef {BSD Regular Expression Compiling-pg}{40} -'xrdef {BSD Regular Expression Compiling-snt}{Section'tie7.3.1} -'xrdef {BSD Searching-pg}{40} -'xrdef {BSD Searching-snt}{Section'tie7.3.2} -'xrdef {Copying-pg}{42} -'xrdef {Copying-snt}{Appendix'tie'char65{}} -'xrdef {Copying-pg}{42} -'xrdef {Copying-snt}{} -'xrdef {Copying-pg}{43} -'xrdef {Copying-snt}{} -'xrdef {Copying-pg}{48} -'xrdef {Copying-snt}{} -'xrdef {Index-pg}{50} -'xrdef {Index-snt}{} diff --git a/gnu/lib/libregex/doc/regex.cps b/gnu/lib/libregex/doc/regex.cps deleted file mode 100644 index 8b2e57c64e47..000000000000 --- a/gnu/lib/libregex/doc/regex.cps +++ /dev/null @@ -1,152 +0,0 @@ -\initial {$} -\entry {\code {$}}{18} -\initial {(} -\entry {\code {(}}{16} -\initial {)} -\entry {\code {)}}{16} -\initial {*} -\entry {\samp {*}}{10} -\initial {-} -\entry {\samp {-}}{13} -\initial {.} -\entry {\samp {.}}{9} -\initial {:} -\entry {\samp {:]} in regex}{14} -\initial {?} -\entry {\samp {?}}{11} -\initial {[} -\entry {\samp {[}}{13} -\entry {\samp {[:} in regex}{14} -\entry {\samp {[{\tt\hat}}}{13} -\initial {]} -\entry {\samp {]}}{13} -\initial {{\tt\char'173}} -\entry {\samp {{\tt\char'173}}}{12} -\initial {{\tt\char'174}} -\entry {\code {{\tt\char'174}}}{13} -\initial {{\tt\char'175}} -\entry {\samp {{\tt\char'175}}}{12} -\initial {{\tt\char43}} -\entry {\samp {{\tt\char43}}}{11} -\initial {{\tt\hat}} -\entry {\samp {{\tt\hat}}}{13} -\entry {\code {{\tt\hat}}}{18} -\initial {{\tt\indexbackslash }} -\entry {{\tt\indexbackslash }}{7} -\entry {\samp {{\tt\indexbackslash }}}{13} -\entry {\samp {{\tt\indexbackslash }'}}{21} -\entry {\code {{\tt\indexbackslash }(}}{16} -\entry {\code {{\tt\indexbackslash })}}{16} -\entry {\samp {{\tt\indexbackslash }`}}{21} -\entry {\samp {{\tt\indexbackslash }{\tt\char'173}}}{12} -\entry {\code 
{{\tt\indexbackslash }{\tt\char'174}}}{13} -\entry {\samp {{\tt\indexbackslash }{\tt\char'175}}}{12} -\entry {\samp {{\tt\indexbackslash }{\tt\gtr}}}{21} -\entry {\samp {{\tt\indexbackslash }{\tt\less}}}{21} -\entry {\samp {{\tt\indexbackslash }b}}{20} -\entry {\samp {{\tt\indexbackslash }B}}{20} -\entry {\samp {{\tt\indexbackslash }s}}{22} -\entry {\samp {{\tt\indexbackslash }S}}{22} -\entry {\samp {{\tt\indexbackslash }w}}{21} -\entry {\samp {{\tt\indexbackslash }W}}{21} -\initial {A} -\entry {\code {allocated \r {initialization}}}{26} -\entry {alternation operator}{13} -\entry {alternation operator and \samp {{\tt\hat}}}{18} -\entry {anchoring}{18} -\entry {anchors}{18} -\entry {Awk}{5} -\initial {B} -\entry {back references}{17} -\entry {backtracking}{10, 13} -\entry {beginning-of-line operator}{18} -\entry {bracket expression}{13} -\entry {\code {buffer \r {field, set by \code {re{\_}compile{\_}pattern}}}}{27} -\entry {\code {buffer \r {initialization}}}{26} -\initial {C} -\entry {character classes}{14} -\initial {E} -\entry {Egrep}{5} -\entry {Emacs}{5} -\entry {end-of-line operator}{18} -\entry {\code {end\penalty 10000{\spaceskip = 0pt{} }\r {in\penalty 10000{\spaceskip = 0pt{} }\code {struct\penalty 10000{\spaceskip = 0pt{} }re_registers}}}}{32} -\initial {F} -\entry {\code {fastmap \r {initialization}}}{26} -\entry {\code {fastmap{\_}accurate \r {field, set by \code {re{\_}compile{\_}pattern}}}}{27} -\entry {fastmaps}{30} -\initial {G} -\entry {Grep}{5} -\entry {grouping}{16} -\initial {I} -\entry {ignoring case}{35} -\entry {interval expression}{12} -\initial {M} -\entry {matching list}{13} -\entry {matching newline}{13} -\entry {matching with GNU functions}{27} -\initial {N} -\entry {\code {newline{\_}anchor \r {field in pattern buffer}}}{18} -\entry {nonmatching list}{13} -\entry {\code {not{\_}bol \r {field in pattern buffer}}}{18} -\entry {\code {num_regs\penalty 10000{\spaceskip = 0pt{} }\r {in\penalty 10000{\spaceskip = 0pt{} }\code {struct\penalty 
10000{\spaceskip = 0pt{} }re_registers}}}}{32} -\initial {O} -\entry {open-group operator and \samp {{\tt\hat}}}{18} -\entry {or operator}{13} -\initial {P} -\entry {parenthesizing}{16} -\entry {pattern buffer initialization}{26} -\entry {pattern buffer, definition of}{24} -\entry {POSIX Awk}{5} -\initial {R} -\entry {\code {range \r {argument to \code {re{\_}search}}}}{28} -\entry {\code {re_registers}}{32} -\entry {\code {RE{\_}BACKSLASH{\_}ESCAPE{\_}IN{\_}LIST}}{3} -\entry {\code {RE{\_}BK{\_}PLUS{\_}QM}}{3} -\entry {\code {RE{\_}CHAR{\_}CLASSES}}{3} -\entry {\code {RE{\_}CONTEXT{\_}INDEP{\_}ANCHORS}}{3} -\entry {\code {RE{\_}CONTEXT{\_}INDEP{\_}ANCHORS \r {(and \samp {{\tt\hat}})}}}{18} -\entry {\code {RE{\_}CONTEXT{\_}INDEP{\_}OPS}}{3} -\entry {\code {RE{\_}CONTEXT{\_}INVALID{\_}OPS}}{3} -\entry {\code {RE{\_}DOT{\_}NEWLINE}}{3} -\entry {\code {RE{\_}DOT{\_}NOT{\_}NULL}}{4} -\entry {\code {RE{\_}INTERVALS}}{4} -\entry {\code {RE{\_}LIMITED{\_}OPS}}{4} -\entry {\code {RE{\_}NEWLINE{\_}ALT}}{4} -\entry {\code {RE{\_}NO{\_}BK{\_}BRACES}}{4} -\entry {\code {RE{\_}NO{\_}BK{\_}PARENS}}{4} -\entry {\code {RE{\_}NO{\_}BK{\_}REFS}}{4} -\entry {\code {RE{\_}NO{\_}BK{\_}VBAR}}{4} -\entry {\code {RE{\_}NO{\_}EMPTY{\_}RANGES}}{4} -\entry {\code {re{\_}nsub \r {field, set by \code {re{\_}compile{\_}pattern}}}}{27} -\entry {\code {re{\_}pattern{\_}buffer \r {definition}}}{24} -\entry {\code {re{\_}syntax{\_}options \r {initialization}}}{26} -\entry {\code {RE{\_}UNMATCHED{\_}RIGHT{\_}PAREN{\_}ORD}}{4} -\entry {\code {REG{\_}EXTENDED}}{35} -\entry {\code {REG{\_}ICASE}}{35} -\entry {\code {REG{\_}NEWLINE}}{36} -\entry {\code {REG{\_}NOSUB}}{35} -\entry {\code {regex.c}}{1} -\entry {\code {regex.h}}{1} -\entry {regexp anchoring}{18} -\entry {\code {regmatch{\_}t}}{39} -\entry {\code {regs{\_}allocated}}{32} -\entry {\code {REGS{\_}FIXED}}{33} -\entry {\code {REGS{\_}REALLOCATE}}{32} -\entry {\code {REGS{\_}UNALLOCATED}}{32} -\entry {regular expressions, syntax of}{2} -\initial 
{S} -\entry {searching with GNU functions}{28} -\entry {\code {start \r {argument to \code {re{\_}search}}}}{28} -\entry {\code {start\penalty 10000{\spaceskip = 0pt{} }\r {in\penalty 10000{\spaceskip = 0pt{} }\code {struct\penalty 10000{\spaceskip = 0pt{} }re_registers}}}}{32} -\entry {\code {struct re{\_}pattern{\_}buffer \r {definition}}}{24} -\entry {subexpressions}{16} -\entry {syntax bits}{2} -\entry {\code {syntax \r {field, set by \code {re{\_}compile{\_}pattern}}}}{27} -\entry {syntax initialization}{26} -\entry {syntax of regular expressions}{2} -\initial {T} -\entry {\code {translate \r {initialization}}}{26} -\initial {U} -\entry {\code {used \r {field, set by \code {re{\_}compile{\_}pattern}}}}{27} -\initial {W} -\entry {word boundaries, matching}{20} diff --git a/gnu/lib/libregex/doc/regex.info b/gnu/lib/libregex/doc/regex.info deleted file mode 100644 index 90deedeaf44f..000000000000 --- a/gnu/lib/libregex/doc/regex.info +++ /dev/null @@ -1,2836 +0,0 @@ -This is Info file regex.info, produced by Makeinfo-1.52 from the input -file .././doc/regex.texi. - - This file documents the GNU regular expression library. - - Copyright (C) 1992, 1993 Free Software Foundation, Inc. - - Permission is granted to make and distribute verbatim copies of this -manual provided the copyright notice and this permission notice are -preserved on all copies. - - Permission is granted to copy and distribute modified versions of this -manual under the conditions for verbatim copying, provided also that the -section entitled "GNU General Public License" is included exactly as in -the original, and provided that the entire resulting derived work is -distributed under the terms of a permission notice identical to this -one. 
- - Permission is granted to copy and distribute translations of this -manual into another language, under the above conditions for modified -versions, except that the section entitled "GNU General Public License" -may be included in a translation approved by the Free Software -Foundation instead of in the original English. - - -File: regex.info, Node: Top, Next: Overview, Prev: (dir), Up: (dir) - -Regular Expression Library -************************** - - This manual documents how to program with the GNU regular expression -library. This is edition 0.12a of the manual, 19 September 1992. - - The first part of this master menu lists the major nodes in this Info -document, including the index. The rest of the menu lists all the -lower level nodes in the document. - -* Menu: - -* Overview:: -* Regular Expression Syntax:: -* Common Operators:: -* GNU Operators:: -* GNU Emacs Operators:: -* What Gets Matched?:: -* Programming with Regex:: -* Copying:: Copying and sharing Regex. -* Index:: General index. - -- The Detailed Node Listing -- - -Regular Expression Syntax - -* Syntax Bits:: -* Predefined Syntaxes:: -* Collating Elements vs. Characters:: -* The Backslash Character:: - -Common Operators - -* Match-self Operator:: Ordinary characters. -* Match-any-character Operator:: . -* Concatenation Operator:: Juxtaposition. -* Repetition Operators:: * + ? {} -* Alternation Operator:: | -* List Operators:: [...] [^...] -* Grouping Operators:: (...) -* Back-reference Operator:: \digit -* Anchoring Operators:: ^ $ - -Repetition Operators - -* Match-zero-or-more Operator:: * -* Match-one-or-more Operator:: + -* Match-zero-or-one Operator:: ? -* Interval Operators:: {} - -List Operators (`[' ... `]' and `[^' ... 
`]') - -* Character Class Operators:: [:class:] -* Range Operator:: start-end - -Anchoring Operators - -* Match-beginning-of-line Operator:: ^ -* Match-end-of-line Operator:: $ - -GNU Operators - -* Word Operators:: -* Buffer Operators:: - -Word Operators - -* Non-Emacs Syntax Tables:: -* Match-word-boundary Operator:: \b -* Match-within-word Operator:: \B -* Match-beginning-of-word Operator:: \< -* Match-end-of-word Operator:: \> -* Match-word-constituent Operator:: \w -* Match-non-word-constituent Operator:: \W - -Buffer Operators - -* Match-beginning-of-buffer Operator:: \` -* Match-end-of-buffer Operator:: \' - -GNU Emacs Operators - -* Syntactic Class Operators:: - -Syntactic Class Operators - -* Emacs Syntax Tables:: -* Match-syntactic-class Operator:: \sCLASS -* Match-not-syntactic-class Operator:: \SCLASS - -Programming with Regex - -* GNU Regex Functions:: -* POSIX Regex Functions:: -* BSD Regex Functions:: - -GNU Regex Functions - -* GNU Pattern Buffers:: The re_pattern_buffer type. -* GNU Regular Expression Compiling:: re_compile_pattern () -* GNU Matching:: re_match () -* GNU Searching:: re_search () -* Matching/Searching with Split Data:: re_match_2 (), re_search_2 () -* Searching with Fastmaps:: re_compile_fastmap () -* GNU Translate Tables:: The `translate' field. -* Using Registers:: The re_registers type and related fns. -* Freeing GNU Pattern Buffers:: regfree () - -POSIX Regex Functions - -* POSIX Pattern Buffers:: The regex_t type. -* POSIX Regular Expression Compiling:: regcomp () -* POSIX Matching:: regexec () -* Reporting Errors:: regerror () -* Using Byte Offsets:: The regmatch_t type. 
-* Freeing POSIX Pattern Buffers:: regfree () - -BSD Regex Functions - -* BSD Regular Expression Compiling:: re_comp () -* BSD Searching:: re_exec () - - -File: regex.info, Node: Overview, Next: Regular Expression Syntax, Prev: Top, Up: Top - -Overview -******** - - A "regular expression" (or "regexp", or "pattern") is a text string -that describes some (mathematical) set of strings. A regexp R -"matches" a string S if S is in the set of strings described by R. - - Using the Regex library, you can: - - * see if a string matches a specified pattern as a whole, and - - * search within a string for a substring matching a specified - pattern. - - Some regular expressions match only one string, i.e., the set they -describe has only one member. For example, the regular expression -`foo' matches the string `foo' and no others. Other regular -expressions match more than one string, i.e., the set they describe has -more than one member. For example, the regular expression `f*' matches -the set of strings made up of any number (including zero) of `f's. As -you can see, some characters in regular expressions match themselves -(such as `f') and some don't (such as `*'); the ones that don't match -themselves instead let you specify patterns that describe many -different strings. - - To either match or search for a regular expression with the Regex -library functions, you must first compile it with a Regex pattern -compiling function. A "compiled pattern" is a regular expression -converted to the internal format used by the library functions. Once -you've compiled a pattern, you can use it for matching or searching any -number of times. - - The Regex library consists of two source files: `regex.h' and -`regex.c'. Regex provides three groups of functions with which you can -operate on regular expressions. 
One group--the GNU group--is more -powerful but not completely compatible with the other two, namely the -POSIX and Berkeley UNIX groups; its interface was designed specifically -for GNU. The other groups have the same interfaces as do the regular -expression functions in POSIX and Berkeley UNIX. - - We wrote this chapter with programmers in mind, not users of -programs--such as Emacs--that use Regex. We describe the Regex library -in its entirety, not how to write regular expressions that a particular -program understands. - - -File: regex.info, Node: Regular Expression Syntax, Next: Common Operators, Prev: Overview, Up: Top - -Regular Expression Syntax -************************* - - "Characters" are things you can type. "Operators" are things in a -regular expression that match one or more characters. You compose -regular expressions from operators, which in turn you specify using one -or more characters. - - Most characters represent what we call the match-self operator, i.e., -they match themselves; we call these characters "ordinary". Other -characters represent either all or parts of fancier operators; e.g., -`.' represents what we call the match-any-character operator (which, no -surprise, matches (almost) any character); we call these characters -"special". Two different things determine what characters represent -what operators: - - 1. the regular expression syntax your program has told the Regex - library to recognize, and - - 2. the context of the character in the regular expression. - - In the following sections, we describe these things in more detail. - -* Menu: - -* Syntax Bits:: -* Predefined Syntaxes:: -* Collating Elements vs. Characters:: -* The Backslash Character:: - - -File: regex.info, Node: Syntax Bits, Next: Predefined Syntaxes, Up: Regular Expression Syntax - -Syntax Bits -=========== - - In any particular syntax for regular expressions, some characters are -always special, others are sometimes special, and others are never -special. 
The particular syntax that Regex recognizes for a given -regular expression depends on the value in the `syntax' field of the -pattern buffer of that regular expression. - - You get a pattern buffer by compiling a regular expression. *Note -GNU Pattern Buffers::, and *Note POSIX Pattern Buffers::, for more -information on pattern buffers. *Note GNU Regular Expression -Compiling::, *Note POSIX Regular Expression Compiling::, and *Note BSD -Regular Expression Compiling::, for more information on compiling. - - Regex considers the value of the `syntax' field to be a collection of -bits; we refer to these bits as "syntax bits". In most cases, they -affect what characters represent what operators. We describe the -meanings of the operators to which we refer in *Note Common Operators::, -*Note GNU Operators::, and *Note GNU Emacs Operators::. - - For reference, here is the complete list of syntax bits, in -alphabetical order: - -`RE_BACKSLASH_ESCAPE_IN_LISTS' - If this bit is set, then `\' inside a list (*note List Operators::.) - quotes (makes ordinary, if it's special) the following character; - if this bit isn't set, then `\' is an ordinary character inside - lists. (*Note The Backslash Character::, for what `\' does - outside of lists.) - -`RE_BK_PLUS_QM' - If this bit is set, then `\+' represents the match-one-or-more - operator and `\?' represents the match-zero-or-one operator; if - this bit isn't set, then `+' represents the match-one-or-more - operator and `?' represents the match-zero-or-one operator. This - bit is irrelevant if `RE_LIMITED_OPS' is set. - -`RE_CHAR_CLASSES' - If this bit is set, then you can use character classes in lists; - if this bit isn't set, then you can't. - -`RE_CONTEXT_INDEP_ANCHORS' - If this bit is set, then `^' and `$' are special anywhere outside - a list; if this bit isn't set, then these characters are special - only in certain contexts. *Note Match-beginning-of-line - Operator::, and *Note Match-end-of-line Operator::.
- -`RE_CONTEXT_INDEP_OPS' - If this bit is set, then certain characters are special anywhere - outside a list; if this bit isn't set, then those characters are - special only in some contexts and are ordinary elsewhere. - Specifically, if this bit isn't set then `*', and (if the syntax - bit `RE_LIMITED_OPS' isn't set) `+' and `?' (or `\+' and `\?', - depending on the syntax bit `RE_BK_PLUS_QM') represent repetition - operators only if they're not first in a regular expression or - just after an open-group or alternation operator. The same holds - for `{' (or `\{', depending on the syntax bit `RE_NO_BK_BRACES') if - it is the beginning of a valid interval and the syntax bit - `RE_INTERVALS' is set. - -`RE_CONTEXT_INVALID_OPS' - If this bit is set, then repetition and alternation operators - can't be in certain positions within a regular expression. - Specifically, the regular expression is invalid if it has: - - * a repetition operator first in the regular expression or just - after a match-beginning-of-line, open-group, or alternation - operator; or - - * an alternation operator first or last in the regular - expression, just before a match-end-of-line operator, or just - after an alternation or open-group operator. - - If this bit isn't set, then you can put the characters - representing the repetition and alternation operators anywhere in - a regular expression. Whether or not they will in fact be - operators in certain positions depends on other syntax bits. - -`RE_DOT_NEWLINE' - If this bit is set, then the match-any-character operator matches - a newline; if this bit isn't set, then it doesn't. - -`RE_DOT_NOT_NULL' - If this bit is set, then the match-any-character operator doesn't - match a null character; if this bit isn't set, then it does. - -`RE_INTERVALS' - If this bit is set, then Regex recognizes interval operators; if - this bit isn't set, then it doesn't.
- -`RE_LIMITED_OPS' - If this bit is set, then Regex doesn't recognize the - match-one-or-more, match-zero-or-one or alternation operators; if - this bit isn't set, then it does. - -`RE_NEWLINE_ALT' - If this bit is set, then newline represents the alternation - operator; if this bit isn't set, then newline is ordinary. - -`RE_NO_BK_BRACES' - If this bit is set, then `{' represents the open-interval operator - and `}' represents the close-interval operator; if this bit isn't - set, then `\{' represents the open-interval operator and `\}' - represents the close-interval operator. This bit is relevant only - if `RE_INTERVALS' is set. - -`RE_NO_BK_PARENS' - If this bit is set, then `(' represents the open-group operator and - `)' represents the close-group operator; if this bit isn't set, - then `\(' represents the open-group operator and `\)' represents - the close-group operator. - -`RE_NO_BK_REFS' - If this bit is set, then Regex doesn't recognize `\'DIGIT as the - back reference operator; if this bit isn't set, then it does. - -`RE_NO_BK_VBAR' - If this bit is set, then `|' represents the alternation operator; - if this bit isn't set, then `\|' represents the alternation - operator. This bit is irrelevant if `RE_LIMITED_OPS' is set. - -`RE_NO_EMPTY_RANGES' - If this bit is set, then a regular expression with a range whose - ending point collates lower than its starting point is invalid; if - this bit isn't set, then Regex considers such a range to be empty. - -`RE_UNMATCHED_RIGHT_PAREN_ORD' - If this bit is set and the regular expression has no matching - open-group operator, then Regex considers what would otherwise be - a close-group operator (based on how `RE_NO_BK_PARENS' is set) to - match `)'. - - -File: regex.info, Node: Predefined Syntaxes, Next: Collating Elements vs. 
Characters, Prev: Syntax Bits, Up: Regular Expression Syntax - -Predefined Syntaxes -=================== - - If you're programming with Regex, you can set a pattern buffer's -(*note GNU Pattern Buffers::., and *Note POSIX Pattern Buffers::) -`syntax' field either to an arbitrary combination of syntax bits (*note -Syntax Bits::.) or else to the configurations defined by Regex. These -configurations define the syntaxes used by certain programs--GNU Emacs, -POSIX Awk, traditional Awk, Grep, Egrep--in addition to syntaxes for -POSIX basic and extended regular expressions. - - The predefined syntaxes-taken directly from `regex.h'--are: - - #define RE_SYNTAX_EMACS 0 - - #define RE_SYNTAX_AWK \ - (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ - | RE_NO_BK_PARENS | RE_NO_BK_REFS \ - | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ - | RE_UNMATCHED_RIGHT_PAREN_ORD) - - #define RE_SYNTAX_POSIX_AWK \ - (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS) - - #define RE_SYNTAX_GREP \ - (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ - | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \ - | RE_NEWLINE_ALT) - - #define RE_SYNTAX_EGREP \ - (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \ - | RE_NEWLINE_ALT | RE_NO_BK_PARENS \ - | RE_NO_BK_VBAR) - - #define RE_SYNTAX_POSIX_EGREP \ - (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES) - - /* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */ - #define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC - - #define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC - - /* Syntax bits common to both basic and extended POSIX regex syntax. */ - #define _RE_SYNTAX_POSIX_COMMON \ - (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \ - | RE_INTERVALS | RE_NO_EMPTY_RANGES) - - #define RE_SYNTAX_POSIX_BASIC \ - (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM) - - /* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes - RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. 
Actually, this - isn't minimal, since other operators, such as \`, aren't disabled. */ - #define RE_SYNTAX_POSIX_MINIMAL_BASIC \ - (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS) - - #define RE_SYNTAX_POSIX_EXTENDED \ - (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ - | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ - | RE_UNMATCHED_RIGHT_PAREN_ORD) - - /* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS - replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */ - #define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \ - (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \ - | RE_NO_BK_PARENS | RE_NO_BK_REFS \ - | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD) - - -File: regex.info, Node: Collating Elements vs. Characters, Next: The Backslash Character, Prev: Predefined Syntaxes, Up: Regular Expression Syntax - -Collating Elements vs. Characters -================================= - - POSIX generalizes the notion of a character to that of a collating -element. It defines a "collating element" to be "a sequence of one or -more bytes defined in the current collating sequence as a unit of -collation." - - This generalizes the notion of a character in two ways. First, a -single character can map into two or more collating elements. For -example, the German "es-zet" collates as the collating element `s' -followed by another collating element `s'. Second, two or more -characters can map into one collating element. For example, the -Spanish `ll' collates after `l' and before `m'. - - Since POSIX's "collating element" preserves the essential idea of a -"character," we use the latter, more familiar, term in this document. - - -File: regex.info, Node: The Backslash Character, Prev: Collating Elements vs. 
Characters, Up: Regular Expression Syntax - -The Backslash Character -======================= - - The `\' character has one of four different meanings, depending on -the context in which you use it and what syntax bits are set (*note -Syntax Bits::.). It can: 1) stand for itself, 2) quote the next -character, 3) introduce an operator, or 4) do nothing. - - 1. It stands for itself inside a list (*note List Operators::.) if - the syntax bit `RE_BACKSLASH_ESCAPE_IN_LISTS' is not set. For - example, `[\]' would match `\'. - - 2. It quotes (makes ordinary, if it's special) the next character - when you use it either: - - * outside a list,(1) or - - * inside a list and the syntax bit - `RE_BACKSLASH_ESCAPE_IN_LISTS' is set. - - 3. It introduces an operator when followed by certain ordinary - characters--sometimes only when certain syntax bits are set. See - the cases `RE_BK_PLUS_QM', `RE_NO_BK_BRACES', `RE_NO_BK_VBAR', - `RE_NO_BK_PARENS', `RE_NO_BK_REFS' in *Note Syntax Bits::. Also: - - * `\b' represents the match-word-boundary operator (*note - Match-word-boundary Operator::.). - - * `\B' represents the match-within-word operator (*note - Match-within-word Operator::.). - - * `\<' represents the match-beginning-of-word operator - (*note Match-beginning-of-word Operator::.). - - * `\>' represents the match-end-of-word operator (*note - Match-end-of-word Operator::.). - - * `\w' represents the match-word-constituent operator (*note - Match-word-constituent Operator::.). - - * `\W' represents the match-non-word-constituent operator - (*note Match-non-word-constituent Operator::.). - - * `\`' represents the match-beginning-of-buffer operator and - `\'' represents the match-end-of-buffer operator (*note - Buffer Operators::.). - - * If Regex was compiled with the C preprocessor symbol `emacs' - defined, then `\sCLASS' represents the match-syntactic-class - operator and `\SCLASS' represents the - match-not-syntactic-class operator (*note Syntactic Class - Operators::.).
- - 4. In all other cases, Regex ignores `\'. For example, `\n' matches - `n'. - - - ---------- Footnotes ---------- - - (1) Sometimes you don't have to explicitly quote special characters -to make them ordinary. For instance, most characters lose any special -meaning inside a list (*note List Operators::.). In addition, if the -syntax bits `RE_CONTEXT_INVALID_OPS' and `RE_CONTEXT_INDEP_OPS' aren't -set, then (for historical reasons) the matcher considers special -characters ordinary if they are in contexts where the operations they -represent make no sense; for example, then the match-zero-or-more -operator (represented by `*') matches itself in the regular expression -`*foo' because there is no preceding expression on which it can -operate. It is poor practice, however, to depend on this behavior; if -you want a special character to be ordinary outside a list, it's better -to always quote it, regardless. - - -File: regex.info, Node: Common Operators, Next: GNU Operators, Prev: Regular Expression Syntax, Up: Top - -Common Operators -**************** - - You compose regular expressions from operators. In the following -sections, we describe the regular expression operators specified by -POSIX; GNU also uses these. Most operators have more than one -representation as characters. *Note Regular Expression Syntax::, for -what characters represent what operators under what circumstances. - - For most operators that can be represented in two ways, one -representation is a single character and the other is that character -preceded by `\'. For example, either `(' or `\(' represents the -open-group operator. Which one does depends on the setting of a syntax -bit, in this case `RE_NO_BK_PARENS'. Why is this so? Historical -reasons dictate some of the varying representations, while POSIX -dictates others. - - Finally, almost all characters lose any special meaning inside a list -(*note List Operators::.). - -* Menu: - -* Match-self Operator:: Ordinary characters. 
-* Match-any-character Operator:: . -* Concatenation Operator:: Juxtaposition. -* Repetition Operators:: * + ? {} -* Alternation Operator:: | -* List Operators:: [...] [^...] -* Grouping Operators:: (...) -* Back-reference Operator:: \digit -* Anchoring Operators:: ^ $ - - -File: regex.info, Node: Match-self Operator, Next: Match-any-character Operator, Up: Common Operators - -The Match-self Operator (ORDINARY CHARACTER) -============================================ - - This operator matches the character itself. All ordinary characters -(*note Regular Expression Syntax::.) represent this operator. For -example, `f' is always an ordinary character, so the regular expression -`f' matches only the string `f'. In particular, it does *not* match -the string `ff'. - - -File: regex.info, Node: Match-any-character Operator, Next: Concatenation Operator, Prev: Match-self Operator, Up: Common Operators - -The Match-any-character Operator (`.') -====================================== - - This operator matches any single printing or nonprinting character -except it won't match a: - -newline - if the syntax bit `RE_DOT_NEWLINE' isn't set. - -null - if the syntax bit `RE_DOT_NOT_NULL' is set. - - The `.' (period) character represents this operator. For example, -`a.b' matches any three-character string beginning with `a' and ending -with `b'. - - -File: regex.info, Node: Concatenation Operator, Next: Repetition Operators, Prev: Match-any-character Operator, Up: Common Operators - -The Concatenation Operator -========================== - - This operator concatenates two regular expressions A and B. No -character represents this operator; you simply put B after A. The -result is a regular expression that will match a string if A matches -its first part and B matches the rest. For example, `xy' (two -match-self operators) matches `xy'. 
- - -File: regex.info, Node: Repetition Operators, Next: Alternation Operator, Prev: Concatenation Operator, Up: Common Operators - -Repetition Operators -==================== - - Repetition operators repeat the preceding regular expression a -specified number of times. - -* Menu: - -* Match-zero-or-more Operator:: * -* Match-one-or-more Operator:: + -* Match-zero-or-one Operator:: ? -* Interval Operators:: {} - - -File: regex.info, Node: Match-zero-or-more Operator, Next: Match-one-or-more Operator, Up: Repetition Operators - -The Match-zero-or-more Operator (`*') -------------------------------------- - - This operator repeats the smallest possible preceding regular -expression as many times as necessary (including zero) to match the -pattern. `*' represents this operator. For example, `o*' matches any -string made up of zero or more `o's. Since this operator operates on -the smallest preceding regular expression, `fo*' has a repeating `o', -not a repeating `fo'. So, `fo*' matches `f', `fo', `foo', and so on. - - Since the match-zero-or-more operator is a suffix operator, it may be -useless as such when no regular expression precedes it. This is the -case when it: - - * is first in a regular expression, or - - * follows a match-beginning-of-line, open-group, or alternation - operator. - -Three different things can happen in these cases: - - 1. If the syntax bit `RE_CONTEXT_INVALID_OPS' is set, then the - regular expression is invalid. - - 2. If `RE_CONTEXT_INVALID_OPS' isn't set, but `RE_CONTEXT_INDEP_OPS' - is, then `*' represents the match-zero-or-more operator (which - then operates on the empty string). - - 3. Otherwise, `*' is ordinary. - - - The matcher processes a match-zero-or-more operator by first matching -as many repetitions of the smallest preceding regular expression as it -can. Then it continues to match the rest of the pattern. 
- - If it can't match the rest of the pattern, it backtracks (as many -times as necessary), each time discarding one of the matches until it -can either match the entire pattern or be certain that it cannot get a -match. For example, when matching `ca*ar' against `caaar', the matcher -first matches all three `a's of the string with the `a*' of the regular -expression. However, it cannot then match the final `ar' of the -regular expression against the final `r' of the string. So it -backtracks, discarding the match of the last `a' in the string. It can -then match the remaining `ar'. - - -File: regex.info, Node: Match-one-or-more Operator, Next: Match-zero-or-one Operator, Prev: Match-zero-or-more Operator, Up: Repetition Operators - -The Match-one-or-more Operator (`+' or `\+') --------------------------------------------- - - If the syntax bit `RE_LIMITED_OPS' is set, then Regex doesn't -recognize this operator. Otherwise, if the syntax bit `RE_BK_PLUS_QM' -isn't set, then `+' represents this operator; if it is, then `\+' does. - - This operator is similar to the match-zero-or-more operator except -that it repeats the preceding regular expression at least once; *note -Match-zero-or-more Operator::., for what it operates on, how some -syntax bits affect it, and how Regex backtracks to match it. - - For example, supposing that `+' represents the match-one-or-more -operator; then `ca+r' matches, e.g., `car' and `caaaar', but not `cr'. - - -File: regex.info, Node: Match-zero-or-one Operator, Next: Interval Operators, Prev: Match-one-or-more Operator, Up: Repetition Operators - -The Match-zero-or-one Operator (`?' or `\?') --------------------------------------------- - - If the syntax bit `RE_LIMITED_OPS' is set, then Regex doesn't -recognize this operator. Otherwise, if the syntax bit `RE_BK_PLUS_QM' -isn't set, then `?' represents this operator; if it is, then `\?' does. 
- - This operator is similar to the match-zero-or-more operator except -that it repeats the preceding regular expression once or not at all; -*note Match-zero-or-more Operator::., to see what it operates on, how -some syntax bits affect it, and how Regex backtracks to match it. - - For example, supposing that `?' represents the match-zero-or-one -operator; then `ca?r' matches both `car' and `cr', but nothing else. - - -File: regex.info, Node: Interval Operators, Prev: Match-zero-or-one Operator, Up: Repetition Operators - -Interval Operators (`{' ... `}' or `\{' ... `\}') -------------------------------------------------- - - If the syntax bit `RE_INTERVALS' is set, then Regex recognizes -"interval expressions". They repeat the smallest possible preceding -regular expression a specified number of times. - - If the syntax bit `RE_NO_BK_BRACES' is set, `{' represents the -"open-interval operator" and `}' represents the "close-interval -operator" ; otherwise, `\{' and `\}' do. - - Specifically, supposing that `{' and `}' represent the open-interval -and close-interval operators; then: - -`{COUNT}' - matches exactly COUNT occurrences of the preceding regular - expression. - -`{MIN,}' - matches MIN or more occurrences of the preceding regular - expression. - -`{MIN, MAX}' - matches at least MIN but no more than MAX occurrences of the - preceding regular expression. - - The interval expression (but not necessarily the regular expression -that contains it) is invalid if: - - * MIN is greater than MAX, or - - * any of COUNT, MIN, or MAX are outside the range zero to - `RE_DUP_MAX' (which symbol `regex.h' defines). - - If the interval expression is invalid and the syntax bit -`RE_NO_BK_BRACES' is set, then Regex considers all the characters in -the would-be interval to be ordinary. If that bit isn't set, then the -regular expression is invalid. 
- - If the interval expression is valid but there is no preceding regular -expression on which to operate, then if the syntax bit -`RE_CONTEXT_INVALID_OPS' is set, the regular expression is invalid. If -that bit isn't set, then Regex considers all the characters--other than -backslashes, which it ignores--in the would-be interval to be ordinary. - - -File: regex.info, Node: Alternation Operator, Next: List Operators, Prev: Repetition Operators, Up: Common Operators - -The Alternation Operator (`|' or `\|') -====================================== - - If the syntax bit `RE_LIMITED_OPS' is set, then Regex doesn't -recognize this operator. Otherwise, if the syntax bit `RE_NO_BK_VBAR' -is set, then `|' represents this operator; otherwise, `\|' does. - - Alternatives match one of a choice of regular expressions: if you put -the character(s) representing the alternation operator between any two -regular expressions A and B, the result matches the union of the -strings that A and B match. For example, supposing that `|' is the -alternation operator, then `foo|bar|quux' would match any of `foo', -`bar' or `quux'. - - The alternation operator operates on the *largest* possible -surrounding regular expressions. (Put another way, it has the lowest -precedence of any regular expression operator.) Thus, the only way you -can delimit its arguments is to use grouping. For example, if `(' and -`)' are the open and close-group operators, then `fo(o|b)ar' would -match either `fooar' or `fobar'. (`foo|bar' would match `foo' or -`bar'.) - - The matcher usually tries all combinations of alternatives so as to -match the longest possible string. For example, when matching -`(fooq|foo)*(qbarquux|bar)' against `fooqbarquux', it cannot take, say, -the first ("depth-first") combination it could match, since then it -would be content to match just `fooqbar'. 
- - -File: regex.info, Node: List Operators, Next: Grouping Operators, Prev: Alternation Operator, Up: Common Operators - -List Operators (`[' ... `]' and `[^' ... `]') -============================================= - - "Lists", also called "bracket expressions", are a set of one or more -items. An "item" is a character, a character class expression, or a -range expression. The syntax bits affect which kinds of items you can -put in a list. We explain the last two items in subsections below. -Empty lists are invalid. - - A "matching list" matches a single character represented by one of -the list items. You form a matching list by enclosing one or more items -within an "open-matching-list operator" (represented by `[') and a -"close-list operator" (represented by `]'). - - For example, `[ab]' matches either `a' or `b'. `[ad]*' matches the -empty string and any string composed of just `a's and `d's in any -order. Regex considers invalid a regular expression with a `[' but no -matching `]'. - - "Nonmatching lists" are similar to matching lists except that they -match a single character *not* represented by one of the list items. -You use an "open-nonmatching-list operator" (represented by `[^'(1)) -instead of an open-matching-list operator to start a nonmatching list. - - For example, `[^ab]' matches any character except `a' or `b'. - - If the `posix_newline' field in the pattern buffer (*note GNU Pattern -Buffers::.) is set, then nonmatching lists do not match a newline. - - Most characters lose any special meaning inside a list. The special -characters inside a list follow. - -`]' - ends the list if it's not the first list item. So, if you want to - make the `]' character a list item, you must put it first. - -`\' - quotes the next character if the syntax bit - `RE_BACKSLASH_ESCAPE_IN_LISTS' is set. - -`[:' - represents the open-character-class operator (*note Character - Class Operators::.)
if the syntax bit `RE_CHAR_CLASSES' is set and - what follows is a valid character class expression. - -`:]' - represents the close-character-class operator if the syntax bit - `RE_CHAR_CLASSES' is set and what precedes it is an - open-character-class operator followed by a valid character class - name. - -`-' - represents the range operator (*note Range Operator::.) if it's - not first or last in a list or the ending point of a range. - -All other characters are ordinary. For example, `[.*]' matches `.' and -`*'. - -* Menu: - -* Character Class Operators:: [:class:] -* Range Operator:: start-end - - ---------- Footnotes ---------- - - (1) Regex therefore doesn't consider the `^' to be the first -character in the list. If you put a `^' character first in (what you -think is) a matching list, you'll turn it into a nonmatching list. - - -File: regex.info, Node: Character Class Operators, Next: Range Operator, Up: List Operators - -Character Class Operators (`[:' ... `:]') ------------------------------------------ - - If the syntax bit `RE_CHAR_CLASSES' is set, then Regex -recognizes character class expressions inside lists. A "character -class expression" matches one character from a given class. You form a -character class expression by putting a character class name between an -"open-character-class operator" (represented by `[:') and a -"close-character-class operator" (represented by `:]').
The character -class names and their meanings are: - -`alnum' - letters and digits - -`alpha' - letters - -`blank' - system-dependent; for GNU, a space or tab - -`cntrl' - control characters (in the ASCII encoding, code 0177 and codes - less than 040) - -`digit' - digits - -`graph' - same as `print' except omits space - -`lower' - lowercase letters - -`print' - printable characters (in the ASCII encoding, space through tilde--codes - 040 through 0176) - -`punct' - neither control nor alphanumeric characters - -`space' - space, carriage return, newline, horizontal tab, vertical tab, and form feed - -`upper' - uppercase letters - -`xdigit' - hexadecimal digits: `0'-`9', `a'-`f', `A'-`F' - -These correspond to the definitions in the C library's `<ctype.h>' -facility. For example, `[:alpha:]' corresponds to the standard -facility `isalpha'. Regex recognizes character class expressions only -inside of lists; so `[[:alpha:]]' matches any letter, but `[:alpha:]' -outside of a bracket expression and not followed by a repetition -operator matches just itself. - - -File: regex.info, Node: Range Operator, Prev: Character Class Operators, Up: List Operators - -The Range Operator (`-') ------------------------- - - Regex recognizes "range expressions" inside a list. They represent -those characters that fall between two elements in the current -collating sequence. You form a range expression by putting a "range -operator" between two characters.(1) `-' represents the range operator. -For example, `a-f' within a list represents all the characters from `a' -through `f' inclusively. - - If the syntax bit `RE_NO_EMPTY_RANGES' is set, then if the range's -ending point collates less than its starting point, the range (and the -regular expression containing it) is invalid. For example, the regular -expression `[z-a]' would be invalid. If this bit isn't set, then Regex -considers such a range to be empty.
- - Since `-' represents the range operator, if you want to make a `-' -character itself a list item, you must do one of the following: - - * Put the `-' either first or last in the list. - - * Include a range whose starting point collates strictly lower than - `-' and whose ending point collates equal or higher. Unless a - range is the first item in a list, a `-' can't be its starting - point, but *can* be its ending point. That is because Regex - considers `-' to be the range operator unless it is preceded by - another `-'. For example, in the ASCII encoding, `)', `*', `+', - `,', `-', `.', and `/' are contiguous characters in the collating - sequence. You might think that `[)-+--/]' has two ranges: `)-+' - and `--/'. Rather, it has the ranges `)-+' and `+--', plus the - character `/', so it matches, e.g., `,', not `.'. - - * Put a range whose starting point is `-' first in the list. - - For example, `[-a-z]' matches a lowercase letter or a hyphen (in -English, in ASCII). - - ---------- Footnotes ---------- - - (1) You can't use a character class for the starting or ending point -of a range, since a character class is not a single character. - - -File: regex.info, Node: Grouping Operators, Next: Back-reference Operator, Prev: List Operators, Up: Common Operators - -Grouping Operators (`(' ... `)' or `\(' ... `\)') -================================================= - - A "group", also known as a "subexpression", consists of an -"open-group operator", any number of other operators, and a -"close-group operator". Regex treats this sequence as a unit, just as -mathematics and programming languages treat a parenthesized expression -as a unit. - - Therefore, using "groups", you can: - - * delimit the argument(s) to an alternation operator (*note - Alternation Operator::.) or a repetition operator (*note - Repetition Operators::.). - - * keep track of the indices of the substring that matched a given - group. *Note Using Registers::, for a precise explanation. 
This - lets you: - - * use the back-reference operator (*note Back-reference - Operator::.). - - * use registers (*note Using Registers::.). - - If the syntax bit `RE_NO_BK_PARENS' is set, then `(' represents the -open-group operator and `)' represents the close-group operator; -otherwise, `\(' and `\)' do. - - If the syntax bit `RE_UNMATCHED_RIGHT_PAREN_ORD' is set and a -close-group operator has no matching open-group operator, then Regex -considers it to match `)'. - - -File: regex.info, Node: Back-reference Operator, Next: Anchoring Operators, Prev: Grouping Operators, Up: Common Operators - -The Back-reference Operator ("\"DIGIT) -====================================== - - If the syntax bit `RE_NO_BK_REFS' isn't set, then Regex recognizes -back references. A back reference matches a specified preceding group. -The back reference operator is represented by `\DIGIT' anywhere after -the end of a regular expression's DIGIT-th group (*note Grouping -Operators::.). - - DIGIT must be between `1' and `9'. The matcher assigns numbers 1 -through 9 to the first nine groups it encounters. By using one of `\1' -through `\9' after the corresponding group's close-group operator, you -can match a substring identical to the one that the group does. - - Back references match according to the following (in all examples -below, `(' represents the open-group, `)' the close-group, `{' the -open-interval and `}' the close-interval operator): - - * If the group matches a substring, the back reference matches an - identical substring. For example, `(a)\1' matches `aa' and - `(bana)na\1bo\1' matches `bananabanabobana'. Likewise, `(.*)\1' - matches any (newline-free if the syntax bit `RE_DOT_NEWLINE' isn't - set) string that is composed of two identical halves; the `(.*)' - matches the first half and the `\1' matches the second half.
- - * If the group matches more than once (as it might if followed by, - e.g., a repetition operator), then the back reference matches the - substring the group *last* matched. For example, `((a*)b)*\1\2' - matches `aabababa'; first group 1 (the outer one) matches `aab' - and group 2 (the inner one) matches `aa'. Then group 1 matches - `ab' and group 2 matches `a'. So, `\1' matches `ab' and `\2' - matches `a'. - - * If the group doesn't participate in a match, i.e., it is part of an - alternative not taken or a repetition operator allows zero - repetitions of it, then the back reference makes the whole match - fail. For example, `(one()|two())-and-(three\2|four\3)' matches - `one-and-three' and `two-and-four', but not `one-and-four' or - `two-and-three'. For example, if the pattern matches `one-and-', - then its group 2 matches the empty string and its group 3 doesn't - participate in the match. So, if it then matches `four', then - when it tries to back reference group 3--which it will attempt to - do because `\3' follows the `four'--the match will fail because - group 3 didn't participate in the match. - - You can use a back reference as an argument to a repetition operator. -For example, `(a(b))\2*' matches `a' followed by one or more `b's. -Similarly, `(a(b))\2{3}' matches `abbbb'. - - If there is no preceding DIGIT-th subexpression, the regular -expression is invalid. - - -File: regex.info, Node: Anchoring Operators, Prev: Back-reference Operator, Up: Common Operators - -Anchoring Operators -=================== - - These operators can constrain a pattern to match only at the -beginning or end of the entire string or at the beginning or end of a -line.
- -* Menu: - -* Match-beginning-of-line Operator:: ^ -* Match-end-of-line Operator:: $ - - -File: regex.info, Node: Match-beginning-of-line Operator, Next: Match-end-of-line Operator, Up: Anchoring Operators - -The Match-beginning-of-line Operator (`^') ------------------------------------------- - - This operator can match the empty string either at the beginning of -the string or after a newline character. Thus, it is said to "anchor" -the pattern to the beginning of a line. - - In the cases following, `^' represents this operator. (Otherwise, -`^' is ordinary.) - - * It (the `^') is first in the pattern, as in `^foo'. - - * The syntax bit `RE_CONTEXT_INDEP_ANCHORS' is set, and it is outside - a bracket expression. - - * It follows an open-group or alternation operator, as in `a\(^b\)' - and `a\|^b'. *Note Grouping Operators::, and *Note Alternation - Operator::. - - These rules imply that some valid patterns containing `^' cannot be -matched; for example, `foo^bar' if `RE_CONTEXT_INDEP_ANCHORS' is set. - - If the `not_bol' field is set in the pattern buffer (*note GNU -Pattern Buffers::.), then `^' fails to match at the beginning of the -string. *Note POSIX Matching::, for when you might find this useful. - - If the `newline_anchor' field isn't set in the pattern buffer, then `^' -fails to match after a newline. This is useful when you do not regard -the string to be matched as broken into lines. - - -File: regex.info, Node: Match-end-of-line Operator, Prev: Match-beginning-of-line Operator, Up: Anchoring Operators - -The Match-end-of-line Operator (`$') ------------------------------------- - - This operator can match the empty string either at the end of the -string or before a newline character in the string. Thus, it is said -to "anchor" the pattern to the end of a line. - - It is always represented by `$'. For example, `foo$' usually -matches, e.g., `foo' and, e.g., the first three characters of -`foo\nbar'. 
- - Its interaction with the syntax bits and pattern buffer fields is -exactly the dual of `^''s; see the previous section. (That is, -"beginning" becomes "end", "next" becomes "previous", and "after" -becomes "before".) - - -File: regex.info, Node: GNU Operators, Next: GNU Emacs Operators, Prev: Common Operators, Up: Top - -GNU Operators -************* - - Following are operators that GNU defines (and POSIX doesn't). - -* Menu: - -* Word Operators:: -* Buffer Operators:: - - -File: regex.info, Node: Word Operators, Next: Buffer Operators, Up: GNU Operators - -Word Operators -============== - - The operators in this section require Regex to recognize parts of -words. Regex uses a syntax table to determine whether or not a -character is part of a word, i.e., whether or not it is -"word-constituent". - -* Menu: - -* Non-Emacs Syntax Tables:: -* Match-word-boundary Operator:: \b -* Match-within-word Operator:: \B -* Match-beginning-of-word Operator:: \< -* Match-end-of-word Operator:: \> -* Match-word-constituent Operator:: \w -* Match-non-word-constituent Operator:: \W - - -File: regex.info, Node: Non-Emacs Syntax Tables, Next: Match-word-boundary Operator, Up: Word Operators - -Non-Emacs Syntax Tables ------------------------ - - A "syntax table" is an array indexed by the characters in your -character set. In the ASCII encoding, therefore, a syntax table has -256 elements. Regex always uses a `char *' variable `re_syntax_table' -as its syntax table. In some cases, it initializes this variable and -in others it expects you to initialize it. - - * If Regex is compiled with the preprocessor symbols `emacs' and - `SYNTAX_TABLE' both undefined, then Regex allocates - `re_syntax_table' and initializes an element I either to `Sword' - (which it defines) if I is a letter, number, or `_', or to zero if - it's not. 
- - * If Regex is compiled with `emacs' undefined but `SYNTAX_TABLE' - defined, then Regex expects you to define a `char *' variable - `re_syntax_table' to be a valid syntax table. - - * *Note Emacs Syntax Tables::, for what happens when Regex is - compiled with the preprocessor symbol `emacs' defined. - - -File: regex.info, Node: Match-word-boundary Operator, Next: Match-within-word Operator, Prev: Non-Emacs Syntax Tables, Up: Word Operators - -The Match-word-boundary Operator (`\b') ---------------------------------------- - - This operator (represented by `\b') matches the empty string at -either the beginning or the end of a word. For example, `\brat\b' -matches the separate word `rat'. - - -File: regex.info, Node: Match-within-word Operator, Next: Match-beginning-of-word Operator, Prev: Match-word-boundary Operator, Up: Word Operators - -The Match-within-word Operator (`\B') -------------------------------------- - - This operator (represented by `\B') matches the empty string within a -word. For example, `c\Brat\Be' matches `crate', but `dirty \Brat' -doesn't match `dirty rat'. - - -File: regex.info, Node: Match-beginning-of-word Operator, Next: Match-end-of-word Operator, Prev: Match-within-word Operator, Up: Word Operators - -The Match-beginning-of-word Operator (`\<') -------------------------------------------- - - This operator (represented by `\<') matches the empty string at the -beginning of a word. - - -File: regex.info, Node: Match-end-of-word Operator, Next: Match-word-constituent Operator, Prev: Match-beginning-of-word Operator, Up: Word Operators - -The Match-end-of-word Operator (`\>') -------------------------------------- - - This operator (represented by `\>') matches the empty string at the -end of a word. 
- - -File: regex.info, Node: Match-word-constituent Operator, Next: Match-non-word-constituent Operator, Prev: Match-end-of-word Operator, Up: Word Operators - -The Match-word-constituent Operator (`\w') ------------------------------------------- - - This operator (represented by `\w') matches any word-constituent -character. - - -File: regex.info, Node: Match-non-word-constituent Operator, Prev: Match-word-constituent Operator, Up: Word Operators - -The Match-non-word-constituent Operator (`\W') ----------------------------------------------- - - This operator (represented by `\W') matches any character that is not -word-constituent. - - -File: regex.info, Node: Buffer Operators, Prev: Word Operators, Up: GNU Operators - -Buffer Operators -================ - - Following are operators which work on buffers. In Emacs, a "buffer" -is, naturally, an Emacs buffer. For other programs, Regex considers the -entire string to be matched as the buffer. - -* Menu: - -* Match-beginning-of-buffer Operator:: \` -* Match-end-of-buffer Operator:: \' - - -File: regex.info, Node: Match-beginning-of-buffer Operator, Next: Match-end-of-buffer Operator, Up: Buffer Operators - -The Match-beginning-of-buffer Operator (`\`') ---------------------------------------------- - - This operator (represented by `\`') matches the empty string at the -beginning of the buffer. - - -File: regex.info, Node: Match-end-of-buffer Operator, Prev: Match-beginning-of-buffer Operator, Up: Buffer Operators - -The Match-end-of-buffer Operator (`\'') ---------------------------------------- - - This operator (represented by `\'') matches the empty string at the -end of the buffer. - - -File: regex.info, Node: GNU Emacs Operators, Next: What Gets Matched?, Prev: GNU Operators, Up: Top - -GNU Emacs Operators -******************* - - Following are operators that GNU defines (and POSIX doesn't) that you -can use only when Regex is compiled with the preprocessor symbol -`emacs' defined. 
- -* Menu: - -* Syntactic Class Operators:: - - -File: regex.info, Node: Syntactic Class Operators, Up: GNU Emacs Operators - -Syntactic Class Operators -========================= - - The operators in this section require Regex to recognize the syntactic -classes of characters. Regex uses a syntax table to determine this. - -* Menu: - -* Emacs Syntax Tables:: -* Match-syntactic-class Operator:: \sCLASS -* Match-not-syntactic-class Operator:: \SCLASS - - -File: regex.info, Node: Emacs Syntax Tables, Next: Match-syntactic-class Operator, Up: Syntactic Class Operators - -Emacs Syntax Tables -------------------- - - A "syntax table" is an array indexed by the characters in your -character set. In the ASCII encoding, therefore, a syntax table has -256 elements. - - If Regex is compiled with the preprocessor symbol `emacs' defined, -then Regex expects you to define and initialize the variable -`re_syntax_table' to be an Emacs syntax table. Emacs' syntax tables -are more complicated than Regex's own (*note Non-Emacs Syntax -Tables::.). *Note Syntax: (emacs)Syntax, for a description of Emacs' -syntax tables. - - -File: regex.info, Node: Match-syntactic-class Operator, Next: Match-not-syntactic-class Operator, Prev: Emacs Syntax Tables, Up: Syntactic Class Operators - -The Match-syntactic-class Operator (`\s'CLASS) ----------------------------------------------- - - This operator matches any character whose syntactic class is -represented by a specified character. `\sCLASS' represents this -operator where CLASS is the character representing the syntactic class -you want. For example, `w' represents the syntactic class of -word-constituent characters, so `\sw' matches any word-constituent -character. 
- - -File: regex.info, Node: Match-not-syntactic-class Operator, Prev: Match-syntactic-class Operator, Up: Syntactic Class Operators - -The Match-not-syntactic-class Operator (`\S'CLASS) --------------------------------------------------- - - This operator is similar to the match-syntactic-class operator except -that it matches any character whose syntactic class is *not* -represented by the specified character. `\SCLASS' represents this -operator. For example, `w' represents the syntactic class of -word-constituent characters, so `\Sw' matches any character that is not -word-constituent. - - -File: regex.info, Node: What Gets Matched?, Next: Programming with Regex, Prev: GNU Emacs Operators, Up: Top - -What Gets Matched? -****************** - - Regex usually matches strings according to the "leftmost longest" -rule; that is, it chooses the longest of the leftmost matches. This -does not mean that, for a regular expression containing subexpressions, -it simply chooses the longest match for each subexpression, left to -right; the overall match must also be the longest possible one. - - For example, `(ac*)(c*d[ac]*)\1' matches `acdacaaa', not `acdac', as -it would if it were to choose the longest match for the first -subexpression. - - -File: regex.info, Node: Programming with Regex, Next: Copying, Prev: What Gets Matched?, Up: Top - -Programming with Regex -********************** - - Here we describe how you use the Regex data structures and functions -in C programs. Regex has three interfaces: one designed for GNU, one -compatible with POSIX and one compatible with Berkeley UNIX. - -* Menu: - -* GNU Regex Functions:: -* POSIX Regex Functions:: -* BSD Regex Functions:: - - -File: regex.info, Node: GNU Regex Functions, Next: POSIX Regex Functions, Up: Programming with Regex - -GNU Regex Functions -=================== - - If you're writing code that doesn't need to be compatible with either -POSIX or Berkeley UNIX, you can use these functions. 
They provide more -options than the other interfaces. - -* Menu: - -* GNU Pattern Buffers:: The re_pattern_buffer type. -* GNU Regular Expression Compiling:: re_compile_pattern () -* GNU Matching:: re_match () -* GNU Searching:: re_search () -* Matching/Searching with Split Data:: re_match_2 (), re_search_2 () -* Searching with Fastmaps:: re_compile_fastmap () -* GNU Translate Tables:: The `translate' field. -* Using Registers:: The re_registers type and related fns. -* Freeing GNU Pattern Buffers:: regfree () - - -File: regex.info, Node: GNU Pattern Buffers, Next: GNU Regular Expression Compiling, Up: GNU Regex Functions - -GNU Pattern Buffers -------------------- - - To compile, match, or search for a given regular expression, you must -supply a pattern buffer. A "pattern buffer" holds one compiled regular -expression.(1) - - You can have several different pattern buffers simultaneously, each -holding a compiled pattern for a different regular expression. - - `regex.h' defines the pattern buffer `struct' as follows: - - /* Space that holds the compiled pattern. It is declared as - `unsigned char *' because its elements are - sometimes used as array indexes. */ - unsigned char *buffer; - - /* Number of bytes to which `buffer' points. */ - unsigned long allocated; - - /* Number of bytes actually used in `buffer'. */ - unsigned long used; - - /* Syntax setting with which the pattern was compiled. */ - reg_syntax_t syntax; - - /* Pointer to a fastmap, if any, otherwise zero. re_search uses - the fastmap, if there is one, to skip over impossible - starting points for matches. */ - char *fastmap; - - /* Either a translate table to apply to all characters before - comparing them, or zero for no translation. The translation - is applied to a pattern when it is compiled and to a string - when it is matched. */ - char *translate; - - /* Number of subexpressions found by the compiler. */ - size_t re_nsub; - - /* Zero if this pattern cannot match the empty string, one else. 
- Well, in truth it's used only in `re_search_2', to see - whether or not we should use the fastmap, so we don't set - this absolutely perfectly; see `re_compile_fastmap' (the - `duplicate' case). */ - unsigned can_be_null : 1; - - /* If REGS_UNALLOCATED, allocate space in the `regs' structure - for `max (RE_NREGS, re_nsub + 1)' groups. - If REGS_REALLOCATE, reallocate space if necessary. - If REGS_FIXED, use what's there. */ - #define REGS_UNALLOCATED 0 - #define REGS_REALLOCATE 1 - #define REGS_FIXED 2 - unsigned regs_allocated : 2; - - /* Set to zero when `regex_compile' compiles a pattern; set to one - by `re_compile_fastmap' if it updates the fastmap. */ - unsigned fastmap_accurate : 1; - - /* If set, `re_match_2' does not return information about - subexpressions. */ - unsigned no_sub : 1; - - /* If set, a beginning-of-line anchor doesn't match at the - beginning of the string. */ - unsigned not_bol : 1; - - /* Similarly for an end-of-line anchor. */ - unsigned not_eol : 1; - - /* If true, an anchor at a newline matches. */ - unsigned newline_anchor : 1; - - ---------- Footnotes ---------- - - (1) Regular expressions are also referred to as "patterns," hence -the name "pattern buffer." - - -File: regex.info, Node: GNU Regular Expression Compiling, Next: GNU Matching, Prev: GNU Pattern Buffers, Up: GNU Regex Functions - -GNU Regular Expression Compiling --------------------------------- - - In GNU, you can both match and search for a given regular expression. -To do either, you must first compile it in a pattern buffer (*note GNU -Pattern Buffers::.). - - Regular expressions match according to the syntax with which they were -compiled; with GNU, you indicate what syntax you want by setting the -variable `re_syntax_options' (declared in `regex.h' and defined in -`regex.c') before calling the compiling function, `re_compile_pattern' -(see below). *Note Syntax Bits::, and *Note Predefined Syntaxes::. 
- - You can change the value of `re_syntax_options' at any time. -Usually, however, you set its value once and then never change it. - - `re_compile_pattern' takes a pattern buffer as an argument. You must -initialize the following fields: - -`translate initialization' -`translate' - Initialize this to point to a translate table if you want one, or - to zero if you don't. We explain translate tables in *Note GNU - Translate Tables::. - -`fastmap' - Initialize this to nonzero if you want a fastmap, or to zero if you - don't. - -`buffer' -`allocated' - If you want `re_compile_pattern' to allocate memory for the - compiled pattern, set both of these to zero. If you have an - existing block of memory (allocated with `malloc') you want Regex - to use, set `buffer' to its address and `allocated' to its size (in - bytes). - - `re_compile_pattern' uses `realloc' to extend the space for the - compiled pattern as necessary. - - To compile a pattern buffer, use: - - char * - re_compile_pattern (const char *REGEX, const int REGEX_SIZE, - struct re_pattern_buffer *PATTERN_BUFFER) - -REGEX is the regular expression's address, REGEX_SIZE is its length, -and PATTERN_BUFFER is the pattern buffer's address. - - If `re_compile_pattern' successfully compiles the regular expression, -it returns zero and sets `*PATTERN_BUFFER' to the compiled pattern. It -sets the pattern buffer's fields as follows: - -`buffer' - to the compiled pattern. - -`used' - to the number of bytes the compiled pattern in `buffer' occupies. - -`syntax' - to the current value of `re_syntax_options'. - -`re_nsub' - to the number of subexpressions in REGEX. - -`fastmap_accurate' - to zero on the theory that the pattern you're compiling is - different than the one previously compiled into `buffer'; in that - case (since you can't make a fastmap without a compiled pattern), - `fastmap' would either contain an incompatible fastmap, or nothing - at all. 
- - If `re_compile_pattern' can't compile REGEX, it returns an error -string corresponding to one of the errors listed in *Note POSIX Regular -Expression Compiling::. - - -File: regex.info, Node: GNU Matching, Next: GNU Searching, Prev: GNU Regular Expression Compiling, Up: GNU Regex Functions - -GNU Matching ------------- - - Matching the GNU way means trying to match as much of a string as -possible starting at a position within it you specify. Once you've -compiled a pattern into a pattern buffer (*note GNU Regular Expression -Compiling::.), you can ask the matcher to match that pattern against a -string using: - - int - re_match (struct re_pattern_buffer *PATTERN_BUFFER, - const char *STRING, const int SIZE, - const int START, struct re_registers *REGS) - -PATTERN_BUFFER is the address of a pattern buffer containing a compiled -pattern. STRING is the string you want to match; it can contain -newline and null characters. SIZE is the length of that string. START -is the string index at which you want to begin matching; the first -character of STRING is at index zero. *Note Using Registers::, for an -explanation of REGS; you can safely pass zero. - - `re_match' matches the regular expression in PATTERN_BUFFER against -the string STRING according to the syntax in PATTERN_BUFFER's `syntax' -field. (*Note GNU Regular Expression Compiling::, for how to set it.) -The function returns -1 if the compiled pattern does not match any part -of STRING and -2 if an internal error happens; otherwise, it returns -how many (possibly zero) characters of STRING the pattern matched. - - An example: suppose PATTERN_BUFFER points to a pattern buffer -containing the compiled pattern for `a*', and STRING points to `aaaaab' -(whereupon SIZE should be 6). Then if START is 2, `re_match' returns 3, -i.e., `a*' would have matched the last three `a's in STRING. If START -is 0, `re_match' returns 5, i.e., `a*' would have matched all the `a's -in STRING. 
If START is either 5 or 6, it returns zero. - - If START is not between zero and SIZE, then `re_match' returns -1. - - -File: regex.info, Node: GNU Searching, Next: Matching/Searching with Split Data, Prev: GNU Matching, Up: GNU Regex Functions - -GNU Searching -------------- - - "Searching" means trying to match starting at successive positions -within a string. The function `re_search' does this. - - Before calling `re_search', you must compile your regular expression. -*Note GNU Regular Expression Compiling::. - - Here is the function declaration: - - int - re_search (struct re_pattern_buffer *PATTERN_BUFFER, - const char *STRING, const int SIZE, - const int START, const int RANGE, - struct re_registers *REGS) - -whose arguments are the same as those to `re_match' (*note GNU -Matching::.) except that the two arguments START and RANGE replace -`re_match''s argument START. - - If RANGE is positive, then `re_search' attempts a match starting -first at index START, then at START + 1 if that fails, and so on, up to -START + RANGE; if RANGE is negative, then it attempts a match starting -first at index START, then at START -1 if that fails, and so on. - - If START is not between zero and SIZE, then `re_search' returns -1. -When RANGE is positive, `re_search' adjusts RANGE so that START + RANGE -- 1 is between zero and SIZE, if necessary; that way it won't search -outside of STRING. Similarly, when RANGE is negative, `re_search' -adjusts RANGE so that START + RANGE + 1 is between zero and SIZE, if -necessary. - - If the `fastmap' field of PATTERN_BUFFER is zero, `re_search' matches -starting at consecutive positions; otherwise, it uses `fastmap' to make -the search more efficient. *Note Searching with Fastmaps::. - - If no match is found, `re_search' returns -1. If a match is found, -it returns the index where the match began. If an internal error -happens, it returns -2. 
- - -File: regex.info, Node: Matching/Searching with Split Data, Next: Searching with Fastmaps, Prev: GNU Searching, Up: GNU Regex Functions - -Matching and Searching with Split Data --------------------------------------- - - Using the functions `re_match_2' and `re_search_2', you can match or -search in data that is divided into two strings. - - The function: - - int - re_match_2 (struct re_pattern_buffer *BUFFER, - const char *STRING1, const int SIZE1, - const char *STRING2, const int SIZE2, - const int START, - struct re_registers *REGS, - const int STOP) - -is similar to `re_match' (*note GNU Matching::.) except that you pass -*two* data strings and sizes, and an index STOP beyond which you don't -want the matcher to try matching. As with `re_match', if it succeeds, -`re_match_2' returns how many characters of STRING it matched. Regard -STRING1 and STRING2 as concatenated when you set the arguments START and -STOP and use the contents of REGS; `re_match_2' never returns a value -larger than SIZE1 + SIZE2. - - The function: - - int - re_search_2 (struct re_pattern_buffer *BUFFER, - const char *STRING1, const int SIZE1, - const char *STRING2, const int SIZE2, - const int START, const int RANGE, - struct re_registers *REGS, - const int STOP) - -is similarly related to `re_search'. - - -File: regex.info, Node: Searching with Fastmaps, Next: GNU Translate Tables, Prev: Matching/Searching with Split Data, Up: GNU Regex Functions - -Searching with Fastmaps ------------------------ - - If you're searching through a long string, you should use a fastmap. -Without one, the searcher tries to match at consecutive positions in the -string. Generally, most of the characters in the string could not start -a match. It takes much longer to try matching at a given position in -the string than it does to check in a table whether or not the -character at that position could start a match. A "fastmap" is such a -table. 
- - More specifically, a fastmap is an array indexed by the characters in -your character set. Under the ASCII encoding, therefore, a fastmap has -256 elements. If you want the searcher to use a fastmap with a given -pattern buffer, you must allocate the array and assign the array's -address to the pattern buffer's `fastmap' field. You either can -compile the fastmap yourself or have `re_search' do it for you; when -`fastmap' is nonzero, it automatically compiles a fastmap the first -time you search using a particular compiled pattern. - - To compile a fastmap yourself, use: - - int - re_compile_fastmap (struct re_pattern_buffer *PATTERN_BUFFER) - -PATTERN_BUFFER is the address of a pattern buffer. If the character C -could start a match for the pattern, `re_compile_fastmap' makes -`PATTERN_BUFFER->fastmap[C]' nonzero. It returns 0 if it can compile a -fastmap and -2 if there is an internal error. For example, if `|' is -the alternation operator and PATTERN_BUFFER holds the compiled pattern -for `a|b', then `re_compile_fastmap' sets `fastmap['a']' and -`fastmap['b']' (and no others). - - `re_search' uses a fastmap as it moves along in the string: it checks -the string's characters until it finds one that's in the fastmap. Then -it tries matching at that character. If the match fails, it repeats -the process. So, by using a fastmap, `re_search' doesn't waste time -trying to match at positions in the string that couldn't start a match. - - If you don't want `re_search' to use a fastmap, store zero in the -`fastmap' field of the pattern buffer before calling `re_search'. - - Once you've initialized a pattern buffer's `fastmap' field, you need -never do so again--even if you compile a new pattern in it--provided -the way the field is set still reflects whether or not you want a -fastmap. `re_search' will still either do nothing if `fastmap' is null -or, if it isn't, compile a new fastmap for the new pattern. 
- - -File: regex.info, Node: GNU Translate Tables, Next: Using Registers, Prev: Searching with Fastmaps, Up: GNU Regex Functions - -GNU Translate Tables --------------------- - - If you set the `translate' field of a pattern buffer to a translate -table, then the GNU Regex functions to which you've passed that pattern -buffer use it to apply a simple transformation to all the regular -expression and string characters at which they look. - - A "translate table" is an array indexed by the characters in your -character set. Under the ASCII encoding, therefore, a translate table -has 256 elements. The array's elements are also characters in your -character set. When the Regex functions see a character C, they use -`translate[C]' in its place, with one exception: the character after a -`\' is not translated. (This ensures that, the operators, e.g., `\B' -and `\b', are always distinguishable.) - - For example, a table that maps all lowercase letters to the -corresponding uppercase ones would cause the matcher to ignore -differences in case.(1) Such a table would map all characters except -lowercase letters to themselves, and lowercase letters to the -corresponding uppercase ones. Under the ASCII encoding, here's how you -could initialize such a table (we'll call it `case_fold'): - - for (i = 0; i < 256; i++) - case_fold[i] = i; - for (i = 'a'; i <= 'z'; i++) - case_fold[i] = i - ('a' - 'A'); - - You tell Regex to use a translate table on a given pattern buffer by -assigning that table's address to the `translate' field of that buffer. -If you don't want Regex to do any translation, put zero into this -field. You'll get weird results if you change the table's contents -anytime between compiling the pattern buffer, compiling its fastmap, and -matching or searching with the pattern buffer. - - ---------- Footnotes ---------- - - (1) A table that maps all uppercase letters to the corresponding -lowercase ones would work just as well for this purpose. 
- - -File: regex.info, Node: Using Registers, Next: Freeing GNU Pattern Buffers, Prev: GNU Translate Tables, Up: GNU Regex Functions - -Using Registers ---------------- - - A group in a regular expression can match a (possibly empty) -substring of the string that regular expression as a whole matched. -The matcher remembers the beginning and end of the substring matched by -each group. - - To find out what they matched, pass a nonzero REGS argument to a GNU -matching or searching function (*note GNU Matching::. and *Note GNU -Searching::), i.e., the address of a structure of this type, as defined -in `regex.h': - - struct re_registers - { - unsigned num_regs; - regoff_t *start; - regoff_t *end; - }; - - Except for (possibly) the NUM_REGS'th element (see below), the Ith -element of the `start' and `end' arrays records information about the -Ith group in the pattern. (They're declared as C pointers, but this is -only because not all C compilers accept zero-length arrays; -conceptually, it is simplest to think of them as arrays.) - - The `start' and `end' arrays are allocated in various ways, depending -on the value of the `regs_allocated' field in the pattern buffer passed -to the matcher. - - The simplest and perhaps most useful is to let the matcher -(re)allocate enough space to record information for all the groups in -the regular expression. If `regs_allocated' is `REGS_UNALLOCATED', the -matcher allocates 1 + RE_NSUB (another field in the pattern buffer; -*note GNU Pattern Buffers::.). The extra element is set to -1, and -the matcher sets `regs_allocated' to `REGS_REALLOCATE'. Then on subsequent calls -with the same pattern buffer and REGS arguments, the matcher -reallocates more space if necessary. - - It would perhaps be more logical to make the `regs_allocated' field -part of the `re_registers' structure, instead of part of the pattern -buffer. But in that case the caller would be forced to initialize the -structure before passing it. 
Much existing code doesn't do this -initialization, and it's arguably better to avoid it anyway. - - `re_compile_pattern' sets `regs_allocated' to `REGS_UNALLOCATED', so -if you use the GNU regular expression functions, you get this behavior -by default. - - xx document re_set_registers - - POSIX, on the other hand, requires a different interface: the caller -is supposed to pass in a fixed-length array which the matcher fills. -Therefore, if `regs_allocated' is `REGS_FIXED' the matcher simply fills -that array. - - The following examples illustrate the information recorded in the -`re_registers' structure. (In all of them, `(' represents the -open-group and `)' the close-group operator. The first character in -the string STRING is at index 0.) - - * If the regular expression has an I-th group not contained within - another group that matches a substring of STRING, then the - function sets `REGS->start[I]' to the index in STRING where the - substring matched by the I-th group begins, and `REGS->end[I]' to - the index just beyond that substring's end. The function sets - `REGS->start[0]' and `REGS->end[0]' to analogous information about - the entire pattern. - - For example, when you match `((a)(b))' against `ab', you get: - - * 0 in `REGS->start[0]' and 2 in `REGS->end[0]' - - * 0 in `REGS->start[1]' and 2 in `REGS->end[1]' - - * 0 in `REGS->start[2]' and 1 in `REGS->end[2]' - - * 1 in `REGS->start[3]' and 2 in `REGS->end[3]' - - * If a group matches more than once (as it might if followed by, - e.g., a repetition operator), then the function reports the - information about what the group *last* matched. 
- - For example, when you match the pattern `(a)*' against the string - `aa', you get: - - * 0 in `REGS->start[0]' and 2 in `REGS->end[0]' - - * 1 in `REGS->start[1]' and 2 in `REGS->end[1]' - - * If the I-th group does not participate in a successful match, - e.g., it is an alternative not taken or a repetition operator - allows zero repetitions of it, then the function sets - `REGS->start[I]' and `REGS->end[I]' to -1. - - For example, when you match the pattern `(a)*b' against the string - `b', you get: - - * 0 in `REGS->start[0]' and 1 in `REGS->end[0]' - - * -1 in `REGS->start[1]' and -1 in `REGS->end[1]' - - * If the I-th group matches a zero-length string, then the function - sets `REGS->start[I]' and `REGS->end[I]' to the index just beyond - that zero-length string. - - For example, when you match the pattern `(a*)b' against the string - `b', you get: - - * 0 in `REGS->start[0]' and 1 in `REGS->end[0]' - - * 0 in `REGS->start[1]' and 0 in `REGS->end[1]' - - * If an I-th group contains a J-th group in turn not contained - within any other group within group I and the function reports a - match of the I-th group, then it records in `REGS->start[J]' and - `REGS->end[J]' the last match (if it matched) of the J-th group. 
- - For example, when you match the pattern `((a*)b)*' against the - string `abb', group 2 last matches the empty string, so you get - what it previously matched: - - * 0 in `REGS->start[0]' and 3 in `REGS->end[0]' - - * 2 in `REGS->start[1]' and 3 in `REGS->end[1]' - - * 2 in `REGS->start[2]' and 2 in `REGS->end[2]' - - When you match the pattern `((a)*b)*' against the string `abb', - group 2 doesn't participate in the last match, so you get: - - * 0 in `REGS->start[0]' and 3 in `REGS->end[0]' - - * 2 in `REGS->start[1]' and 3 in `REGS->end[1]' - - * 0 in `REGS->start[2]' and 1 in `REGS->end[2]' - - * If an I-th group contains a J-th group in turn not contained - within any other group within group I and the function sets - `REGS->start[I]' and `REGS->end[I]' to -1, then it also sets - `REGS->start[J]' and `REGS->end[J]' to -1. - - For example, when you match the pattern `((a)*b)*c' against the - string `c', you get: - - * 0 in `REGS->start[0]' and 1 in `REGS->end[0]' - - * -1 in `REGS->start[1]' and -1 in `REGS->end[1]' - - * -1 in `REGS->start[2]' and -1 in `REGS->end[2]' - - -File: regex.info, Node: Freeing GNU Pattern Buffers, Prev: Using Registers, Up: GNU Regex Functions - -Freeing GNU Pattern Buffers ---------------------------- - - To free any allocated fields of a pattern buffer, you can use the -POSIX function described in *Note Freeing POSIX Pattern Buffers::, -since the type `regex_t'--the type for POSIX pattern buffers--is -equivalent to the type `re_pattern_buffer'. After freeing a pattern -buffer, you need to again compile a regular expression in it (*note GNU -Regular Expression Compiling::.) before passing it to a matching or -searching function. - - -File: regex.info, Node: POSIX Regex Functions, Next: BSD Regex Functions, Prev: GNU Regex Functions, Up: Programming with Regex - -POSIX Regex Functions -===================== - - If you're writing code that has to be POSIX compatible, you'll need -to use these functions. 
Their interfaces are as specified by POSIX, -draft 1003.2/D11.2. - -* Menu: - -* POSIX Pattern Buffers:: The regex_t type. -* POSIX Regular Expression Compiling:: regcomp () -* POSIX Matching:: regexec () -* Reporting Errors:: regerror () -* Using Byte Offsets:: The regmatch_t type. -* Freeing POSIX Pattern Buffers:: regfree () - - -File: regex.info, Node: POSIX Pattern Buffers, Next: POSIX Regular Expression Compiling, Up: POSIX Regex Functions - -POSIX Pattern Buffers ---------------------- - - To compile or match a given regular expression the POSIX way, you -must supply a pattern buffer exactly the way you do for GNU (*note GNU -Pattern Buffers::.). POSIX pattern buffers have type `regex_t', which -is equivalent to the GNU pattern buffer type `re_pattern_buffer'. - - -File: regex.info, Node: POSIX Regular Expression Compiling, Next: POSIX Matching, Prev: POSIX Pattern Buffers, Up: POSIX Regex Functions - -POSIX Regular Expression Compiling ----------------------------------- - - With POSIX, you can only search for a given regular expression; you -can't match it. To do this, you must first compile it in a pattern -buffer, using `regcomp'. - - To compile a pattern buffer, use: - - int - regcomp (regex_t *PREG, const char *REGEX, int CFLAGS) - -PREG is the initialized pattern buffer's address, REGEX is the regular -expression's address, and CFLAGS is the compilation flags, which Regex -considers as a collection of bits. Here are the valid bits, as defined -in `regex.h': - -`REG_EXTENDED' - says to use POSIX Extended Regular Expression syntax; if this isn't - set, then says to use POSIX Basic Regular Expression syntax. - `regcomp' sets PREG's `syntax' field accordingly. - -`REG_ICASE' - says to ignore case; `regcomp' sets PREG's `translate' field to a - translate table which ignores case, replacing anything you've put - there before. - -`REG_NOSUB' - says to set PREG's `no_sub' field; *note POSIX Matching::., for - what this means. 
- -`REG_NEWLINE' - says that a: - - * match-any-character operator (*note Match-any-character - Operator::.) doesn't match a newline. - - * nonmatching list not containing a newline (*note List - Operators::.) doesn't match a newline. - - * match-beginning-of-line operator (*note - Match-beginning-of-line Operator::.) matches the empty string - immediately after a newline, regardless of how `REG_NOTBOL' - is set (*note POSIX Matching::., for an explanation of - `REG_NOTBOL'). - - * match-end-of-line operator (*note Match-end-of-line - Operator::.) matches the empty string immediately before a - newline, regardless of how `REG_NOTEOL' is set (*note POSIX - Matching::., for an explanation of `REG_NOTEOL'). - - If `regcomp' successfully compiles the regular expression, it returns -zero and sets `*PREG' to the compiled pattern. Except for -`syntax' (which it sets as explained above), it also sets the same -fields the same way as does the GNU compiling function (*note GNU -Regular Expression Compiling::.). - - If `regcomp' can't compile the regular expression, it returns one of -the error codes listed here. (Except when noted differently, the -syntax in all examples below is basic regular expression syntax.) - -`REG_BADRPT' - For example, the consecutive repetition operators `**' in `a**' - are invalid. As another example, if the syntax is extended - regular expression syntax, then the repetition operator `*' with - nothing on which to operate in `*' is invalid. - -`REG_BADBR' - For example, the COUNT `-1' in `a\{-1' is invalid. - -`REG_EBRACE' - For example, `a\{1' is missing a close-interval operator. - -`REG_EBRACK' - For example, `[a' is missing a close-list operator. - -`REG_ERANGE' - For example, the range ending point `a' that collates lower than - does its starting point `z' in `[z-a]' is invalid. Also, the - range with the character class `[:alpha:]' as its starting point in - `[[:alpha:]-|]' is invalid.
- -`REG_ECTYPE' - For example, the character class name `foo' in `[[:foo:]]' is - invalid. - -`REG_EPAREN' - For example, `a\)' is missing an open-group operator and `\(a' is - missing a close-group operator. - -`REG_ESUBREG' - For example, the back reference `\2' that refers to a nonexistent - subexpression in `\(a\)\2' is invalid. - -`REG_EEND' - Returned when a regular expression causes no other more specific - error. - -`REG_EESCAPE' - For example, the trailing backslash `\' in `a\' is invalid, as is - the one in `\'. - -`REG_BADPAT' - For example, in the extended regular expression syntax, the empty - group `()' in `a()b' is invalid. - -`REG_ESIZE' - Returned when a regular expression needs a pattern buffer larger - than 65536 bytes. - -`REG_ESPACE' - Returned when a regular expression causes Regex to run out of - memory. - - -File: regex.info, Node: POSIX Matching, Next: Reporting Errors, Prev: POSIX Regular Expression Compiling, Up: POSIX Regex Functions - -POSIX Matching --------------- - - Matching the POSIX way means trying to match a null-terminated string -starting at its first character. Once you've compiled a pattern into a -pattern buffer (*note POSIX Regular Expression Compiling::.), you can -ask the matcher to match that pattern against a string using: - - int - regexec (const regex_t *PREG, const char *STRING, - size_t NMATCH, regmatch_t PMATCH[], int EFLAGS) - -PREG is the address of a pattern buffer for a compiled pattern. STRING -is the string you want to match. - - *Note Using Byte Offsets::, for an explanation of PMATCH. If you -pass zero for NMATCH or you compiled PREG with the compilation flag -`REG_NOSUB' set, then `regexec' will ignore PMATCH; otherwise, you must -allocate it to have at least NMATCH elements. `regexec' will record -NMATCH byte offsets in PMATCH, and set to -1 any unused elements up to -`PMATCH[NMATCH - 1]'. - - EFLAGS specifies "execution flags"--namely, the two bits `REG_NOTBOL' -and `REG_NOTEOL' (defined in `regex.h').
If you set `REG_NOTBOL', then -the match-beginning-of-line operator (*note Match-beginning-of-line -Operator::.) always fails to match. This lets you match against pieces -of a line, as you would need to if, say, searching for repeated -instances of a given pattern in a line; it would work correctly for -patterns both with and without match-beginning-of-line operators. -`REG_NOTEOL' works analogously for the match-end-of-line operator -(*note Match-end-of-line Operator::.); it exists for symmetry. - - `regexec' tries to find a match for PREG in STRING according to the -syntax in PREG's `syntax' field. (*Note POSIX Regular Expression -Compiling::, for how to set it.) The function returns zero if the -compiled pattern matches STRING and `REG_NOMATCH' (defined in -`regex.h') if it doesn't. - - -File: regex.info, Node: Reporting Errors, Next: Using Byte Offsets, Prev: POSIX Matching, Up: POSIX Regex Functions - -Reporting Errors ----------------- - - If either `regcomp' or `regexec' fails, it returns a nonzero error -code, the possibilities for which are defined in `regex.h'. *Note -POSIX Regular Expression Compiling::, and *Note POSIX Matching::, for -what these codes mean. To get an error string corresponding to these -codes, you can use: - - size_t - regerror (int ERRCODE, - const regex_t *PREG, - char *ERRBUF, - size_t ERRBUF_SIZE) - -ERRCODE is an error code, PREG is the address of the pattern buffer -which provoked the error, ERRBUF is the error buffer, and ERRBUF_SIZE -is ERRBUF's size. - - `regerror' returns the size in bytes of the error string -corresponding to ERRCODE (including its terminating null). If ERRBUF -and ERRBUF_SIZE are nonzero, it also returns in ERRBUF the first -ERRBUF_SIZE - 1 characters of the error string, followed by a null. -ERRBUF_SIZE must be a nonnegative number less than or equal to the size -in bytes of ERRBUF.
- - You can call `regerror' with a null ERRBUF and a zero ERRBUF_SIZE to -determine how large ERRBUF need be to accommodate `regerror''s error -string. - - -File: regex.info, Node: Using Byte Offsets, Next: Freeing POSIX Pattern Buffers, Prev: Reporting Errors, Up: POSIX Regex Functions - -Using Byte Offsets ------------------- - - In POSIX, variables of type `regmatch_t' hold information analogous, -but not identical, to GNU's registers (*note Using Registers::.). -To get information about registers in POSIX, pass to `regexec' a -nonzero PMATCH of type `regmatch_t', i.e., the address of a structure -of this type, defined in `regex.h': - - typedef struct - { - regoff_t rm_so; - regoff_t rm_eo; - } regmatch_t; - - When reading in *Note Using Registers::, about how the matching -function stores the information into the registers, substitute PMATCH -for REGS, `PMATCH[I].rm_so' for `REGS->start[I]' and -`PMATCH[I].rm_eo' for `REGS->end[I]'. - - -File: regex.info, Node: Freeing POSIX Pattern Buffers, Prev: Using Byte Offsets, Up: POSIX Regex Functions - -Freeing POSIX Pattern Buffers ------------------------------ - - To free any allocated fields of a pattern buffer, use: - - void - regfree (regex_t *PREG) - -PREG is the pattern buffer whose allocated fields you want freed. -`regfree' also sets PREG's `allocated' and `used' fields to zero. -After freeing a pattern buffer, you need to again compile a regular -expression in it (*note POSIX Regular Expression Compiling::.) before -passing it to the matching function (*note POSIX Matching::.). - - -File: regex.info, Node: BSD Regex Functions, Prev: POSIX Regex Functions, Up: Programming with Regex - -BSD Regex Functions -=================== - - If you're writing code that has to be Berkeley UNIX compatible, -you'll need to use these functions whose interfaces are the same as -those in Berkeley UNIX.
- -* Menu: - -* BSD Regular Expression Compiling:: re_comp () -* BSD Searching:: re_exec () - - -File: regex.info, Node: BSD Regular Expression Compiling, Next: BSD Searching, Up: BSD Regex Functions - -BSD Regular Expression Compiling --------------------------------- - - With Berkeley UNIX, you can only search for a given regular -expression; you can't match one. To search for it, you must first -compile it. Before you compile it, you must indicate the regular -expression syntax you want it compiled according to by setting the -variable `re_syntax_options' (declared in `regex.h') to some syntax -(*note Regular Expression Syntax::.). - - To compile a regular expression, use: - - char * - re_comp (char *REGEX) - -REGEX is the address of a null-terminated regular expression. -`re_comp' uses an internal pattern buffer, so you can use only the most -recently compiled pattern buffer. This means that if you want to use a -given regular expression that you've already compiled--but it isn't the -latest one you've compiled--you'll have to recompile it. If you call -`re_comp' with the null string (*not* the empty string) as the -argument, it doesn't change the contents of the pattern buffer. - - If `re_comp' successfully compiles the regular expression, it returns -zero. If it can't compile the regular expression, it returns an error -string. `re_comp''s error messages are identical to those of -`re_compile_pattern' (*note GNU Regular Expression Compiling::.). - - -File: regex.info, Node: BSD Searching, Prev: BSD Regular Expression Compiling, Up: BSD Regex Functions - -BSD Searching -------------- - - Searching the Berkeley UNIX way means searching in a string starting -at its first character and trying successive positions within it to -find a match.
Once you've compiled a pattern using `re_comp' (*note -BSD Regular Expression Compiling::.), you can ask Regex to search for -that pattern in a string using: - - int - re_exec (char *STRING) - -STRING is the address of the null-terminated string in which you want -to search. - - `re_exec' returns either 1 for success or 0 for failure. It -automatically uses a GNU fastmap (*note Searching with Fastmaps::.). - - -File: regex.info, Node: Copying, Next: Index, Prev: Programming with Regex, Up: Top - -GNU GENERAL PUBLIC LICENSE -************************** - - Version 2, June 1991 - - Copyright (C) 1989, 1991 Free Software Foundation, Inc. - 675 Mass Ave, Cambridge, MA 02139, USA - - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - -Preamble -======== - - The licenses for most software are designed to take away your freedom -to share and change it. By contrast, the GNU General Public License is -intended to guarantee your freedom to share and change free -software--to make sure the software is free for all its users. This -General Public License applies to most of the Free Software -Foundation's software and to any other program whose authors commit to -using it. (Some other Free Software Foundation software is covered by -the GNU Library General Public License instead.) You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -this service if you wish), that you receive source code or can get it -if you want it, that you can change the software or use pieces of it in -new free programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. 
-These restrictions translate to certain responsibilities for you if you -distribute copies of the software, or if you modify it. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must give the recipients all the rights that -you have. You must make sure that they, too, receive or can get the -source code. And you must show them these terms so they know their -rights. - - We protect your rights with two steps: (1) copyright the software, and -(2) offer you this license which gives you legal permission to copy, -distribute and/or modify the software. - - Also, for each author's protection and ours, we want to make certain -that everyone understands that there is no warranty for this free -software. If the software is modified by someone else and passed on, we -want its recipients to know that what they have is not the original, so -that any problems introduced by others will not reflect on the original -authors' reputations. - - Finally, any free program is threatened constantly by software -patents. We wish to avoid the danger that redistributors of a free -program will individually obtain patent licenses, in effect making the -program proprietary. To prevent this, we have made it clear that any -patent must be licensed for everyone's free use or not licensed at all. - - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 1. This License applies to any program or other work which contains a - notice placed by the copyright holder saying it may be distributed - under the terms of this General Public License. The "Program", - below, refers to any such program or work, and a "work based on - the Program" means either the Program or any derivative work under - copyright law: that is to say, a work containing the Program or a - portion of it, either verbatim or with modifications and/or - translated into another language. 
(Hereinafter, translation is - included without limitation in the term "modification".) Each - licensee is addressed as "you". - - Activities other than copying, distribution and modification are - not covered by this License; they are outside its scope. The act - of running the Program is not restricted, and the output from the - Program is covered only if its contents constitute a work based on - the Program (independent of having been made by running the - Program). Whether that is true depends on what the Program does. - - 2. You may copy and distribute verbatim copies of the Program's - source code as you receive it, in any medium, provided that you - conspicuously and appropriately publish on each copy an appropriate - copyright notice and disclaimer of warranty; keep intact all the - notices that refer to this License and to the absence of any - warranty; and give any other recipients of the Program a copy of - this License along with the Program. - - You may charge a fee for the physical act of transferring a copy, - and you may at your option offer warranty protection in exchange - for a fee. - - 3. You may modify your copy or copies of the Program or any portion - of it, thus forming a work based on the Program, and copy and - distribute such modifications or work under the terms of Section 1 - above, provided that you also meet all of these conditions: - - a. You must cause the modified files to carry prominent notices - stating that you changed the files and the date of any change. - - b. You must cause any work that you distribute or publish, that - in whole or in part contains or is derived from the Program - or any part thereof, to be licensed as a whole at no charge - to all third parties under the terms of this License. - - c. 
If the modified program normally reads commands interactively - when run, you must cause it, when started running for such - interactive use in the most ordinary way, to print or display - an announcement including an appropriate copyright notice and - a notice that there is no warranty (or else, saying that you - provide a warranty) and that users may redistribute the - program under these conditions, and telling the user how to - view a copy of this License. (Exception: if the Program - itself is interactive but does not normally print such an - announcement, your work based on the Program is not required - to print an announcement.) - - These requirements apply to the modified work as a whole. If - identifiable sections of that work are not derived from the - Program, and can be reasonably considered independent and separate - works in themselves, then this License, and its terms, do not - apply to those sections when you distribute them as separate - works. But when you distribute the same sections as part of a - whole which is a work based on the Program, the distribution of - the whole must be on the terms of this License, whose permissions - for other licensees extend to the entire whole, and thus to each - and every part regardless of who wrote it. - - Thus, it is not the intent of this section to claim rights or - contest your rights to work written entirely by you; rather, the - intent is to exercise the right to control the distribution of - derivative or collective works based on the Program. - - In addition, mere aggregation of another work not based on the - Program with the Program (or with a work based on the Program) on - a volume of a storage or distribution medium does not bring the - other work under the scope of this License. - - 4. 
You may copy and distribute the Program (or a work based on it, - under Section 2) in object code or executable form under the terms - of Sections 1 and 2 above provided that you also do one of the - following: - - a. Accompany it with the complete corresponding machine-readable - source code, which must be distributed under the terms of - Sections 1 and 2 above on a medium customarily used for - software interchange; or, - - b. Accompany it with a written offer, valid for at least three - years, to give any third party, for a charge no more than your - cost of physically performing source distribution, a complete - machine-readable copy of the corresponding source code, to be - distributed under the terms of Sections 1 and 2 above on a - medium customarily used for software interchange; or, - - c. Accompany it with the information you received as to the offer - to distribute corresponding source code. (This alternative is - allowed only for noncommercial distribution and only if you - received the program in object code or executable form with - such an offer, in accord with Subsection b above.) - - The source code for a work means the preferred form of the work for - making modifications to it. For an executable work, complete - source code means all the source code for all modules it contains, - plus any associated interface definition files, plus the scripts - used to control compilation and installation of the executable. - However, as a special exception, the source code distributed need - not include anything that is normally distributed (in either - source or binary form) with the major components (compiler, - kernel, and so on) of the operating system on which the executable - runs, unless that component itself accompanies the executable. 
- - If distribution of executable or object code is made by offering - access to copy from a designated place, then offering equivalent - access to copy the source code from the same place counts as - distribution of the source code, even though third parties are not - compelled to copy the source along with the object code. - - 5. You may not copy, modify, sublicense, or distribute the Program - except as expressly provided under this License. Any attempt - otherwise to copy, modify, sublicense or distribute the Program is - void, and will automatically terminate your rights under this - License. However, parties who have received copies, or rights, - from you under this License will not have their licenses - terminated so long as such parties remain in full compliance. - - 6. You are not required to accept this License, since you have not - signed it. However, nothing else grants you permission to modify - or distribute the Program or its derivative works. These actions - are prohibited by law if you do not accept this License. - Therefore, by modifying or distributing the Program (or any work - based on the Program), you indicate your acceptance of this - License to do so, and all its terms and conditions for copying, - distributing or modifying the Program or works based on it. - - 7. Each time you redistribute the Program (or any work based on the - Program), the recipient automatically receives a license from the - original licensor to copy, distribute or modify the Program - subject to these terms and conditions. You may not impose any - further restrictions on the recipients' exercise of the rights - granted herein. You are not responsible for enforcing compliance - by third parties to this License. - - 8. 
If, as a consequence of a court judgment or allegation of patent - infringement or for any other reason (not limited to patent - issues), conditions are imposed on you (whether by court order, - agreement or otherwise) that contradict the conditions of this - License, they do not excuse you from the conditions of this - License. If you cannot distribute so as to satisfy simultaneously - your obligations under this License and any other pertinent - obligations, then as a consequence you may not distribute the - Program at all. For example, if a patent license would not permit - royalty-free redistribution of the Program by all those who - receive copies directly or indirectly through you, then the only - way you could satisfy both it and this License would be to refrain - entirely from distribution of the Program. - - If any portion of this section is held invalid or unenforceable - under any particular circumstance, the balance of the section is - intended to apply and the section as a whole is intended to apply - in other circumstances. - - It is not the purpose of this section to induce you to infringe any - patents or other property right claims or to contest validity of - any such claims; this section has the sole purpose of protecting - the integrity of the free software distribution system, which is - implemented by public license practices. Many people have made - generous contributions to the wide range of software distributed - through that system in reliance on consistent application of that - system; it is up to the author/donor to decide if he or she is - willing to distribute software through any other system and a - licensee cannot impose that choice. - - This section is intended to make thoroughly clear what is believed - to be a consequence of the rest of this License. - - 9. 
If the distribution and/or use of the Program is restricted in - certain countries either by patents or by copyrighted interfaces, - the original copyright holder who places the Program under this - License may add an explicit geographical distribution limitation - excluding those countries, so that distribution is permitted only - in or among countries not thus excluded. In such case, this - License incorporates the limitation as if written in the body of - this License. - - 10. The Free Software Foundation may publish revised and/or new - versions of the General Public License from time to time. Such - new versions will be similar in spirit to the present version, but - may differ in detail to address new problems or concerns. - - Each version is given a distinguishing version number. If the - Program specifies a version number of this License which applies - to it and "any later version", you have the option of following - the terms and conditions either of that version or of any later - version published by the Free Software Foundation. If the Program - does not specify a version number of this License, you may choose - any version ever published by the Free Software Foundation. - - 11. If you wish to incorporate parts of the Program into other free - programs whose distribution conditions are different, write to the - author to ask for permission. For software which is copyrighted - by the Free Software Foundation, write to the Free Software - Foundation; we sometimes make exceptions for this. Our decision - will be guided by the two goals of preserving the free status of - all derivatives of our free software and of promoting the sharing - and reuse of software generally. - - NO WARRANTY - - 12. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO - WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE - LAW. 
EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT - HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT - WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT - NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND - FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE - QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE - PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY - SERVICING, REPAIR OR CORRECTION. - - 13. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN - WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY - MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE - LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, - INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR - INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF - DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU - OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY - OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN - ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. - - END OF TERMS AND CONDITIONS - -Appendix: How to Apply These Terms to Your New Programs -======================================================= - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these -terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - ONE LINE TO GIVE THE PROGRAM'S NAME AND A BRIEF IDEA OF WHAT IT DOES. 
- Copyright (C) 19YY NAME OF AUTHOR - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - - Also add information on how to contact you by electronic and paper -mail. - - If the program is interactive, make it output a short notice like this -when it starts in an interactive mode: - - Gnomovision version 69, Copyright (C) 19YY NAME OF AUTHOR - Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - - The hypothetical commands `show w' and `show c' should show the -appropriate parts of the General Public License. Of course, the -commands you use may be called something other than `show w' and `show -c'; they could even be mouse-clicks or menu items--whatever suits your -program. - - You should also get your employer (if you work as a programmer) or -your school, if any, to sign a "copyright disclaimer" for the program, -if necessary. Here is a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the program - `Gnomovision' (which makes passes at compilers) written by James Hacker. - - SIGNATURE OF TY COON, 1 April 1989 - Ty Coon, President of Vice - - This General Public License does not permit incorporating your -program into proprietary programs. 
If your program is a subroutine -library, you may consider it more useful to permit linking proprietary -applications with the library. If this is what you want to do, use the -GNU Library General Public License instead of this License. - - -File: regex.info, Node: Index, Prev: Copying, Up: Top - -Index -***** - -* Menu: - -* $: Match-end-of-line Operator. -* (: Grouping Operators. -* ): Grouping Operators. -* *: Match-zero-or-more Operator. -* +: Match-one-or-more Operator. -* -: List Operators. -* .: Match-any-character Operator. -* :] in regex: Character Class Operators. -* ?: Match-zero-or-one Operator. -* {: Interval Operators. -* }: Interval Operators. -* [: in regex: Character Class Operators. -* [^: List Operators. -* [: List Operators. -* \': Match-end-of-buffer Operator. -* \<: Match-beginning-of-word Operator. -* \>: Match-end-of-word Operator. -* \{: Interval Operators. -* \}: Interval Operators. -* \b: Match-word-boundary Operator. -* \B: Match-within-word Operator. -* \s: Match-syntactic-class Operator. -* \S: Match-not-syntactic-class Operator. -* \w: Match-word-constituent Operator. -* \W: Match-non-word-constituent Operator. -* \`: Match-beginning-of-buffer Operator. -* \: List Operators. -* ]: List Operators. -* ^: List Operators. -* allocated initialization: GNU Regular Expression Compiling. -* alternation operator: Alternation Operator. -* alternation operator and ^: Match-beginning-of-line Operator. -* anchoring: Anchoring Operators. -* anchors: Match-end-of-line Operator. -* anchors: Match-beginning-of-line Operator. -* Awk: Predefined Syntaxes. -* back references: Back-reference Operator. -* backtracking: Match-zero-or-more Operator. -* backtracking: Alternation Operator. -* beginning-of-line operator: Match-beginning-of-line Operator. -* bracket expression: List Operators. -* buffer field, set by re_compile_pattern: GNU Regular Expression Compiling. -* buffer initialization: GNU Regular Expression Compiling. 
-* character classes: Character Class Operators. -* Egrep: Predefined Syntaxes. -* Emacs: Predefined Syntaxes. -* end in struct re_registers: Using Registers. -* end-of-line operator: Match-end-of-line Operator. -* fastmap initialization: GNU Regular Expression Compiling. -* fastmaps: Searching with Fastmaps. -* fastmap_accurate field, set by re_compile_pattern: GNU Regular Expression Compiling. -* Grep: Predefined Syntaxes. -* grouping: Grouping Operators. -* ignoring case: POSIX Regular Expression Compiling. -* interval expression: Interval Operators. -* matching list: List Operators. -* matching newline: List Operators. -* matching with GNU functions: GNU Matching. -* newline_anchor field in pattern buffer: Match-beginning-of-line Operator. -* nonmatching list: List Operators. -* not_bol field in pattern buffer: Match-beginning-of-line Operator. -* num_regs in struct re_registers: Using Registers. -* open-group operator and ^: Match-beginning-of-line Operator. -* or operator: Alternation Operator. -* parenthesizing: Grouping Operators. -* pattern buffer initialization: GNU Regular Expression Compiling. -* pattern buffer, definition of: GNU Pattern Buffers. -* POSIX Awk: Predefined Syntaxes. -* range argument to re_search: GNU Searching. -* regex.c: Overview. -* regex.h: Overview. -* regexp anchoring: Anchoring Operators. -* regmatch_t: Using Byte Offsets. -* regs_allocated: Using Registers. -* REGS_FIXED: Using Registers. -* REGS_REALLOCATE: Using Registers. -* REGS_UNALLOCATED: Using Registers. -* regular expressions, syntax of: Regular Expression Syntax. -* REG_EXTENDED: POSIX Regular Expression Compiling. -* REG_ICASE: POSIX Regular Expression Compiling. -* REG_NEWLINE: POSIX Regular Expression Compiling. -* REG_NOSUB: POSIX Regular Expression Compiling. -* RE_BACKSLASH_ESCAPE_IN_LIST: Syntax Bits. -* RE_BK_PLUS_QM: Syntax Bits. -* RE_CHAR_CLASSES: Syntax Bits. -* RE_CONTEXT_INDEP_ANCHORS: Syntax Bits. 
-* RE_CONTEXT_INDEP_ANCHORS (and ^): Match-beginning-of-line Operator. -* RE_CONTEXT_INDEP_OPS: Syntax Bits. -* RE_CONTEXT_INVALID_OPS: Syntax Bits. -* RE_DOT_NEWLINE: Syntax Bits. -* RE_DOT_NOT_NULL: Syntax Bits. -* RE_INTERVALS: Syntax Bits. -* RE_LIMITED_OPS: Syntax Bits. -* RE_NEWLINE_ALT: Syntax Bits. -* RE_NO_BK_BRACES: Syntax Bits. -* RE_NO_BK_PARENS: Syntax Bits. -* RE_NO_BK_REFS: Syntax Bits. -* RE_NO_BK_VBAR: Syntax Bits. -* RE_NO_EMPTY_RANGES: Syntax Bits. -* re_nsub field, set by re_compile_pattern: GNU Regular Expression Compiling. -* re_pattern_buffer definition: GNU Pattern Buffers. -* re_registers: Using Registers. -* re_syntax_options initialization: GNU Regular Expression Compiling. -* RE_UNMATCHED_RIGHT_PAREN_ORD: Syntax Bits. -* searching with GNU functions: GNU Searching. -* start argument to re_search: GNU Searching. -* start in struct re_registers: Using Registers. -* struct re_pattern_buffer definition: GNU Pattern Buffers. -* subexpressions: Grouping Operators. -* syntax field, set by re_compile_pattern: GNU Regular Expression Compiling. -* syntax bits: Syntax Bits. -* syntax initialization: GNU Regular Expression Compiling. -* syntax of regular expressions: Regular Expression Syntax. -* translate initialization: GNU Regular Expression Compiling. -* used field, set by re_compile_pattern: GNU Regular Expression Compiling. -* word boundaries, matching: Match-word-boundary Operator. -* \: The Backslash Character. -* \(: Grouping Operators. -* \): Grouping Operators. -* \|: Alternation Operator. -* ^: Match-beginning-of-line Operator. -* |: Alternation Operator. - - - -Tag Table: -Node: Top1064 -Node: Overview4562 -Node: Regular Expression Syntax6746 -Node: Syntax Bits7916 -Node: Predefined Syntaxes14018 -Node: Collating Elements vs. 
Characters17872 -Node: The Backslash Character18835 -Node: Common Operators21992 -Node: Match-self Operator23445 -Node: Match-any-character Operator23941 -Node: Concatenation Operator24520 -Node: Repetition Operators25017 -Node: Match-zero-or-more Operator25436 -Node: Match-one-or-more Operator27483 -Node: Match-zero-or-one Operator28341 -Node: Interval Operators29196 -Node: Alternation Operator30991 -Node: List Operators32489 -Node: Character Class Operators35272 -Node: Range Operator36901 -Node: Grouping Operators38930 -Node: Back-reference Operator40251 -Node: Anchoring Operators43073 -Node: Match-beginning-of-line Operator43447 -Node: Match-end-of-line Operator44779 -Node: GNU Operators45518 -Node: Word Operators45767 -Node: Non-Emacs Syntax Tables46391 -Node: Match-word-boundary Operator47465 -Node: Match-within-word Operator47858 -Node: Match-beginning-of-word Operator48255 -Node: Match-end-of-word Operator48588 -Node: Match-word-constituent Operator48908 -Node: Match-non-word-constituent Operator49234 -Node: Buffer Operators49545 -Node: Match-beginning-of-buffer Operator49952 -Node: Match-end-of-buffer Operator50264 -Node: GNU Emacs Operators50558 -Node: Syntactic Class Operators50901 -Node: Emacs Syntax Tables51307 -Node: Match-syntactic-class Operator51963 -Node: Match-not-syntactic-class Operator52560 -Node: What Gets Matched?53150 -Node: Programming with Regex53799 -Node: GNU Regex Functions54237 -Node: GNU Pattern Buffers55078 -Node: GNU Regular Expression Compiling58303 -Node: GNU Matching61181 -Node: GNU Searching63101 -Node: Matching/Searching with Split Data64913 -Node: Searching with Fastmaps66369 -Node: GNU Translate Tables68921 -Node: Using Registers70892 -Node: Freeing GNU Pattern Buffers77000 -Node: POSIX Regex Functions77593 -Node: POSIX Pattern Buffers78266 -Node: POSIX Regular Expression Compiling78709 -Node: POSIX Matching82836 -Node: Reporting Errors84791 -Node: Using Byte Offsets86048 -Node: Freeing POSIX Pattern Buffers86861 -Node: BSD 
Regex Functions87467 -Node: BSD Regular Expression Compiling87886 -Node: BSD Searching89258 -Node: Copying89960 -Node: Index109122 - -End Tag Table diff --git a/gnu/lib/libregex/doc/regex.texi b/gnu/lib/libregex/doc/regex.texi deleted file mode 100644 index d93953ece20c..000000000000 --- a/gnu/lib/libregex/doc/regex.texi +++ /dev/null @@ -1,3138 +0,0 @@ -\input texinfo -@c %**start of header -@setfilename regex.info -@settitle Regex -@c %**end of header - -@c \\{fill-paragraph} works better (for me, anyway) if the text in the -@c source file isn't indented. -@paragraphindent 2 - -@c Define a new index for our magic constants. -@defcodeindex cn - -@c Put everything in one index (arbitrarily chosen to be the concept index). -@syncodeindex cn cp -@syncodeindex ky cp -@syncodeindex pg cp -@syncodeindex tp cp -@syncodeindex vr cp - -@c Here is what we use in the Info `dir' file: -@c * Regex: (regex). Regular expression library. - - -@ifinfo -This file documents the GNU regular expression library. - -Copyright (C) 1992, 1993 Free Software Foundation, Inc. - -Permission is granted to make and distribute verbatim copies of this -manual provided the copyright notice and this permission notice are -preserved on all copies. - -@ignore -Permission is granted to process this file through TeX and print the -results, provided the printed document carries a copying permission -notice identical to this one except for the removal of this paragraph -(this paragraph not being relevant to the printed manual). -@end ignore - -Permission is granted to copy and distribute modified versions of this -manual under the conditions for verbatim copying, provided also that the -section entitled ``GNU General Public License'' is included exactly as -in the original, and provided that the entire resulting derived work is -distributed under the terms of a permission notice identical to this one. 
- -Permission is granted to copy and distribute translations of this manual -into another language, under the above conditions for modified versions, -except that the section entitled ``GNU General Public License'' may be -included in a translation approved by the Free Software Foundation -instead of in the original English. -@end ifinfo - - -@titlepage - -@title Regex -@subtitle edition 0.12a -@subtitle 19 September 1992 -@author Kathryn A. Hargreaves -@author Karl Berry - -@page - -@vskip 0pt plus 1filll -Copyright @copyright{} 1992 Free Software Foundation. - -Permission is granted to make and distribute verbatim copies of this -manual provided the copyright notice and this permission notice are -preserved on all copies. - -Permission is granted to copy and distribute modified versions of this -manual under the conditions for verbatim copying, provided also that the -section entitled ``GNU General Public License'' is included exactly as -in the original, and provided that the entire resulting derived work is -distributed under the terms of a permission notice identical to this -one. - -Permission is granted to copy and distribute translations of this manual -into another language, under the above conditions for modified versions, -except that the section entitled ``GNU General Public License'' may be -included in a translation approved by the Free Software Foundation -instead of in the original English. - -@end titlepage - - -@ifinfo -@node Top, Overview, (dir), (dir) -@top Regular Expression Library - -This manual documents how to program with the GNU regular expression -library. This is edition 0.12a of the manual, 19 September 1992. - -The first part of this master menu lists the major nodes in this Info -document, including the index. The rest of the menu lists all the -lower level nodes in the document. 
- -@menu -* Overview:: -* Regular Expression Syntax:: -* Common Operators:: -* GNU Operators:: -* GNU Emacs Operators:: -* What Gets Matched?:: -* Programming with Regex:: -* Copying:: Copying and sharing Regex. -* Index:: General index. - --- The Detailed Node Listing --- - -Regular Expression Syntax - -* Syntax Bits:: -* Predefined Syntaxes:: -* Collating Elements vs. Characters:: -* The Backslash Character:: - -Common Operators - -* Match-self Operator:: Ordinary characters. -* Match-any-character Operator:: . -* Concatenation Operator:: Juxtaposition. -* Repetition Operators:: * + ? @{@} -* Alternation Operator:: | -* List Operators:: [...] [^...] -* Grouping Operators:: (...) -* Back-reference Operator:: \digit -* Anchoring Operators:: ^ $ - -Repetition Operators - -* Match-zero-or-more Operator:: * -* Match-one-or-more Operator:: + -* Match-zero-or-one Operator:: ? -* Interval Operators:: @{@} - -List Operators (@code{[} @dots{} @code{]} and @code{[^} @dots{} @code{]}) - -* Character Class Operators:: [:class:] -* Range Operator:: start-end - -Anchoring Operators - -* Match-beginning-of-line Operator:: ^ -* Match-end-of-line Operator:: $ - -GNU Operators - -* Word Operators:: -* Buffer Operators:: - -Word Operators - -* Non-Emacs Syntax Tables:: -* Match-word-boundary Operator:: \b -* Match-within-word Operator:: \B -* Match-beginning-of-word Operator:: \< -* Match-end-of-word Operator:: \> -* Match-word-constituent Operator:: \w -* Match-non-word-constituent Operator:: \W - -Buffer Operators - -* Match-beginning-of-buffer Operator:: \` -* Match-end-of-buffer Operator:: \' - -GNU Emacs Operators - -* Syntactic Class Operators:: - -Syntactic Class Operators - -* Emacs Syntax Tables:: -* Match-syntactic-class Operator:: \sCLASS -* Match-not-syntactic-class Operator:: \SCLASS - -Programming with Regex - -* GNU Regex Functions:: -* POSIX Regex Functions:: -* BSD Regex Functions:: - -GNU Regex Functions - -* GNU Pattern Buffers:: The re_pattern_buffer type. 
-* GNU Regular Expression Compiling:: re_compile_pattern () -* GNU Matching:: re_match () -* GNU Searching:: re_search () -* Matching/Searching with Split Data:: re_match_2 (), re_search_2 () -* Searching with Fastmaps:: re_compile_fastmap () -* GNU Translate Tables:: The `translate' field. -* Using Registers:: The re_registers type and related fns. -* Freeing GNU Pattern Buffers:: regfree () - -POSIX Regex Functions - -* POSIX Pattern Buffers:: The regex_t type. -* POSIX Regular Expression Compiling:: regcomp () -* POSIX Matching:: regexec () -* Reporting Errors:: regerror () -* Using Byte Offsets:: The regmatch_t type. -* Freeing POSIX Pattern Buffers:: regfree () - -BSD Regex Functions - -* BSD Regular Expression Compiling:: re_comp () -* BSD Searching:: re_exec () -@end menu -@end ifinfo -@node Overview, Regular Expression Syntax, Top, Top -@chapter Overview - -A @dfn{regular expression} (or @dfn{regexp}, or @dfn{pattern}) is a text -string that describes some (mathematical) set of strings. A regexp -@var{r} @dfn{matches} a string @var{s} if @var{s} is in the set of -strings described by @var{r}. - -Using the Regex library, you can: - -@itemize @bullet - -@item -see if a string matches a specified pattern as a whole, and - -@item -search within a string for a substring matching a specified pattern. - -@end itemize - -Some regular expressions match only one string, i.e., the set they -describe has only one member. For example, the regular expression -@samp{foo} matches the string @samp{foo} and no others. Other regular -expressions match more than one string, i.e., the set they describe has -more than one member. For example, the regular expression @samp{f*} -matches the set of strings made up of any number (including zero) of -@samp{f}s. 
As you can see, some characters in regular expressions match -themselves (such as @samp{f}) and some don't (such as @samp{*}); the -ones that don't match themselves instead let you specify patterns that -describe many different strings. - -To either match or search for a regular expression with the Regex -library functions, you must first compile it with a Regex pattern -compiling function. A @dfn{compiled pattern} is a regular expression -converted to the internal format used by the library functions. Once -you've compiled a pattern, you can use it for matching or searching any -number of times. - -The Regex library consists of two source files: @file{regex.h} and -@file{regex.c}. -@pindex regex.h -@pindex regex.c -Regex provides three groups of functions with which you can operate on -regular expressions. One group---the @sc{gnu} group---is more powerful -but not completely compatible with the other two, namely the @sc{posix} -and Berkeley @sc{unix} groups; its interface was designed specifically -for @sc{gnu}. The other groups have the same interfaces as do the -regular expression functions in @sc{posix} and Berkeley -@sc{unix}. - -We wrote this chapter with programmers in mind, not users of -programs---such as Emacs---that use Regex. We describe the Regex -library in its entirety, not how to write regular expressions that a -particular program understands. - - -@node Regular Expression Syntax, Common Operators, Overview, Top -@chapter Regular Expression Syntax - -@cindex regular expressions, syntax of -@cindex syntax of regular expressions - -@dfn{Characters} are things you can type. @dfn{Operators} are things in -a regular expression that match one or more characters. You compose -regular expressions from operators, which in turn you specify using one -or more characters. - -Most characters represent what we call the match-self operator, i.e., -they match themselves; we call these characters @dfn{ordinary}. 
Other -characters represent either all or parts of fancier operators; e.g., -@samp{.} represents what we call the match-any-character operator -(which, no surprise, matches (almost) any character); we call these -characters @dfn{special}. Two different things determine what -characters represent what operators: - -@enumerate -@item -the regular expression syntax your program has told the Regex library to -recognize, and - -@item -the context of the character in the regular expression. -@end enumerate - -In the following sections, we describe these things in more detail. - -@menu -* Syntax Bits:: -* Predefined Syntaxes:: -* Collating Elements vs. Characters:: -* The Backslash Character:: -@end menu - - -@node Syntax Bits, Predefined Syntaxes, , Regular Expression Syntax -@section Syntax Bits - -@cindex syntax bits - -In any particular syntax for regular expressions, some characters are -always special, others are sometimes special, and others are never -special. The particular syntax that Regex recognizes for a given -regular expression depends on the value in the @code{syntax} field of -the pattern buffer of that regular expression. - -You get a pattern buffer by compiling a regular expression. @xref{GNU -Pattern Buffers}, and @ref{POSIX Pattern Buffers}, for more information -on pattern buffers. @xref{GNU Regular Expression Compiling}, @ref{POSIX -Regular Expression Compiling}, and @ref{BSD Regular Expression -Compiling}, for more information on compiling. - -Regex considers the value of the @code{syntax} field to be a collection -of bits; we refer to these bits as @dfn{syntax bits}. In most cases, -they affect what characters represent what operators. We describe the -meanings of the operators to which we refer in @ref{Common Operators}, -@ref{GNU Operators}, and @ref{GNU Emacs Operators}. 
- -For reference, here is the complete list of syntax bits, in alphabetical -order: - -@table @code - -@cnindex RE_BACKSLASH_ESCAPE_IN_LISTS -@item RE_BACKSLASH_ESCAPE_IN_LISTS -If this bit is set, then @samp{\} inside a list (@pxref{List Operators}) -quotes (makes ordinary, if it's special) the following character; if -this bit isn't set, then @samp{\} is an ordinary character inside lists. -(@xref{The Backslash Character}, for what `\' does outside of lists.) - -@cnindex RE_BK_PLUS_QM -@item RE_BK_PLUS_QM -If this bit is set, then @samp{\+} represents the match-one-or-more -operator and @samp{\?} represents the match-zero-or-one operator; if -this bit isn't set, then @samp{+} represents the match-one-or-more -operator and @samp{?} represents the match-zero-or-one operator. This -bit is irrelevant if @code{RE_LIMITED_OPS} is set. - -@cnindex RE_CHAR_CLASSES -@item RE_CHAR_CLASSES -If this bit is set, then you can use character classes in lists; if this -bit isn't set, then you can't. - -@cnindex RE_CONTEXT_INDEP_ANCHORS -@item RE_CONTEXT_INDEP_ANCHORS -If this bit is set, then @samp{^} and @samp{$} are special anywhere outside -a list; if this bit isn't set, then these characters are special only in -certain contexts. @xref{Match-beginning-of-line Operator}, and -@ref{Match-end-of-line Operator}. - -@cnindex RE_CONTEXT_INDEP_OPS -@item RE_CONTEXT_INDEP_OPS -If this bit is set, then certain characters are special anywhere outside -a list; if this bit isn't set, then those characters are special only in -some contexts and are ordinary elsewhere. Specifically, if this bit -isn't set then @samp{*}, and (if the syntax bit @code{RE_LIMITED_OPS} -isn't set) @samp{+} and @samp{?} (or @samp{\+} and @samp{\?}, depending -on the syntax bit @code{RE_BK_PLUS_QM}) represent repetition operators -only if they're not first in a regular expression or just after an -open-group or alternation operator.
The same holds for @samp{@{} (or -@samp{\@{}, depending on the syntax bit @code{RE_NO_BK_BRACES}) if -it is the beginning of a valid interval and the syntax bit -@code{RE_INTERVALS} is set. - -@cnindex RE_CONTEXT_INVALID_OPS -@item RE_CONTEXT_INVALID_OPS -If this bit is set, then repetition and alternation operators can't be -in certain positions within a regular expression. Specifically, the -regular expression is invalid if it has: - -@itemize @bullet - -@item -a repetition operator first in the regular expression or just after a -match-beginning-of-line, open-group, or alternation operator; or - -@item -an alternation operator first or last in the regular expression, just -before a match-end-of-line operator, or just after an alternation or -open-group operator. - -@end itemize - -If this bit isn't set, then you can put the characters representing the -repetition and alternation operators anywhere in a regular expression. -Whether or not they will in fact be operators in certain positions -depends on other syntax bits. - -@cnindex RE_DOT_NEWLINE -@item RE_DOT_NEWLINE -If this bit is set, then the match-any-character operator matches -a newline; if this bit isn't set, then it doesn't. - -@cnindex RE_DOT_NOT_NULL -@item RE_DOT_NOT_NULL -If this bit is set, then the match-any-character operator doesn't match -a null character; if this bit isn't set, then it does. - -@cnindex RE_INTERVALS -@item RE_INTERVALS -If this bit is set, then Regex recognizes interval operators; if this bit -isn't set, then it doesn't. - -@cnindex RE_LIMITED_OPS -@item RE_LIMITED_OPS -If this bit is set, then Regex doesn't recognize the match-one-or-more, -match-zero-or-one or alternation operators; if this bit isn't set, then -it does. - -@cnindex RE_NEWLINE_ALT -@item RE_NEWLINE_ALT -If this bit is set, then newline represents the alternation operator; if -this bit isn't set, then newline is ordinary.
- -@cnindex RE_NO_BK_BRACES -@item RE_NO_BK_BRACES -If this bit is set, then @samp{@{} represents the open-interval operator -and @samp{@}} represents the close-interval operator; if this bit isn't -set, then @samp{\@{} represents the open-interval operator and -@samp{\@}} represents the close-interval operator. This bit is relevant -only if @code{RE_INTERVALS} is set. - -@cnindex RE_NO_BK_PARENS -@item RE_NO_BK_PARENS -If this bit is set, then @samp{(} represents the open-group operator and -@samp{)} represents the close-group operator; if this bit isn't set, then -@samp{\(} represents the open-group operator and @samp{\)} represents -the close-group operator. - -@cnindex RE_NO_BK_REFS -@item RE_NO_BK_REFS -If this bit is set, then Regex doesn't recognize @samp{\}@var{digit} as -the back reference operator; if this bit isn't set, then it does. - -@cnindex RE_NO_BK_VBAR -@item RE_NO_BK_VBAR -If this bit is set, then @samp{|} represents the alternation operator; -if this bit isn't set, then @samp{\|} represents the alternation -operator. This bit is irrelevant if @code{RE_LIMITED_OPS} is set. - -@cnindex RE_NO_EMPTY_RANGES -@item RE_NO_EMPTY_RANGES -If this bit is set, then a regular expression with a range whose ending -point collates lower than its starting point is invalid; if this bit -isn't set, then Regex considers such a range to be empty. - -@cnindex RE_UNMATCHED_RIGHT_PAREN_ORD -@item RE_UNMATCHED_RIGHT_PAREN_ORD -If this bit is set and the regular expression has no matching open-group -operator, then Regex considers what would otherwise be a close-group -operator (based on how @code{RE_NO_BK_PARENS} is set) to match @samp{)}. - -@end table - - -@node Predefined Syntaxes, Collating Elements vs. 
Characters, Syntax Bits, Regular Expression Syntax -@section Predefined Syntaxes - -If you're programming with Regex, you can set a pattern buffer's -(@pxref{GNU Pattern Buffers}, and @ref{POSIX Pattern Buffers}) -@code{syntax} field either to an arbitrary combination of syntax bits -(@pxref{Syntax Bits}) or else to one of the configurations defined by Regex. -These configurations define the syntaxes used by certain -programs---@sc{gnu} Emacs, -@cindex Emacs -@sc{posix} Awk, -@cindex POSIX Awk -traditional Awk, -@cindex Awk -Grep, -@cindex Grep -@cindex Egrep -Egrep---in addition to syntaxes for @sc{posix} basic and extended -regular expressions. - -The predefined syntaxes---taken directly from @file{regex.h}---are: - -@example -#define RE_SYNTAX_EMACS 0 - -#define RE_SYNTAX_AWK \ - (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ - | RE_NO_BK_PARENS | RE_NO_BK_REFS \ - | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ - | RE_UNMATCHED_RIGHT_PAREN_ORD) - -#define RE_SYNTAX_POSIX_AWK \ - (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS) - -#define RE_SYNTAX_GREP \ - (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ - | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \ - | RE_NEWLINE_ALT) - -#define RE_SYNTAX_EGREP \ - (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \ - | RE_NEWLINE_ALT | RE_NO_BK_PARENS \ - | RE_NO_BK_VBAR) - -#define RE_SYNTAX_POSIX_EGREP \ - (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES) - -/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */ -#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC - -#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC - -/* Syntax bits common to both basic and extended POSIX regex syntax. */ -#define _RE_SYNTAX_POSIX_COMMON \ - (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \ - | RE_INTERVALS | RE_NO_EMPTY_RANGES) - -#define RE_SYNTAX_POSIX_BASIC \ - (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM) - -/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes - RE_LIMITED_OPS, i.e., \?
\+ \| are not recognized. Actually, this - isn't minimal, since other operators, such as \`, aren't disabled. */ -#define RE_SYNTAX_POSIX_MINIMAL_BASIC \ - (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS) - -#define RE_SYNTAX_POSIX_EXTENDED \ - (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ - | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ - | RE_UNMATCHED_RIGHT_PAREN_ORD) - -/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS - replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */ -#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \ - (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \ - | RE_NO_BK_PARENS | RE_NO_BK_REFS \ - | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD) -@end example - -@node Collating Elements vs. Characters, The Backslash Character, Predefined Syntaxes, Regular Expression Syntax -@section Collating Elements vs.@: Characters - -@sc{posix} generalizes the notion of a character to that of a -collating element. It defines a @dfn{collating element} to be ``a -sequence of one or more bytes defined in the current collating sequence -as a unit of collation.'' - -This generalizes the notion of a character in -two ways. First, a single character can map into two or more collating -elements. For example, the German -@tex -`\ss' -@end tex -@ifinfo -``es-zet'' -@end ifinfo -collates as the collating element @samp{s} followed by another collating -element @samp{s}. Second, two or more characters can map into one -collating element. For example, the Spanish @samp{ll} collates after -@samp{l} and before @samp{m}. - -Since @sc{posix}'s ``collating element'' preserves the essential idea of -a ``character,'' we use the latter, more familiar, term in this document. - -@node The Backslash Character, , Collating Elements vs. 
Characters, Regular Expression Syntax -@section The Backslash Character - -@cindex \ -The @samp{\} character has one of four different meanings, depending on -the context in which you use it and what syntax bits are set -(@pxref{Syntax Bits}). It can: 1) stand for itself, 2) quote the next -character, 3) introduce an operator, or 4) do nothing. - -@enumerate -@item -It stands for itself inside a list -(@pxref{List Operators}) if the syntax bit -@code{RE_BACKSLASH_ESCAPE_IN_LISTS} is not set. For example, @samp{[\]} -would match @samp{\}. - -@item -It quotes (makes ordinary, if it's special) the next character when you -use it either: - -@itemize @bullet -@item -outside a list,@footnote{Sometimes -you don't have to explicitly quote special characters to make -them ordinary. For instance, most characters lose any special meaning -inside a list (@pxref{List Operators}). In addition, if the syntax bits -@code{RE_CONTEXT_INVALID_OPS} and @code{RE_CONTEXT_INDEP_OPS} -aren't set, then (for historical reasons) the matcher considers special -characters ordinary if they are in contexts where the operations they -represent make no sense; for example, the match-zero-or-more -operator (represented by @samp{*}) then matches itself in the regular -expression @samp{*foo} because there is no preceding expression on which -it can operate. It is poor practice, however, to depend on this -behavior; if you want a special character to be ordinary outside a list, -it's better to always quote it, regardless.} or - -@item -inside a list and the syntax bit @code{RE_BACKSLASH_ESCAPE_IN_LISTS} is set. - -@end itemize - -@item -It introduces an operator when followed by certain ordinary -characters---sometimes only when certain syntax bits are set. See the -cases @code{RE_BK_PLUS_QM}, @code{RE_NO_BK_BRACES}, @code{RE_NO_BK_VBAR}, -@code{RE_NO_BK_PARENS}, @code{RE_NO_BK_REFS} in @ref{Syntax Bits}.
Also: - -@itemize @bullet -@item -@samp{\b} represents the match-word-boundary operator -(@pxref{Match-word-boundary Operator}). - -@item -@samp{\B} represents the match-within-word operator -(@pxref{Match-within-word Operator}). - -@item -@samp{\<} represents the match-beginning-of-word operator @* -(@pxref{Match-beginning-of-word Operator}). - -@item -@samp{\>} represents the match-end-of-word operator -(@pxref{Match-end-of-word Operator}). - -@item -@samp{\w} represents the match-word-constituent operator -(@pxref{Match-word-constituent Operator}). - -@item -@samp{\W} represents the match-non-word-constituent operator -(@pxref{Match-non-word-constituent Operator}). - -@item -@samp{\`} represents the match-beginning-of-buffer -operator and @samp{\'} represents the match-end-of-buffer operator -(@pxref{Buffer Operators}). - -@item -If Regex was compiled with the C preprocessor symbol @code{emacs} -defined, then @samp{\s@var{class}} represents the match-syntactic-class -operator and @samp{\S@var{class}} represents the -match-not-syntactic-class operator (@pxref{Syntactic Class Operators}). - -@end itemize - -@item -In all other cases, Regex ignores @samp{\}. For example, -@samp{\n} matches @samp{n}. - -@end enumerate - -@node Common Operators, GNU Operators, Regular Expression Syntax, Top -@chapter Common Operators - -You compose regular expressions from operators. In the following -sections, we describe the regular expression operators specified by -@sc{posix}; @sc{gnu} also uses these. Most operators have more than one -representation as characters. @xref{Regular Expression Syntax}, for -what characters represent what operators under what circumstances. - -For most operators that can be represented in two ways, one -representation is a single character and the other is that character -preceded by @samp{\}. For example, either @samp{(} or @samp{\(} -represents the open-group operator. 
Which one does depends on the -setting of a syntax bit, in this case @code{RE_NO_BK_PARENS}. Why is -this so? Historical reasons dictate some of the varying -representations, while @sc{posix} dictates others. - -Finally, almost all characters lose any special meaning inside a list -(@pxref{List Operators}). - -@menu -* Match-self Operator:: Ordinary characters. -* Match-any-character Operator:: . -* Concatenation Operator:: Juxtaposition. -* Repetition Operators:: * + ? @{@} -* Alternation Operator:: | -* List Operators:: [...] [^...] -* Grouping Operators:: (...) -* Back-reference Operator:: \digit -* Anchoring Operators:: ^ $ -@end menu - -@node Match-self Operator, Match-any-character Operator, , Common Operators -@section The Match-self Operator (@var{ordinary character}) - -This operator matches the character itself. All ordinary characters -(@pxref{Regular Expression Syntax}) represent this operator. For -example, @samp{f} is always an ordinary character, so the regular -expression @samp{f} matches only the string @samp{f}. In -particular, it does @emph{not} match the string @samp{ff}. - -@node Match-any-character Operator, Concatenation Operator, Match-self Operator, Common Operators -@section The Match-any-character Operator (@code{.}) - -@cindex @samp{.} - -This operator matches any single printing or nonprinting character -except it won't match a: - -@table @asis -@item newline -if the syntax bit @code{RE_DOT_NEWLINE} isn't set. - -@item null -if the syntax bit @code{RE_DOT_NOT_NULL} is set. - -@end table - -The @samp{.} (period) character represents this operator. For example, -@samp{a.b} matches any three-character string beginning with @samp{a} -and ending with @samp{b}. - -@node Concatenation Operator, Repetition Operators, Match-any-character Operator, Common Operators -@section The Concatenation Operator - -This operator concatenates two regular expressions @var{a} and @var{b}. 
-No character represents this operator; you simply put @var{b} after -@var{a}. The result is a regular expression that will match a string if -@var{a} matches its first part and @var{b} matches the rest. For -example, @samp{xy} (two match-self operators) matches @samp{xy}. - -@node Repetition Operators, Alternation Operator, Concatenation Operator, Common Operators -@section Repetition Operators - -Repetition operators repeat the preceding regular expression a specified -number of times. - -@menu -* Match-zero-or-more Operator:: * -* Match-one-or-more Operator:: + -* Match-zero-or-one Operator:: ? -* Interval Operators:: @{@} -@end menu - -@node Match-zero-or-more Operator, Match-one-or-more Operator, , Repetition Operators -@subsection The Match-zero-or-more Operator (@code{*}) - -@cindex @samp{*} - -This operator repeats the smallest possible preceding regular expression -as many times as necessary (including zero) to match the pattern. -@samp{*} represents this operator. For example, @samp{o*} -matches any string made up of zero or more @samp{o}s. Since this -operator operates on the smallest preceding regular expression, -@samp{fo*} has a repeating @samp{o}, not a repeating @samp{fo}. So, -@samp{fo*} matches @samp{f}, @samp{fo}, @samp{foo}, and so on. - -Since the match-zero-or-more operator is a suffix operator, it may be -useless as such when no regular expression precedes it. This is the -case when it: - -@itemize @bullet -@item -is first in a regular expression, or - -@item -follows a match-beginning-of-line, open-group, or alternation -operator. - -@end itemize - -@noindent -Three different things can happen in these cases: - -@enumerate -@item -If the syntax bit @code{RE_CONTEXT_INVALID_OPS} is set, then the -regular expression is invalid. - -@item -If @code{RE_CONTEXT_INVALID_OPS} isn't set, but -@code{RE_CONTEXT_INDEP_OPS} is, then @samp{*} represents the -match-zero-or-more operator (which then operates on the empty string). 
- -@item -Otherwise, @samp{*} is ordinary. - -@end enumerate - -@cindex backtracking -The matcher processes a match-zero-or-more operator by first matching as -many repetitions of the smallest preceding regular expression as it can. -Then it continues to match the rest of the pattern. - -If it can't match the rest of the pattern, it backtracks (as many times -as necessary), each time discarding one of the matches until it can -either match the entire pattern or be certain that it cannot get a -match. For example, when matching @samp{ca*ar} against @samp{caaar}, -the matcher first matches all three @samp{a}s of the string with the -@samp{a*} of the regular expression. However, it cannot then match the -final @samp{ar} of the regular expression against the final @samp{r} of -the string. So it backtracks, discarding the match of the last @samp{a} -in the string. It can then match the remaining @samp{ar}. - - -@node Match-one-or-more Operator, Match-zero-or-one Operator, Match-zero-or-more Operator, Repetition Operators -@subsection The Match-one-or-more Operator (@code{+} or @code{\+}) - -@cindex @samp{+} - -If the syntax bit @code{RE_LIMITED_OPS} is set, then Regex doesn't recognize -this operator. Otherwise, if the syntax bit @code{RE_BK_PLUS_QM} isn't -set, then @samp{+} represents this operator; if it is, then @samp{\+} -does. - -This operator is similar to the match-zero-or-more operator except that -it repeats the preceding regular expression at least once; -@pxref{Match-zero-or-more Operator}, for what it operates on, how some -syntax bits affect it, and how Regex backtracks to match it. - -For example, supposing that @samp{+} represents the match-one-or-more -operator; then @samp{ca+r} matches, e.g., @samp{car} and -@samp{caaaar}, but not @samp{cr}. 
- -@node Match-zero-or-one Operator, Interval Operators, Match-one-or-more Operator, Repetition Operators -@subsection The Match-zero-or-one Operator (@code{?} or @code{\?}) -@cindex @samp{?} - -If the syntax bit @code{RE_LIMITED_OPS} is set, then Regex doesn't -recognize this operator. Otherwise, if the syntax bit -@code{RE_BK_PLUS_QM} isn't set, then @samp{?} represents this operator; -if it is, then @samp{\?} does. - -This operator is similar to the match-zero-or-more operator except that -it repeats the preceding regular expression once or not at all; -@pxref{Match-zero-or-more Operator}, to see what it operates on, how -some syntax bits affect it, and how Regex backtracks to match it. - -For example, supposing that @samp{?} represents the match-zero-or-one -operator; then @samp{ca?r} matches both @samp{car} and @samp{cr}, but -nothing else. - -@node Interval Operators, , Match-zero-or-one Operator, Repetition Operators -@subsection Interval Operators (@code{@{} @dots{} @code{@}} or @code{\@{} @dots{} @code{\@}}) - -@cindex interval expression -@cindex @samp{@{} -@cindex @samp{@}} -@cindex @samp{\@{} -@cindex @samp{\@}} - -If the syntax bit @code{RE_INTERVALS} is set, then Regex recognizes -@dfn{interval expressions}. They repeat the smallest possible preceding -regular expression a specified number of times. - -If the syntax bit @code{RE_NO_BK_BRACES} is set, @samp{@{} represents -the @dfn{open-interval operator} and @samp{@}} represents the -@dfn{close-interval operator} ; otherwise, @samp{\@{} and @samp{\@}} do. - -Specifically, supposing that @samp{@{} and @samp{@}} represent the -open-interval and close-interval operators; then: - -@table @code -@item @{@var{count}@} -matches exactly @var{count} occurrences of the preceding regular -expression. - -@item @{@var{min,}@} -matches @var{min} or more occurrences of the preceding regular -expression. 
- -@item @{@var{min, max}@} -matches at least @var{min} but no more than @var{max} occurrences of -the preceding regular expression. - -@end table - -The interval expression (but not necessarily the regular expression that -contains it) is invalid if: - -@itemize @bullet -@item -@var{min} is greater than @var{max}, or - -@item -any of @var{count}, @var{min}, or @var{max} are outside the range -zero to @code{RE_DUP_MAX} (which symbol @file{regex.h} -defines). - -@end itemize - -If the interval expression is invalid and the syntax bit -@code{RE_NO_BK_BRACES} is set, then Regex considers all the -characters in the would-be interval to be ordinary. If that bit -isn't set, then the regular expression is invalid. - -If the interval expression is valid but there is no preceding regular -expression on which to operate, then if the syntax bit -@code{RE_CONTEXT_INVALID_OPS} is set, the regular expression is invalid. -If that bit isn't set, then Regex considers all the characters---other -than backslashes, which it ignores---in the would-be interval to be -ordinary. - - -@node Alternation Operator, List Operators, Repetition Operators, Common Operators -@section The Alternation Operator (@code{|} or @code{\|}) - -@kindex | -@kindex \| -@cindex alternation operator -@cindex or operator - -If the syntax bit @code{RE_LIMITED_OPS} is set, then Regex doesn't -recognize this operator. Otherwise, if the syntax bit -@code{RE_NO_BK_VBAR} is set, then @samp{|} represents this operator; -otherwise, @samp{\|} does. - -Alternatives match one of a choice of regular expressions: -if you put the character(s) representing the alternation operator between -any two regular expressions @var{a} and @var{b}, the result matches -the union of the strings that @var{a} and @var{b} match. For -example, supposing that @samp{|} is the alternation operator, then -@samp{foo|bar|quux} would match any of @samp{foo}, @samp{bar} or -@samp{quux}. 
- -@ignore -@c Nobody needs to disallow empty alternatives any more. -If the syntax bit @code{RE_NO_EMPTY_ALTS} is set, then if either of the regular -expressions @var{a} or @var{b} is empty, the -regular expression is invalid. More precisely, if this syntax bit is -set, then the alternation operator can't: - -@itemize @bullet -@item -be first or last in a regular expression; - -@item -follow either another alternation operator or an open-group operator -(@pxref{Grouping Operators}); or - -@item -precede a close-group operator. - -@end itemize - -@noindent -For example, supposing @samp{(} and @samp{)} represent the open and -close-group operators, then @samp{|foo}, @samp{foo|}, @samp{foo||bar}, -@samp{foo(|bar)}, and @samp{(foo|)bar} would all be invalid. -@end ignore - -The alternation operator operates on the @emph{largest} possible -surrounding regular expressions. (Put another way, it has the lowest -precedence of any regular expression operator.) -Thus, the only way you can -delimit its arguments is to use grouping. For example, if @samp{(} and -@samp{)} are the open and close-group operators, then @samp{fo(o|b)ar} -would match either @samp{fooar} or @samp{fobar}. (@samp{foo|bar} would -match @samp{foo} or @samp{bar}.) - -@cindex backtracking -The matcher usually tries all combinations of alternatives so as to -match the longest possible string. For example, when matching -@samp{(fooq|foo)*(qbarquux|bar)} against @samp{fooqbarquux}, it cannot -take, say, the first (``depth-first'') combination it could match, since -then it would be content to match just @samp{fooqbar}. 
- -@comment xx something about leftmost-longest - - -@node List Operators, Grouping Operators, Alternation Operator, Common Operators -@section List Operators (@code{[} @dots{} @code{]} and @code{[^} @dots{} @code{]}) - -@cindex matching list -@cindex @samp{[} -@cindex @samp{]} -@cindex @samp{^} -@cindex @samp{-} -@cindex @samp{\} -@cindex @samp{[^} -@cindex nonmatching list -@cindex matching newline -@cindex bracket expression - -@dfn{Lists}, also called @dfn{bracket expressions}, are a set of one or -more items. An @dfn{item} is a character, -@ignore -(These get added when they get implemented.) -a collating symbol, an equivalence class expression, -@end ignore -a character class expression, or a range expression. The syntax bits -affect which kinds of items you can put in a list. We explain the last -two items in subsections below. Empty lists are invalid. - -A @dfn{matching list} matches a single character represented by one of -the list items. You form a matching list by enclosing one or more items -within an @dfn{open-matching-list operator} (represented by @samp{[}) -and a @dfn{close-list operator} (represented by @samp{]}). - -For example, @samp{[ab]} matches either @samp{a} or @samp{b}. -@samp{[ad]*} matches the empty string and any string composed of just -@samp{a}s and @samp{d}s in any order. Regex considers invalid a regular -expression with a @samp{[} but no matching -@samp{]}. - -@dfn{Nonmatching lists} are similar to matching lists except that they -match a single character @emph{not} represented by one of the list -items. You use an @dfn{open-nonmatching-list operator} (represented by -@samp{[^}@footnote{Regex therefore doesn't consider the @samp{^} to be -the first character in the list. If you put a @samp{^} character first -in (what you think is) a matching list, you'll turn it into a -nonmatching list.}) instead of an open-matching-list operator to start a -nonmatching list. 
- -For example, @samp{[^ab]} matches any character except @samp{a} or -@samp{b}. - -If the @code{posix_newline} field in the pattern buffer (@pxref{GNU -Pattern Buffers}) is set, then nonmatching lists do not match a newline. - -Most characters lose any special meaning inside a list. The special -characters inside a list follow. - -@table @samp -@item ] -ends the list if it's not the first list item. So, if you want to make -the @samp{]} character a list item, you must put it first. - -@item \ -quotes the next character if the syntax bit @code{RE_BACKSLASH_ESCAPE_IN_LISTS} is -set. - -@ignore -Put these in if they get implemented. - -@item [. -represents the open-collating-symbol operator (@pxref{Collating Symbol -Operators}). - -@item .] -represents the close-collating-symbol operator. - -@item [= -represents the open-equivalence-class operator (@pxref{Equivalence Class -Operators}). - -@item =] -represents the close-equivalence-class operator. - -@end ignore - -@item [: -represents the open-character-class operator (@pxref{Character Class -Operators}) if the syntax bit @code{RE_CHAR_CLASSES} is set and what -follows is a valid character class expression. - -@item :] -represents the close-character-class operator if the syntax bit -@code{RE_CHAR_CLASSES} is set and what precedes it is an -open-character-class operator followed by a valid character class name. - -@item - -represents the range operator (@pxref{Range Operator}) if it's -not first or last in a list or the ending point of a range. - -@end table - -@noindent -All other characters are ordinary. For example, @samp{[.*]} matches -@samp{.} and @samp{*}. - -@menu -* Character Class Operators:: [:class:] -* Range Operator:: start-end -@end menu - -@ignore -(If collating symbols and equivalence class expressions get implemented, -then add this.)
- -node Collating Symbol Operators -subsubsection Collating Symbol Operators (@code{[.} @dots{} @code{.]}) - -If the syntax bit @code{XX} is set, then you can represent -collating symbols inside lists. You form a @dfn{collating symbol} by -putting a collating element between an @dfn{open-collating-symbol -operator} and an @dfn{close-collating-symbol operator}. @samp{[.} -represents the open-collating-symbol operator and @samp{.]} represents -the close-collating-symbol operator. For example, if @samp{ll} is a -collating element, then @samp{[[.ll.]]} would match @samp{ll}. - -node Equivalence Class Operators -subsubsection Equivalence Class Operators (@code{[=} @dots{} @code{=]}) -@cindex equivalence class expression in regex -@cindex @samp{[=} in regex -@cindex @samp{=]} in regex - -If the syntax bit @code{XX} is set, then Regex recognizes equivalence class -expressions inside lists. A @dfn{equivalence class expression} is a set -of collating elements which all belong to the same equivalence class. -You form an equivalence class expression by putting a collating -element between an @dfn{open-equivalence-class operator} and a -@dfn{close-equivalence-class operator}. @samp{[=} represents the -open-equivalence-class operator and @samp{=]} represents the -close-equivalence-class operator. For example, if @samp{a} and @samp{A} -were an equivalence class, then both @samp{[[=a=]]} and @samp{[[=A=]]} -would match both @samp{a} and @samp{A}. If the collating element in an -equivalence class expression isn't part of an equivalence class, then -the matcher considers the equivalence class expression to be a collating -symbol. 
- -@end ignore - -@node Character Class Operators, Range Operator, , List Operators -@subsection Character Class Operators (@code{[:} @dots{} @code{:]}) - -@cindex character classes -@cindex @samp{[:} in regex -@cindex @samp{:]} in regex - -If the syntax bit @code{RE_CHAR_CLASSES} is set, then Regex -recognizes character class expressions inside lists. A @dfn{character -class expression} matches one character from a given class. You form a -character class expression by putting a character class name between an -@dfn{open-character-class operator} (represented by @samp{[:}) and a -@dfn{close-character-class operator} (represented by @samp{:]}). The -character class names and their meanings are: - -@table @code - -@item alnum -letters and digits - -@item alpha -letters - -@item blank -system-dependent; for @sc{gnu}, a space or tab - -@item cntrl -control characters (in the @sc{ascii} encoding, code 0177 and codes -less than 040) - -@item digit -digits - -@item graph -same as @code{print} except omits space - -@item lower -lowercase letters - -@item print -printable characters (in the @sc{ascii} encoding, space through -tilde---codes 040 through 0176) - -@item punct -neither control nor alphanumeric characters - -@item space -space, horizontal tab, carriage return, newline, vertical tab, and form feed - -@item upper -uppercase letters - -@item xdigit -hexadecimal digits: @code{0}--@code{9}, @code{a}--@code{f}, @code{A}--@code{F} - -@end table - -@noindent -These correspond to the definitions in the C library's @file{<ctype.h>} -facility. For example, @samp{[:alpha:]} corresponds to the standard -facility @code{isalpha}. Regex recognizes character class expressions -only inside of lists; so @samp{[[:alpha:]]} matches any letter, but -@samp{[:alpha:]} outside of a bracket expression and not followed by a -repetition operator matches just itself.
- -@node Range Operator, , Character Class Operators, List Operators -@subsection The Range Operator (@code{-}) - -Regex recognizes @dfn{range expressions} inside a list. They represent -those characters -that fall between two elements in the current collating sequence. You -form a range expression by putting a @dfn{range operator} between two -@ignore -(If these get implemented, then substitute this for ``characters.'') -of any of the following: characters, collating elements, collating symbols, -and equivalence class expressions. The starting point of the range and -the ending point of the range don't have to be the same kind of item, -e.g., the starting point could be a collating element and the ending -point could be an equivalence class expression. If a range's ending -point is an equivalence class, then all the collating elements in that -class will be in the range. -@end ignore -characters.@footnote{You can't use a character class for the starting -or ending point of a range, since a character class is not a single -character.} @samp{-} represents the range operator. For example, -@samp{a-f} within a list represents all the characters from @samp{a} -through @samp{f} -inclusively. - -If the syntax bit @code{RE_NO_EMPTY_RANGES} is set, then if the range's -ending point collates less than its starting point, the range (and the -regular expression containing it) is invalid. For example, the regular -expression @samp{[z-a]} would be invalid. If this bit isn't set, then -Regex considers such a range to be empty. - -Since @samp{-} represents the range operator, if you want to make a -@samp{-} character itself -a list item, you must do one of the following: - -@itemize @bullet -@item -Put the @samp{-} either first or last in the list. - -@item -Include a range whose starting point collates strictly lower than -@samp{-} and whose ending point collates equal or higher. 
Unless a -range is the first item in a list, a @samp{-} can't be its starting -point, but @emph{can} be its ending point. That is because Regex -considers @samp{-} to be the range operator unless it is preceded by -another @samp{-}. For example, in the @sc{ascii} encoding, @samp{)}, -@samp{*}, @samp{+}, @samp{,}, @samp{-}, @samp{.}, and @samp{/} are -contiguous characters in the collating sequence. You might think that -@samp{[)-+--/]} has two ranges: @samp{)-+} and @samp{--/}. Rather, it -has the ranges @samp{)-+} and @samp{+--}, plus the character @samp{/}, so -it matches, e.g., @samp{,}, not @samp{.}. - -@item -Put a range whose starting point is @samp{-} first in the list. - -@end itemize - -For example, @samp{[-a-z]} matches a lowercase letter or a hyphen (in -English, in @sc{ascii}). - - -@node Grouping Operators, Back-reference Operator, List Operators, Common Operators -@section Grouping Operators (@code{(} @dots{} @code{)} or @code{\(} @dots{} @code{\)}) - -@kindex ( -@kindex ) -@kindex \( -@kindex \) -@cindex grouping -@cindex subexpressions -@cindex parenthesizing - -A @dfn{group}, also known as a @dfn{subexpression}, consists of an -@dfn{open-group operator}, any number of other operators, and a -@dfn{close-group operator}. Regex treats this sequence as a unit, just -as mathematics and programming languages treat a parenthesized -expression as a unit. - -Therefore, using @dfn{groups}, you can: - -@itemize @bullet -@item -delimit the argument(s) to an alternation operator (@pxref{Alternation -Operator}) or a repetition operator (@pxref{Repetition -Operators}). - -@item -keep track of the indices of the substring that matched a given group. -@xref{Using Registers}, for a precise explanation. -This lets you: - -@itemize @bullet -@item -use the back-reference operator (@pxref{Back-reference Operator}). - -@item -use registers (@pxref{Using Registers}). 
- -@end itemize - -@end itemize - -If the syntax bit @code{RE_NO_BK_PARENS} is set, then @samp{(} represents -the open-group operator and @samp{)} represents the -close-group operator; otherwise, @samp{\(} and @samp{\)} do. - -If the syntax bit @code{RE_UNMATCHED_RIGHT_PAREN_ORD} is set and a -close-group operator has no matching open-group operator, then Regex -considers it to match @samp{)}. - - -@node Back-reference Operator, Anchoring Operators, Grouping Operators, Common Operators -@section The Back-reference Operator (@dfn{\}@var{digit}) - -@cindex back references - -If the syntax bit @code{RE_NO_BK_REF} isn't set, then Regex recognizes -back references. A back reference matches a specified preceding group. -The back reference operator is represented by @samp{\@var{digit}} -anywhere after the end of a regular expression's @w{@var{digit}-th} -group (@pxref{Grouping Operators}). - -@var{digit} must be between @samp{1} and @samp{9}. The matcher assigns -numbers 1 through 9 to the first nine groups it encounters. By using -one of @samp{\1} through @samp{\9} after the corresponding group's -close-group operator, you can match a substring identical to the -one that the group does. - -Back references match according to the following (in all examples below, -@samp{(} represents the open-group, @samp{)} the close-group, @samp{@{} -the open-interval and @samp{@}} the close-interval operator): - -@itemize @bullet -@item -If the group matches a substring, the back reference matches an -identical substring. For example, @samp{(a)\1} matches @samp{aa} and -@samp{(bana)na\1bo\1} matches @samp{bananabanabobana}. Likewise, -@samp{(.*)\1} matches any (newline-free if the syntax bit -@code{RE_DOT_NEWLINE} isn't set) string that is composed of two -identical halves; the @samp{(.*)} matches the first half and the -@samp{\1} matches the second half. 
- -@item -If the group matches more than once (as it might if followed -by, e.g., a repetition operator), then the back reference matches the -substring the group @emph{last} matched. For example, -@samp{((a*)b)*\1\2} matches @samp{aabababa}; first @w{group 1} (the -outer one) matches @samp{aab} and @w{group 2} (the inner one) matches -@samp{aa}. Then @w{group 1} matches @samp{ab} and @w{group 2} matches -@samp{a}. So, @samp{\1} matches @samp{ab} and @samp{\2} matches -@samp{a}. - -@item -If the group doesn't participate in a match, i.e., it is part of an -alternative not taken or a repetition operator allows zero repetitions -of it, then the back reference makes the whole match fail. For example, -@samp{(one()|two())-and-(three\2|four\3)} matches @samp{one-and-three} -and @samp{two-and-four}, but not @samp{one-and-four} or -@samp{two-and-three}. For example, if the pattern matches -@samp{one-and-}, then its @w{group 2} matches the empty string and its -@w{group 3} doesn't participate in the match. So, if it then matches -@samp{four}, then when it tries to back reference @w{group 3}---which it -will attempt to do because @samp{\3} follows the @samp{four}---the match -will fail because @w{group 3} didn't participate in the match. - -@end itemize - -You can use a back reference as an argument to a repetition operator. For -example, @samp{(a(b))\2*} matches @samp{a} followed by one or more -@samp{b}s. Similarly, @samp{(a(b))\2@{3@}} matches @samp{abbbb}. - -If there is no preceding @w{@var{digit}-th} subexpression, the regular -expression is invalid. - - -@node Anchoring Operators, , Back-reference Operator, Common Operators -@section Anchoring Operators - -@cindex anchoring -@cindex regexp anchoring - -These operators can constrain a pattern to match only at the beginning or -end of the entire string or at the beginning or end of a line.
- -@menu -* Match-beginning-of-line Operator:: ^ -* Match-end-of-line Operator:: $ -@end menu - - -@node Match-beginning-of-line Operator, Match-end-of-line Operator, , Anchoring Operators -@subsection The Match-beginning-of-line Operator (@code{^}) - -@kindex ^ -@cindex beginning-of-line operator -@cindex anchors - -This operator can match the empty string either at the beginning of the -string or after a newline character. Thus, it is said to @dfn{anchor} -the pattern to the beginning of a line. - -In the cases following, @samp{^} represents this operator. (Otherwise, -@samp{^} is ordinary.) - -@itemize @bullet - -@item -It (the @samp{^}) is first in the pattern, as in @samp{^foo}. - -@cnindex RE_CONTEXT_INDEP_ANCHORS @r{(and @samp{^})} -@item -The syntax bit @code{RE_CONTEXT_INDEP_ANCHORS} is set, and it is outside -a bracket expression. - -@cindex open-group operator and @samp{^} -@cindex alternation operator and @samp{^} -@item -It follows an open-group or alternation operator, as in @samp{a\(^b\)} -and @samp{a\|^b}. @xref{Grouping Operators}, and @ref{Alternation -Operator}. - -@end itemize - -These rules imply that some valid patterns containing @samp{^} cannot be -matched; for example, @samp{foo^bar} if @code{RE_CONTEXT_INDEP_ANCHORS} -is set. - -@vindex not_bol @r{field in pattern buffer} -If the @code{not_bol} field is set in the pattern buffer (@pxref{GNU -Pattern Buffers}), then @samp{^} fails to match at the beginning of the -string. @xref{POSIX Matching}, for when you might find this useful. - -@vindex newline_anchor @r{field in pattern buffer} -If the @code{newline_anchor} field is not set in the pattern buffer, then -@samp{^} fails to match after a newline. This is useful when you do not -regard the string to be matched as broken into lines.
- - -@node Match-end-of-line Operator, , Match-beginning-of-line Operator, Anchoring Operators -@subsection The Match-end-of-line Operator (@code{$}) - -@kindex $ -@cindex end-of-line operator -@cindex anchors - -This operator can match the empty string either at the end of -the string or before a newline character in the string. Thus, it is -said to @dfn{anchor} the pattern to the end of a line. - -It is always represented by @samp{$}. For example, @samp{foo$} usually -matches, e.g., @samp{foo} and, e.g., the first three characters of -@samp{foo\nbar}. - -Its interaction with the syntax bits and pattern buffer fields is -exactly the dual of @samp{^}'s; see the previous section. (That is, -``beginning'' becomes ``end'', ``next'' becomes ``previous'', and -``after'' becomes ``before''.) - - -@node GNU Operators, GNU Emacs Operators, Common Operators, Top -@chapter GNU Operators - -Following are operators that @sc{gnu} defines (and @sc{posix} doesn't). - -@menu -* Word Operators:: -* Buffer Operators:: -@end menu - -@node Word Operators, Buffer Operators, , GNU Operators -@section Word Operators - -The operators in this section require Regex to recognize parts of words. -Regex uses a syntax table to determine whether or not a character is -part of a word, i.e., whether or not it is @dfn{word-constituent}. - -@menu -* Non-Emacs Syntax Tables:: -* Match-word-boundary Operator:: \b -* Match-within-word Operator:: \B -* Match-beginning-of-word Operator:: \< -* Match-end-of-word Operator:: \> -* Match-word-constituent Operator:: \w -* Match-non-word-constituent Operator:: \W -@end menu - -@node Non-Emacs Syntax Tables, Match-word-boundary Operator, , Word Operators -@subsection Non-Emacs Syntax Tables - -A @dfn{syntax table} is an array indexed by the characters in your -character set. In the @sc{ascii} encoding, therefore, a syntax table -has 256 elements. Regex always uses a @code{char *} variable -@code{re_syntax_table} as its syntax table. 
In some cases, it -initializes this variable and in others it expects you to initialize it. - -@itemize @bullet -@item -If Regex is compiled with the preprocessor symbols @code{emacs} and -@code{SYNTAX_TABLE} both undefined, then Regex allocates -@code{re_syntax_table} and initializes an element @var{i} either to -@code{Sword} (which it defines) if @var{i} is a letter, number, or -@samp{_}, or to zero if it's not. - -@item -If Regex is compiled with @code{emacs} undefined but @code{SYNTAX_TABLE} -defined, then Regex expects you to define a @code{char *} variable -@code{re_syntax_table} to be a valid syntax table. - -@item -@xref{Emacs Syntax Tables}, for what happens when Regex is compiled with -the preprocessor symbol @code{emacs} defined. - -@end itemize - -@node Match-word-boundary Operator, Match-within-word Operator, Non-Emacs Syntax Tables, Word Operators -@subsection The Match-word-boundary Operator (@code{\b}) - -@cindex @samp{\b} -@cindex word boundaries, matching - -This operator (represented by @samp{\b}) matches the empty string at -either the beginning or the end of a word. For example, @samp{\brat\b} -matches the separate word @samp{rat}. - -@node Match-within-word Operator, Match-beginning-of-word Operator, Match-word-boundary Operator, Word Operators -@subsection The Match-within-word Operator (@code{\B}) - -@cindex @samp{\B} - -This operator (represented by @samp{\B}) matches the empty string within -a word. For example, @samp{c\Brat\Be} matches @samp{crate}, but -@samp{dirty \Brat} doesn't match @samp{dirty rat}. - -@node Match-beginning-of-word Operator, Match-end-of-word Operator, Match-within-word Operator, Word Operators -@subsection The Match-beginning-of-word Operator (@code{\<}) - -@cindex @samp{\<} - -This operator (represented by @samp{\<}) matches the empty string at the -beginning of a word. 
- -@node Match-end-of-word Operator, Match-word-constituent Operator, Match-beginning-of-word Operator, Word Operators -@subsection The Match-end-of-word Operator (@code{\>}) - -@cindex @samp{\>} - -This operator (represented by @samp{\>}) matches the empty string at the -end of a word. - -@node Match-word-constituent Operator, Match-non-word-constituent Operator, Match-end-of-word Operator, Word Operators -@subsection The Match-word-constituent Operator (@code{\w}) - -@cindex @samp{\w} - -This operator (represented by @samp{\w}) matches any word-constituent -character. - -@node Match-non-word-constituent Operator, , Match-word-constituent Operator, Word Operators -@subsection The Match-non-word-constituent Operator (@code{\W}) - -@cindex @samp{\W} - -This operator (represented by @samp{\W}) matches any character that is -not word-constituent. - - -@node Buffer Operators, , Word Operators, GNU Operators -@section Buffer Operators - -Following are operators which work on buffers. In Emacs, a @dfn{buffer} -is, naturally, an Emacs buffer. For other programs, Regex considers the -entire string to be matched as the buffer. - -@menu -* Match-beginning-of-buffer Operator:: \` -* Match-end-of-buffer Operator:: \' -@end menu - - -@node Match-beginning-of-buffer Operator, Match-end-of-buffer Operator, , Buffer Operators -@subsection The Match-beginning-of-buffer Operator (@code{\`}) - -@cindex @samp{\`} - -This operator (represented by @samp{\`}) matches the empty string at the -beginning of the buffer. - -@node Match-end-of-buffer Operator, , Match-beginning-of-buffer Operator, Buffer Operators -@subsection The Match-end-of-buffer Operator (@code{\'}) - -@cindex @samp{\'} - -This operator (represented by @samp{\'}) matches the empty string at the -end of the buffer. 
- - -@node GNU Emacs Operators, What Gets Matched?, GNU Operators, Top -@chapter GNU Emacs Operators - -Following are operators that @sc{gnu} defines (and @sc{posix} doesn't) -that you can use only when Regex is compiled with the preprocessor -symbol @code{emacs} defined. - -@menu -* Syntactic Class Operators:: -@end menu - - -@node Syntactic Class Operators, , , GNU Emacs Operators -@section Syntactic Class Operators - -The operators in this section require Regex to recognize the syntactic -classes of characters. Regex uses a syntax table to determine this. - -@menu -* Emacs Syntax Tables:: -* Match-syntactic-class Operator:: \sCLASS -* Match-not-syntactic-class Operator:: \SCLASS -@end menu - -@node Emacs Syntax Tables, Match-syntactic-class Operator, , Syntactic Class Operators -@subsection Emacs Syntax Tables - -A @dfn{syntax table} is an array indexed by the characters in your -character set. In the @sc{ascii} encoding, therefore, a syntax table -has 256 elements. - -If Regex is compiled with the preprocessor symbol @code{emacs} defined, -then Regex expects you to define and initialize the variable -@code{re_syntax_table} to be an Emacs syntax table. Emacs' syntax -tables are more complicated than Regex's own (@pxref{Non-Emacs Syntax -Tables}). @xref{Syntax, , Syntax, emacs, The GNU Emacs User's Manual}, -for a description of Emacs' syntax tables. - -@node Match-syntactic-class Operator, Match-not-syntactic-class Operator, Emacs Syntax Tables, Syntactic Class Operators -@subsection The Match-syntactic-class Operator (@code{\s}@var{class}) - -@cindex @samp{\s} - -This operator matches any character whose syntactic class is represented -by a specified character. @samp{\s@var{class}} represents this operator -where @var{class} is the character representing the syntactic class you -want. For example, @samp{w} represents the syntactic -class of word-constituent characters, so @samp{\sw} matches any -word-constituent character. 
- -@node Match-not-syntactic-class Operator, , Match-syntactic-class Operator, Syntactic Class Operators -@subsection The Match-not-syntactic-class Operator (@code{\S}@var{class}) - -@cindex @samp{\S} - -This operator is similar to the match-syntactic-class operator except -that it matches any character whose syntactic class is @emph{not} -represented by the specified character. @samp{\S@var{class}} represents -this operator. For example, @samp{w} represents the syntactic class of -word-constituent characters, so @samp{\Sw} matches any character that is -not word-constituent. - - -@node What Gets Matched?, Programming with Regex, GNU Emacs Operators, Top -@chapter What Gets Matched? - -Regex usually matches strings according to the ``leftmost longest'' -rule; that is, it chooses the longest of the leftmost matches. This -does not mean that, for a regular expression containing subexpressions, -it simply chooses the longest match for each subexpression, left to -right; the overall match must also be the longest possible one. - -For example, @samp{(ac*)(c*d[ac]*)\1} matches @samp{acdacaaa}, not -@samp{acdac}, as it would if it were to choose the longest match for the -first subexpression. - - -@node Programming with Regex, Copying, What Gets Matched?, Top -@chapter Programming with Regex - -Here we describe how you use the Regex data structures and functions in -C programs. Regex has three interfaces: one designed for @sc{gnu}, one -compatible with @sc{posix} and one compatible with Berkeley @sc{unix}. - -@menu -* GNU Regex Functions:: -* POSIX Regex Functions:: -* BSD Regex Functions:: -@end menu - - -@node GNU Regex Functions, POSIX Regex Functions, , Programming with Regex -@section GNU Regex Functions - -If you're writing code that doesn't need to be compatible with either -@sc{posix} or Berkeley @sc{unix}, you can use these functions. They -provide more options than the other interfaces. - -@menu -* GNU Pattern Buffers:: The re_pattern_buffer type.
-* GNU Regular Expression Compiling:: re_compile_pattern () -* GNU Matching:: re_match () -* GNU Searching:: re_search () -* Matching/Searching with Split Data:: re_match_2 (), re_search_2 () -* Searching with Fastmaps:: re_compile_fastmap () -* GNU Translate Tables:: The `translate' field. -* Using Registers:: The re_registers type and related fns. -* Freeing GNU Pattern Buffers:: regfree () -@end menu - - -@node GNU Pattern Buffers, GNU Regular Expression Compiling, , GNU Regex Functions -@subsection GNU Pattern Buffers - -@cindex pattern buffer, definition of -@tindex re_pattern_buffer @r{definition} -@tindex struct re_pattern_buffer @r{definition} - -To compile, match, or search for a given regular expression, you must -supply a pattern buffer. A @dfn{pattern buffer} holds one compiled -regular expression.@footnote{Regular expressions are also referred to as -``patterns,'' hence the name ``pattern buffer.''} - -You can have several different pattern buffers simultaneously, each -holding a compiled pattern for a different regular expression. - -@file{regex.h} defines the pattern buffer @code{struct} as follows: - -@example - /* Space that holds the compiled pattern. It is declared as - `unsigned char *' because its elements are - sometimes used as array indexes. */ - unsigned char *buffer; - - /* Number of bytes to which `buffer' points. */ - unsigned long allocated; - - /* Number of bytes actually used in `buffer'. */ - unsigned long used; - - /* Syntax setting with which the pattern was compiled. */ - reg_syntax_t syntax; - - /* Pointer to a fastmap, if any, otherwise zero. re_search uses - the fastmap, if there is one, to skip over impossible - starting points for matches. */ - char *fastmap; - - /* Either a translate table to apply to all characters before - comparing them, or zero for no translation. The translation - is applied to a pattern when it is compiled and to a string - when it is matched. 
*/ - char *translate; - - /* Number of subexpressions found by the compiler. */ - size_t re_nsub; - - /* Zero if this pattern cannot match the empty string, one else. - Well, in truth it's used only in `re_search_2', to see - whether or not we should use the fastmap, so we don't set - this absolutely perfectly; see `re_compile_fastmap' (the - `duplicate' case). */ - unsigned can_be_null : 1; - - /* If REGS_UNALLOCATED, allocate space in the `regs' structure - for `max (RE_NREGS, re_nsub + 1)' groups. - If REGS_REALLOCATE, reallocate space if necessary. - If REGS_FIXED, use what's there. */ -#define REGS_UNALLOCATED 0 -#define REGS_REALLOCATE 1 -#define REGS_FIXED 2 - unsigned regs_allocated : 2; - - /* Set to zero when `regex_compile' compiles a pattern; set to one - by `re_compile_fastmap' if it updates the fastmap. */ - unsigned fastmap_accurate : 1; - - /* If set, `re_match_2' does not return information about - subexpressions. */ - unsigned no_sub : 1; - - /* If set, a beginning-of-line anchor doesn't match at the - beginning of the string. */ - unsigned not_bol : 1; - - /* Similarly for an end-of-line anchor. */ - unsigned not_eol : 1; - - /* If true, an anchor at a newline matches. */ - unsigned newline_anchor : 1; - -@end example - - -@node GNU Regular Expression Compiling, GNU Matching, GNU Pattern Buffers, GNU Regex Functions -@subsection GNU Regular Expression Compiling - -In @sc{gnu}, you can both match and search for a given regular -expression. To do either, you must first compile it in a pattern buffer -(@pxref{GNU Pattern Buffers}). - -@cindex syntax initialization -@vindex re_syntax_options @r{initialization} -Regular expressions match according to the syntax with which they were -compiled; with @sc{gnu}, you indicate what syntax you want by setting -the variable @code{re_syntax_options} (declared in @file{regex.h} and -defined in @file{regex.c}) before calling the compiling function, -@code{re_compile_pattern} (see below). 
@xref{Syntax Bits}, and -@ref{Predefined Syntaxes}. - -You can change the value of @code{re_syntax_options} at any time. -Usually, however, you set its value once and then never change it. - -@cindex pattern buffer initialization -@code{re_compile_pattern} takes a pattern buffer as an argument. You -must initialize the following fields: - -@table @code - -@item translate -@vindex translate @r{initialization} -Initialize this to point to a translate table if you want one, or to -zero if you don't. We explain translate tables in @ref{GNU Translate -Tables}. - -@item fastmap -@vindex fastmap @r{initialization} -Initialize this to nonzero if you want a fastmap, or to zero if you -don't. - -@item buffer -@itemx allocated -@vindex buffer @r{initialization} -@vindex allocated @r{initialization} -@findex malloc -If you want @code{re_compile_pattern} to allocate memory for the -compiled pattern, set both of these to zero. If you have an existing -block of memory (allocated with @code{malloc}) you want Regex to use, -set @code{buffer} to its address and @code{allocated} to its size (in -bytes). - -@code{re_compile_pattern} uses @code{realloc} to extend the space for -the compiled pattern as necessary. - -@end table - -To compile a pattern buffer, use: - -@findex re_compile_pattern -@example -char * -re_compile_pattern (const char *@var{regex}, const int @var{regex_size}, - struct re_pattern_buffer *@var{pattern_buffer}) -@end example - -@noindent -@var{regex} is the regular expression's address, @var{regex_size} is its -length, and @var{pattern_buffer} is the pattern buffer's address. - -If @code{re_compile_pattern} successfully compiles the regular -expression, it returns zero and sets @code{*@var{pattern_buffer}} to the -compiled pattern. It sets the pattern buffer's fields as follows: - -@table @code -@item buffer -@vindex buffer @r{field, set by @code{re_compile_pattern}} -to the compiled pattern.
- -@item used -@vindex used @r{field, set by @code{re_compile_pattern}} -to the number of bytes the compiled pattern in @code{buffer} occupies. - -@item syntax -@vindex syntax @r{field, set by @code{re_compile_pattern}} -to the current value of @code{re_syntax_options}. - -@item re_nsub -@vindex re_nsub @r{field, set by @code{re_compile_pattern}} -to the number of subexpressions in @var{regex}. - -@item fastmap_accurate -@vindex fastmap_accurate @r{field, set by @code{re_compile_pattern}} -to zero on the theory that the pattern you're compiling is different -than the one previously compiled into @code{buffer}; in that case (since -you can't make a fastmap without a compiled pattern), -@code{fastmap} would either contain an incompatible fastmap, or nothing -at all. - -@c xx what else? -@end table - -If @code{re_compile_pattern} can't compile @var{regex}, it returns an -error string corresponding to one of the errors listed in @ref{POSIX -Regular Expression Compiling}. - - -@node GNU Matching, GNU Searching, GNU Regular Expression Compiling, GNU Regex Functions -@subsection GNU Matching - -@cindex matching with GNU functions - -Matching the @sc{gnu} way means trying to match as much of a string as -possible starting at a position within it you specify. Once you've compiled -a pattern into a pattern buffer (@pxref{GNU Regular Expression -Compiling}), you can ask the matcher to match that pattern against a -string using: - -@findex re_match -@example -int -re_match (struct re_pattern_buffer *@var{pattern_buffer}, - const char *@var{string}, const int @var{size}, - const int @var{start}, struct re_registers *@var{regs}) -@end example - -@noindent -@var{pattern_buffer} is the address of a pattern buffer containing a -compiled pattern. @var{string} is the string you want to match; it can -contain newline and null characters. @var{size} is the length of that -string. 
@var{start} is the string index at which you want to -begin matching; the first character of @var{string} is at index zero. -@xref{Using Registers}, for an explanation of @var{regs}; you can safely -pass zero. - -@code{re_match} matches the regular expression in @var{pattern_buffer} -against the string @var{string} according to the syntax in -@var{pattern_buffer}'s @code{syntax} field. (@xref{GNU Regular -Expression Compiling}, for how to set it.) The function returns -@math{-1} if the compiled pattern does not match any part of -@var{string} and @math{-2} if an internal error happens; otherwise, it -returns how many (possibly zero) characters of @var{string} the pattern -matched. - -An example: suppose @var{pattern_buffer} points to a pattern buffer -containing the compiled pattern for @samp{a*}, and @var{string} points -to @samp{aaaaab} (whereupon @var{size} should be 6). Then if @var{start} -is 2, @code{re_match} returns 3, i.e., @samp{a*} would have matched the -last three @samp{a}s in @var{string}. If @var{start} is 0, -@code{re_match} returns 5, i.e., @samp{a*} would have matched all the -@samp{a}s in @var{string}. If @var{start} is either 5 or 6, it returns -zero. - -If @var{start} is not between zero and @var{size}, then -@code{re_match} returns @math{-1}. - - -@node GNU Searching, Matching/Searching with Split Data, GNU Matching, GNU Regex Functions -@subsection GNU Searching - -@cindex searching with GNU functions - -@dfn{Searching} means trying to match starting at successive positions -within a string. The function @code{re_search} does this. - -Before calling @code{re_search}, you must compile your regular -expression. @xref{GNU Regular Expression Compiling}.
- -Here is the function declaration: - -@findex re_search -@example -int -re_search (struct re_pattern_buffer *@var{pattern_buffer}, - const char *@var{string}, const int @var{size}, - const int @var{start}, const int @var{range}, - struct re_registers *@var{regs}) -@end example - -@noindent -@vindex start @r{argument to @code{re_search}} -@vindex range @r{argument to @code{re_search}} -whose arguments are the same as those to @code{re_match} (@pxref{GNU -Matching}) except that the two arguments @var{start} and @var{range} -replace @code{re_match}'s argument @var{start}. - -If @var{range} is positive, then @code{re_search} attempts a match -starting first at index @var{start}, then at @math{@var{start} + 1} if -that fails, and so on, up to @math{@var{start} + @var{range}}; if -@var{range} is negative, then it attempts a match starting first at -index @var{start}, then at @math{@var{start} -1} if that fails, and so -on. - -If @var{start} is not between zero and @var{size}, then @code{re_search} -returns @math{-1}. When @var{range} is positive, @code{re_search} -adjusts @var{range} so that @math{@var{start} + @var{range} - 1} is -between zero and @var{size}, if necessary; that way it won't search -outside of @var{string}. Similarly, when @var{range} is negative, -@code{re_search} adjusts @var{range} so that @math{@var{start} + -@var{range} + 1} is between zero and @var{size}, if necessary. - -If the @code{fastmap} field of @var{pattern_buffer} is zero, -@code{re_search} matches starting at consecutive positions; otherwise, -it uses @code{fastmap} to make the search more efficient. -@xref{Searching with Fastmaps}. - -If no match is found, @code{re_search} returns @math{-1}. If -a match is found, it returns the index where the match began. If an -internal error happens, it returns @math{-2}. 
- - -@node Matching/Searching with Split Data, Searching with Fastmaps, GNU Searching, GNU Regex Functions -@subsection Matching and Searching with Split Data - -Using the functions @code{re_match_2} and @code{re_search_2}, you can -match or search in data that is divided into two strings. - -The function: - -@findex re_match_2 -@example -int -re_match_2 (struct re_pattern_buffer *@var{buffer}, - const char *@var{string1}, const int @var{size1}, - const char *@var{string2}, const int @var{size2}, - const int @var{start}, - struct re_registers *@var{regs}, - const int @var{stop}) -@end example - -@noindent -is similar to @code{re_match} (@pxref{GNU Matching}) except that you -pass @emph{two} data strings and sizes, and an index @var{stop} beyond -which you don't want the matcher to try matching. As with -@code{re_match}, if it succeeds, @code{re_match_2} returns how many -characters of @var{string} it matched. Regard @var{string1} and -@var{string2} as concatenated when you set the arguments @var{start} and -@var{stop} and use the contents of @var{regs}; @code{re_match_2} never -returns a value larger than @math{@var{size1} + @var{size2}}. - -The function: - -@findex re_search_2 -@example -int -re_search_2 (struct re_pattern_buffer *@var{buffer}, - const char *@var{string1}, const int @var{size1}, - const char *@var{string2}, const int @var{size2}, - const int @var{start}, const int @var{range}, - struct re_registers *@var{regs}, - const int @var{stop}) -@end example - -@noindent -is similarly related to @code{re_search}. - - -@node Searching with Fastmaps, GNU Translate Tables, Matching/Searching with Split Data, GNU Regex Functions -@subsection Searching with Fastmaps - -@cindex fastmaps -If you're searching through a long string, you should use a fastmap. -Without one, the searcher tries to match at consecutive positions in the -string. Generally, most of the characters in the string could not start -a match. 
It takes much longer to try matching at a given position in the -string than it does to check in a table whether or not the character at -that position could start a match. A @dfn{fastmap} is such a table. - -More specifically, a fastmap is an array indexed by the characters in -your character set. Under the @sc{ascii} encoding, therefore, a fastmap -has 256 elements. If you want the searcher to use a fastmap with a -given pattern buffer, you must allocate the array and assign the array's -address to the pattern buffer's @code{fastmap} field. You either can -compile the fastmap yourself or have @code{re_search} do it for you; -when @code{fastmap} is nonzero, it automatically compiles a fastmap the -first time you search using a particular compiled pattern. - -To compile a fastmap yourself, use: - -@findex re_compile_fastmap -@example -int -re_compile_fastmap (struct re_pattern_buffer *@var{pattern_buffer}) -@end example - -@noindent -@var{pattern_buffer} is the address of a pattern buffer. If the -character @var{c} could start a match for the pattern, -@code{re_compile_fastmap} makes -@code{@var{pattern_buffer}->fastmap[@var{c}]} nonzero. It returns -@math{0} if it can compile a fastmap and @math{-2} if there is an -internal error. For example, if @samp{|} is the alternation operator -and @var{pattern_buffer} holds the compiled pattern for @samp{a|b}, then -@code{re_compile_fastmap} sets @code{fastmap['a']} and -@code{fastmap['b']} (and no others). - -@code{re_search} uses a fastmap as it moves along in the string: it -checks the string's characters until it finds one that's in the fastmap. -Then it tries matching at that character. If the match fails, it -repeats the process. So, by using a fastmap, @code{re_search} doesn't -waste time trying to match at positions in the string that couldn't -start a match. - -If you don't want @code{re_search} to use a fastmap, -store zero in the @code{fastmap} field of the pattern buffer before -calling @code{re_search}. 
- -Once you've initialized a pattern buffer's @code{fastmap} field, you -need never do so again---even if you compile a new pattern in -it---provided the way the field is set still reflects whether or not you -want a fastmap. @code{re_search} will still either do nothing if -@code{fastmap} is null or, if it isn't, compile a new fastmap for the -new pattern. - -@node GNU Translate Tables, Using Registers, Searching with Fastmaps, GNU Regex Functions -@subsection GNU Translate Tables - -If you set the @code{translate} field of a pattern buffer to a translate -table, then the @sc{gnu} Regex functions to which you've passed that -pattern buffer use it to apply a simple transformation -to all the regular expression and string characters at which they look. - -A @dfn{translate table} is an array indexed by the characters in your -character set. Under the @sc{ascii} encoding, therefore, a translate -table has 256 elements. The array's elements are also characters in -your character set. When the Regex functions see a character @var{c}, -they use @code{translate[@var{c}]} in its place, with one exception: the -character after a @samp{\} is not translated. (This ensures that the -operators, e.g., @samp{\B} and @samp{\b}, are always distinguishable.) - -For example, a table that maps all lowercase letters to the -corresponding uppercase ones would cause the matcher to ignore -differences in case.@footnote{A table that maps all uppercase letters to -the corresponding lowercase ones would work just as well for this -purpose.} Such a table would map all characters except lowercase letters -to themselves, and lowercase letters to the corresponding uppercase -ones.
Under the @sc{ascii} encoding, here's how you could initialize -such a table (we'll call it @code{case_fold}): - -@example -for (i = 0; i < 256; i++) - case_fold[i] = i; -for (i = 'a'; i <= 'z'; i++) - case_fold[i] = i - ('a' - 'A'); -@end example - -You tell Regex to use a translate table on a given pattern buffer by -assigning that table's address to the @code{translate} field of that -buffer. If you don't want Regex to do any translation, put zero into -this field. You'll get weird results if you change the table's contents -anytime between compiling the pattern buffer, compiling its fastmap, and -matching or searching with the pattern buffer. - -@node Using Registers, Freeing GNU Pattern Buffers, GNU Translate Tables, GNU Regex Functions -@subsection Using Registers - -A group in a regular expression can match a (possibly empty) substring -of the string that the regular expression as a whole matched. The matcher -remembers the beginning and end of the substring matched by -each group. - -To find out what they matched, pass a nonzero @var{regs} argument to a -@sc{gnu} matching or searching function (@pxref{GNU Matching} and -@ref{GNU Searching}), i.e., the address of a structure of this type, as -defined in @file{regex.h}: - -@c We don't bother to include this directly from regex.h, -@c since it changes so rarely. -@example -@tindex re_registers -@vindex num_regs @r{in @code{struct re_registers}} -@vindex start @r{in @code{struct re_registers}} -@vindex end @r{in @code{struct re_registers}} -struct re_registers -@{ - unsigned num_regs; - regoff_t *start; - regoff_t *end; -@}; -@end example - -Except for (possibly) the @var{num_regs}'th element (see below), the -@var{i}th element of the @code{start} and @code{end} arrays records -information about the @var{i}th group in the pattern. (They're declared -as C pointers, but this is only because not all C compilers accept -zero-length arrays; conceptually, it is simplest to think of them as -arrays.)
- -The @code{start} and @code{end} arrays are allocated in various ways, -depending on the value of the @code{regs_allocated} -@vindex regs_allocated -field in the pattern buffer passed to the matcher. - -The simplest and perhaps most useful way is to let the matcher (re)allocate -enough space to record information for all the groups in the regular -expression. If @code{regs_allocated} is @code{REGS_UNALLOCATED}, -@vindex REGS_UNALLOCATED -the matcher allocates @math{1 + @var{re_nsub}} elements (@code{re_nsub} is -another field in the pattern buffer; @pxref{GNU Pattern Buffers}), sets -the extra element to @math{-1}, and sets @code{regs_allocated} to -@code{REGS_REALLOCATE}. -@vindex REGS_REALLOCATE -Then on subsequent calls with the same pattern buffer and @var{regs} -arguments, the matcher reallocates more space if necessary. - -It would perhaps be more logical to make the @code{regs_allocated} field -part of the @code{re_registers} structure, instead of part of the -pattern buffer. But in that case the caller would be forced to -initialize the structure before passing it. Much existing code doesn't -do this initialization, and it's arguably better to avoid it anyway. - -@code{re_compile_pattern} sets @code{regs_allocated} to -@code{REGS_UNALLOCATED}, -so if you use the GNU regular expression -functions, you get this behavior by default. - -@c xx document re_set_registers - -@sc{posix}, on the other hand, requires a different interface: the -caller is supposed to pass in a fixed-length array which the matcher -fills. Therefore, if @code{regs_allocated} is @code{REGS_FIXED} -@vindex REGS_FIXED -the matcher simply fills that array. - -The following examples illustrate the information recorded in the -@code{re_registers} structure. (In all of them, @samp{(} represents the -open-group and @samp{)} the close-group operator. The first character -in the string @var{string} is at index 0.) - -@c xx i'm not sure this is all true anymore.
- -@itemize @bullet - -@item -If the regular expression has an @w{@var{i}-th} -group not contained within another group that matches a -substring of @var{string}, then the function sets -@code{@w{@var{regs}->}start[@var{i}]} to the index in @var{string} where -the substring matched by the @w{@var{i}-th} group begins, and -@code{@w{@var{regs}->}end[@var{i}]} to the index just beyond that -substring's end. The function sets @code{@w{@var{regs}->}start[0]} and -@code{@w{@var{regs}->}end[0]} to analogous information about the entire -pattern. - -For example, when you match @samp{((a)(b))} against @samp{ab}, you get: - -@itemize -@item -0 in @code{@w{@var{regs}->}start[0]} and 2 in @code{@w{@var{regs}->}end[0]} - -@item -0 in @code{@w{@var{regs}->}start[1]} and 2 in @code{@w{@var{regs}->}end[1]} - -@item -0 in @code{@w{@var{regs}->}start[2]} and 1 in @code{@w{@var{regs}->}end[2]} - -@item -1 in @code{@w{@var{regs}->}start[3]} and 2 in @code{@w{@var{regs}->}end[3]} -@end itemize - -@item -If a group matches more than once (as it might if followed by, -e.g., a repetition operator), then the function reports the information -about what the group @emph{last} matched. - -For example, when you match the pattern @samp{(a)*} against the string -@samp{aa}, you get: - -@itemize -@item -0 in @code{@w{@var{regs}->}start[0]} and 2 in @code{@w{@var{regs}->}end[0]} - -@item -1 in @code{@w{@var{regs}->}start[1]} and 2 in @code{@w{@var{regs}->}end[1]} -@end itemize - -@item -If the @w{@var{i}-th} group does not participate in a -successful match, e.g., it is an alternative not taken or a -repetition operator allows zero repetitions of it, then the function -sets @code{@w{@var{regs}->}start[@var{i}]} and -@code{@w{@var{regs}->}end[@var{i}]} to @math{-1}. 
- -For example, when you match the pattern @samp{(a)*b} against -the string @samp{b}, you get: - -@itemize -@item -0 in @code{@w{@var{regs}->}start[0]} and 1 in @code{@w{@var{regs}->}end[0]} - -@item -@math{-1} in @code{@w{@var{regs}->}start[1]} and @math{-1} in @code{@w{@var{regs}->}end[1]} -@end itemize - -@item -If the @w{@var{i}-th} group matches a zero-length string, then the -function sets @code{@w{@var{regs}->}start[@var{i}]} and -@code{@w{@var{regs}->}end[@var{i}]} to the index just beyond that -zero-length string. - -For example, when you match the pattern @samp{(a*)b} against the string -@samp{b}, you get: - -@itemize -@item -0 in @code{@w{@var{regs}->}start[0]} and 1 in @code{@w{@var{regs}->}end[0]} - -@item -0 in @code{@w{@var{regs}->}start[1]} and 0 in @code{@w{@var{regs}->}end[1]} -@end itemize - -@ignore -The function sets @code{@w{@var{regs}->}start[0]} and -@code{@w{@var{regs}->}end[0]} to analogous information about the entire -pattern. - -For example, when you match the pattern @samp{(a*)} against the empty -string, you get: - -@itemize -@item -0 in @code{@w{@var{regs}->}start[0]} and 0 in @code{@w{@var{regs}->}end[0]} - -@item -0 in @code{@w{@var{regs}->}start[1]} and 0 in @code{@w{@var{regs}->}end[1]} -@end itemize -@end ignore - -@item -If an @w{@var{i}-th} group contains a @w{@var{j}-th} group -in turn not contained within any other group within group @var{i} and -the function reports a match of the @w{@var{i}-th} group, then it -records in @code{@w{@var{regs}->}start[@var{j}]} and -@code{@w{@var{regs}->}end[@var{j}]} the last match (if it matched) of -the @w{@var{j}-th} group. 
- -For example, when you match the pattern @samp{((a*)b)*} against the -string @samp{abb}, @w{group 2} last matches the empty string, so you -get what it previously matched: - -@itemize -@item -0 in @code{@w{@var{regs}->}start[0]} and 3 in @code{@w{@var{regs}->}end[0]} - -@item -2 in @code{@w{@var{regs}->}start[1]} and 3 in @code{@w{@var{regs}->}end[1]} - -@item -2 in @code{@w{@var{regs}->}start[2]} and 2 in @code{@w{@var{regs}->}end[2]} -@end itemize - -When you match the pattern @samp{((a)*b)*} against the string -@samp{abb}, @w{group 2} doesn't participate in the last match, so you -get: - -@itemize -@item -0 in @code{@w{@var{regs}->}start[0]} and 3 in @code{@w{@var{regs}->}end[0]} - -@item -2 in @code{@w{@var{regs}->}start[1]} and 3 in @code{@w{@var{regs}->}end[1]} - -@item -0 in @code{@w{@var{regs}->}start[2]} and 1 in @code{@w{@var{regs}->}end[2]} -@end itemize - -@item -If an @w{@var{i}-th} group contains a @w{@var{j}-th} group -in turn not contained within any other group within group @var{i} -and the function sets -@code{@w{@var{regs}->}start[@var{i}]} and -@code{@w{@var{regs}->}end[@var{i}]} to @math{-1}, then it also sets -@code{@w{@var{regs}->}start[@var{j}]} and -@code{@w{@var{regs}->}end[@var{j}]} to @math{-1}. 
- -For example, when you match the pattern @samp{((a)*b)*c} against the -string @samp{c}, you get: - -@itemize -@item -0 in @code{@w{@var{regs}->}start[0]} and 1 in @code{@w{@var{regs}->}end[0]} - -@item -@math{-1} in @code{@w{@var{regs}->}start[1]} and @math{-1} in @code{@w{@var{regs}->}end[1]} - -@item -@math{-1} in @code{@w{@var{regs}->}start[2]} and @math{-1} in @code{@w{@var{regs}->}end[2]} -@end itemize - -@end itemize - -@node Freeing GNU Pattern Buffers, , Using Registers, GNU Regex Functions -@subsection Freeing GNU Pattern Buffers - -To free any allocated fields of a pattern buffer, you can use the -@sc{posix} function described in @ref{Freeing POSIX Pattern Buffers}, -since the type @code{regex_t}---the type for @sc{posix} pattern -buffers---is equivalent to the type @code{re_pattern_buffer}. After -freeing a pattern buffer, you need to again compile a regular expression -in it (@pxref{GNU Regular Expression Compiling}) before passing it to -a matching or searching function. - - -@node POSIX Regex Functions, BSD Regex Functions, GNU Regex Functions, Programming with Regex -@section POSIX Regex Functions - -If you're writing code that has to be @sc{posix} compatible, you'll need -to use these functions. Their interfaces are as specified by @sc{posix}, -draft 1003.2/D11.2. - -@menu -* POSIX Pattern Buffers:: The regex_t type. -* POSIX Regular Expression Compiling:: regcomp () -* POSIX Matching:: regexec () -* Reporting Errors:: regerror () -* Using Byte Offsets:: The regmatch_t type. -* Freeing POSIX Pattern Buffers:: regfree () -@end menu - - -@node POSIX Pattern Buffers, POSIX Regular Expression Compiling, , POSIX Regex Functions -@subsection POSIX Pattern Buffers - -To compile or match a given regular expression the @sc{posix} way, you -must supply a pattern buffer exactly the way you do for @sc{gnu} -(@pxref{GNU Pattern Buffers}). 
@sc{posix} pattern buffers have type -@code{regex_t}, which is equivalent to the @sc{gnu} pattern buffer -type @code{re_pattern_buffer}. - - -@node POSIX Regular Expression Compiling, POSIX Matching, POSIX Pattern Buffers, POSIX Regex Functions -@subsection POSIX Regular Expression Compiling - -With @sc{posix}, you can only search for a given regular expression; you -can't match it. To do this, you must first compile it in a -pattern buffer, using @code{regcomp}. - -@ignore -Before calling @code{regcomp}, you must initialize this pattern buffer -as you do for @sc{gnu} (@pxref{GNU Regular Expression Compiling}). See -below, however, for how to choose a syntax with which to compile. -@end ignore - -To compile a pattern buffer, use: - -@findex regcomp -@example -int -regcomp (regex_t *@var{preg}, const char *@var{regex}, int @var{cflags}) -@end example - -@noindent -@var{preg} is the initialized pattern buffer's address, @var{regex} is -the regular expression's address, and @var{cflags} is the compilation -flags, which Regex considers as a collection of bits. Here are the -valid bits, as defined in @file{regex.h}: - -@table @code - -@item REG_EXTENDED -@vindex REG_EXTENDED -says to use @sc{posix} Extended Regular Expression syntax; if this isn't -set, then says to use @sc{posix} Basic Regular Expression syntax. -@code{regcomp} sets @var{preg}'s @code{syntax} field accordingly. - -@item REG_ICASE -@vindex REG_ICASE -@cindex ignoring case -says to ignore case; @code{regcomp} sets @var{preg}'s @code{translate} -field to a translate table which ignores case, replacing anything you've -put there before. - -@item REG_NOSUB -@vindex REG_NOSUB -says to set @var{preg}'s @code{no_sub} field; @pxref{POSIX Matching}, -for what this means. - -@item REG_NEWLINE -@vindex REG_NEWLINE -says that a: - -@itemize @bullet - -@item -match-any-character operator (@pxref{Match-any-character -Operator}) doesn't match a newline. 
- -@item -nonmatching list not containing a newline (@pxref{List -Operators}) matches a newline. - -@item -match-beginning-of-line operator (@pxref{Match-beginning-of-line -Operator}) matches the empty string immediately after a newline, -regardless of how @code{REG_NOTBOL} is set (@pxref{POSIX Matching}, for -an explanation of @code{REG_NOTBOL}). - -@item -match-end-of-line operator (@pxref{Match-end-of-line -Operator}) matches the empty string immediately before a newline, -regardless of how @code{REG_NOTEOL} is set (@pxref{POSIX Matching}, -for an explanation of @code{REG_NOTEOL}). - -@end itemize - -@end table - -If @code{regcomp} successfully compiles the regular expression, it -returns zero and sets @code{*@var{preg}} to the compiled -pattern. Except for @code{syntax} (which it sets as explained above), it -also sets the same fields the same way as does the @sc{gnu} compiling -function (@pxref{GNU Regular Expression Compiling}). - -If @code{regcomp} can't compile the regular expression, it returns one -of the error codes listed here. (Except when noted differently, the -syntax in all examples below is basic regular expression syntax.) - -@table @code - -@comment repetitions -@item REG_BADRPT -For example, the consecutive repetition operators @samp{**} in -@samp{a**} are invalid. As another example, if the syntax is extended -regular expression syntax, then the repetition operator @samp{*} with -nothing on which to operate in @samp{*} is invalid. - -@item REG_BADBR -For example, the @var{count} @samp{-1} in @samp{a\@{-1} is invalid. - -@item REG_EBRACE -For example, @samp{a\@{1} is missing a close-interval operator. - -@comment lists -@item REG_EBRACK -For example, @samp{[a} is missing a close-list operator. - -@item REG_ERANGE -For example, the range ending point @samp{a} that collates lower than -does its starting point @samp{z} in @samp{[z-a]} is invalid.
Also, the -range with the character class @samp{[:alpha:]} as its starting point in -@samp{[[:alpha:]-|]} is invalid. - -@item REG_ECTYPE -For example, the character class name @samp{foo} in @samp{[[:foo:]]} is -invalid. - -@comment groups -@item REG_EPAREN -For example, @samp{a\)} is missing an open-group operator and @samp{\(a} -is missing a close-group operator. - -@item REG_ESUBREG -For example, the back reference @samp{\2} that refers to a nonexistent -subexpression in @samp{\(a\)\2} is invalid. - -@comment unfinished business - -@item REG_EEND -Returned when a regular expression causes no other more specific error. - -@item REG_EESCAPE -For example, the trailing backslash @samp{\} in @samp{a\} is invalid, as is the -one in @samp{\}. - -@comment kitchen sink -@item REG_BADPAT -For example, in the extended regular expression syntax, the empty group -@samp{()} in @samp{a()b} is invalid. - -@comment internal -@item REG_ESIZE -Returned when a regular expression needs a pattern buffer larger than -65536 bytes. - -@item REG_ESPACE -Returned when a regular expression causes Regex to run out of memory. - -@end table - - -@node POSIX Matching, Reporting Errors, POSIX Regular Expression Compiling, POSIX Regex Functions -@subsection POSIX Matching - -Matching the @sc{posix} way means trying to match a null-terminated -string starting at its first character. Once you've compiled a pattern -into a pattern buffer (@pxref{POSIX Regular Expression Compiling}), you -can ask the matcher to match that pattern against a string using: - -@findex regexec -@example -int -regexec (const regex_t *@var{preg}, const char *@var{string}, - size_t @var{nmatch}, regmatch_t @var{pmatch}[], int @var{eflags}) -@end example - -@noindent -@var{preg} is the address of a pattern buffer for a compiled pattern. -@var{string} is the string you want to match. - -@xref{Using Byte Offsets}, for an explanation of @var{pmatch}.
If you -pass zero for @var{nmatch} or you compiled @var{preg} with the -compilation flag @code{REG_NOSUB} set, then @code{regexec} will ignore -@var{pmatch}; otherwise, you must allocate it to have at least -@var{nmatch} elements. @code{regexec} will record @var{nmatch} byte -offsets in @var{pmatch}, and set to @math{-1} any unused elements up to -@code{@var{pmatch}[@var{nmatch} - 1]}. - -@var{eflags} specifies @dfn{execution flags}---namely, the two bits -@code{REG_NOTBOL} and @code{REG_NOTEOL} (defined in @file{regex.h}). If -you set @code{REG_NOTBOL}, then the match-beginning-of-line operator -(@pxref{Match-beginning-of-line Operator}) always fails to match. -This lets you match against pieces of a line, as you would need to if, -say, searching for repeated instances of a given pattern in a line; it -would work correctly for patterns both with and without -match-beginning-of-line operators. @code{REG_NOTEOL} works analogously -for the match-end-of-line operator (@pxref{Match-end-of-line -Operator}); it exists for symmetry. - -@code{regexec} tries to find a match for @var{preg} in @var{string} -according to the syntax in @var{preg}'s @code{syntax} field. -(@xref{POSIX Regular Expression Compiling}, for how to set it.) The -function returns zero if the compiled pattern matches @var{string} and -@code{REG_NOMATCH} (defined in @file{regex.h}) if it doesn't. - -@node Reporting Errors, Using Byte Offsets, POSIX Matching, POSIX Regex Functions -@subsection Reporting Errors - -If either @code{regcomp} or @code{regexec} fails, it returns a nonzero -error code, the possibilities for which are defined in @file{regex.h}. -@xref{POSIX Regular Expression Compiling}, and @ref{POSIX Matching}, for -what these codes mean.
To get an error string corresponding to these -codes, you can use: - -@findex regerror -@example -size_t -regerror (int @var{errcode}, - const regex_t *@var{preg}, - char *@var{errbuf}, - size_t @var{errbuf_size}) -@end example - -@noindent -@var{errcode} is an error code, @var{preg} is the address of the pattern -buffer which provoked the error, @var{errbuf} is the error buffer, and -@var{errbuf_size} is @var{errbuf}'s size. - -@code{regerror} returns the size in bytes of the error string -corresponding to @var{errcode} (including its terminating null). If -@var{errbuf} and @var{errbuf_size} are nonzero, it also returns in -@var{errbuf} the first @math{@var{errbuf_size} - 1} characters of the -error string, followed by a null. -@var{errbuf_size} must be a nonnegative number less than or equal to the -size in bytes of @var{errbuf}. - -You can call @code{regerror} with a null @var{errbuf} and a zero -@var{errbuf_size} to determine how large @var{errbuf} need be to -accommodate @code{regerror}'s error string. - -@node Using Byte Offsets, Freeing POSIX Pattern Buffers, Reporting Errors, POSIX Regex Functions -@subsection Using Byte Offsets - -In @sc{posix}, variables of type @code{regmatch_t} hold information -analogous, but not identical, to @sc{gnu}'s registers (@pxref{Using -Registers}). To get information about registers in @sc{posix}, pass to -@code{regexec} a nonzero @var{pmatch} of type @code{regmatch_t *}, i.e., -the address of a structure of this type, defined in -@file{regex.h}: - -@tindex regmatch_t -@example -typedef struct -@{ - regoff_t rm_so; - regoff_t rm_eo; -@} regmatch_t; -@end example - -When reading in @ref{Using Registers}, about how the matching function -stores the information into the registers, substitute @var{pmatch} for -@var{regs}, @code{@var{pmatch}[@var{i}].rm_so} for -@code{@w{@var{regs}->}start[@var{i}]} and -@code{@var{pmatch}[@var{i}].rm_eo} for -@code{@w{@var{regs}->}end[@var{i}]}.
- -@node Freeing POSIX Pattern Buffers, , Using Byte Offsets, POSIX Regex Functions -@subsection Freeing POSIX Pattern Buffers - -To free any allocated fields of a pattern buffer, use: - -@findex regfree -@example -void -regfree (regex_t *@var{preg}) -@end example - -@noindent -@var{preg} is the pattern buffer whose allocated fields you want freed. -@code{regfree} also sets @var{preg}'s @code{allocated} and @code{used} -fields to zero. After freeing a pattern buffer, you need to again -compile a regular expression in it (@pxref{POSIX Regular Expression -Compiling}) before passing it to the matching function (@pxref{POSIX -Matching}). - - -@node BSD Regex Functions, , POSIX Regex Functions, Programming with Regex -@section BSD Regex Functions - -If you're writing code that has to be Berkeley @sc{unix} compatible, -you'll need to use these functions whose interfaces are the same as those -in Berkeley @sc{unix}. - -@menu -* BSD Regular Expression Compiling:: re_comp () -* BSD Searching:: re_exec () -@end menu - -@node BSD Regular Expression Compiling, BSD Searching, , BSD Regex Functions -@subsection BSD Regular Expression Compiling - -With Berkeley @sc{unix}, you can only search for a given regular -expression; you can't match one. To search for it, you must first -compile it. Before you compile it, you must indicate the regular -expression syntax you want it compiled according to by setting the -variable @code{re_syntax_options} (declared in @file{regex.h}) to some -syntax (@pxref{Regular Expression Syntax}). - -To compile a regular expression, use: - -@findex re_comp -@example -char * -re_comp (char *@var{regex}) -@end example - -@noindent -@var{regex} is the address of a null-terminated regular expression. -@code{re_comp} uses an internal pattern buffer, so you can use only the -most recently compiled pattern buffer.
This means that if you want to -use a given regular expression that you've already compiled---but it -isn't the latest one you've compiled---you'll have to recompile it. If -you call @code{re_comp} with the null string (@emph{not} the empty -string) as the argument, it doesn't change the contents of the pattern -buffer. - -If @code{re_comp} successfully compiles the regular expression, it -returns zero. If it can't compile the regular expression, it returns -an error string. @code{re_comp}'s error messages are identical to those -of @code{re_compile_pattern} (@pxref{GNU Regular Expression -Compiling}). - -@node BSD Searching, , BSD Regular Expression Compiling, BSD Regex Functions -@subsection BSD Searching - -Searching the Berkeley @sc{unix} way means searching in a string -starting at its first character and trying successive positions within -it to find a match. Once you've compiled a pattern using @code{re_comp} -(@pxref{BSD Regular Expression Compiling}), you can ask Regex -to search for that pattern in a string using: - -@findex re_exec -@example -int -re_exec (char *@var{string}) -@end example - -@noindent -@var{string} is the address of the null-terminated string in which you -want to search. - -@code{re_exec} returns either 1 for success or 0 for failure. It -automatically uses a @sc{gnu} fastmap (@pxref{Searching with Fastmaps}). - - -@node Copying, Index, Programming with Regex, Top -@appendix GNU GENERAL PUBLIC LICENSE -@center Version 2, June 1991 - -@display -Copyright @copyright{} 1989, 1991 Free Software Foundation, Inc. -675 Mass Ave, Cambridge, MA 02139, USA - -Everyone is permitted to copy and distribute verbatim copies -of this license document, but changing it is not allowed. -@end display - -@unnumberedsec Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. 
By contrast, the GNU General Public -License is intended to guarantee your freedom to share and change free -software---to make sure the software is free for all its users. This -General Public License applies to most of the Free Software -Foundation's software and to any other program whose authors commit to -using it. (Some other Free Software Foundation software is covered by -the GNU Library General Public License instead.) You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -this service if you wish), that you receive source code or can get it -if you want it, that you can change the software or use pieces of it -in new free programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. -These restrictions translate to certain responsibilities for you if you -distribute copies of the software, or if you modify it. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must give the recipients all the rights that -you have. You must make sure that they, too, receive or can get the -source code. And you must show them these terms so they know their -rights. - - We protect your rights with two steps: (1) copyright the software, and -(2) offer you this license which gives you legal permission to copy, -distribute and/or modify the software. - - Also, for each author's protection and ours, we want to make certain -that everyone understands that there is no warranty for this free -software. 
If the software is modified by someone else and passed on, we -want its recipients to know that what they have is not the original, so -that any problems introduced by others will not reflect on the original -authors' reputations. - - Finally, any free program is threatened constantly by software -patents. We wish to avoid the danger that redistributors of a free -program will individually obtain patent licenses, in effect making the -program proprietary. To prevent this, we have made it clear that any -patent must be licensed for everyone's free use or not licensed at all. - - The precise terms and conditions for copying, distribution and -modification follow. - -@iftex -@unnumberedsec TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION -@end iftex -@ifinfo -@center TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION -@end ifinfo - -@enumerate -@item -This License applies to any program or other work which contains -a notice placed by the copyright holder saying it may be distributed -under the terms of this General Public License. The ``Program'', below, -refers to any such program or work, and a ``work based on the Program'' -means either the Program or any derivative work under copyright law: -that is to say, a work containing the Program or a portion of it, -either verbatim or with modifications and/or translated into another -language. (Hereinafter, translation is included without limitation in -the term ``modification''.) Each licensee is addressed as ``you''. - -Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. The act of -running the Program is not restricted, and the output from the Program -is covered only if its contents constitute a work based on the -Program (independent of having been made by running the Program). -Whether that is true depends on what the Program does. 
- -@item -You may copy and distribute verbatim copies of the Program's -source code as you receive it, in any medium, provided that you -conspicuously and appropriately publish on each copy an appropriate -copyright notice and disclaimer of warranty; keep intact all the -notices that refer to this License and to the absence of any warranty; -and give any other recipients of the Program a copy of this License -along with the Program. - -You may charge a fee for the physical act of transferring a copy, and -you may at your option offer warranty protection in exchange for a fee. - -@item -You may modify your copy or copies of the Program or any portion -of it, thus forming a work based on the Program, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - -@enumerate a -@item -You must cause the modified files to carry prominent notices -stating that you changed the files and the date of any change. - -@item -You must cause any work that you distribute or publish, that in -whole or in part contains or is derived from the Program or any -part thereof, to be licensed as a whole at no charge to all third -parties under the terms of this License. - -@item -If the modified program normally reads commands interactively -when run, you must cause it, when started running for such -interactive use in the most ordinary way, to print or display an -announcement including an appropriate copyright notice and a -notice that there is no warranty (or else, saying that you provide -a warranty) and that users may redistribute the program under -these conditions, and telling the user how to view a copy of this -License. (Exception: if the Program itself is interactive but -does not normally print such an announcement, your work based on -the Program is not required to print an announcement.) -@end enumerate - -These requirements apply to the modified work as a whole. 
If -identifiable sections of that work are not derived from the Program, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Program, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote it. - -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Program. - -In addition, mere aggregation of another work not based on the Program -with the Program (or with a work based on the Program) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - -@item -You may copy and distribute the Program (or a work based on it, -under Section 2) in object code or executable form under the terms of -Sections 1 and 2 above provided that you also do one of the following: - -@enumerate a -@item -Accompany it with the complete corresponding machine-readable -source code, which must be distributed under the terms of Sections -1 and 2 above on a medium customarily used for software interchange; or, - -@item -Accompany it with a written offer, valid for at least three -years, to give any third party, for a charge no more than your -cost of physically performing source distribution, a complete -machine-readable copy of the corresponding source code, to be -distributed under the terms of Sections 1 and 2 above on a medium -customarily used for software interchange; or, - -@item -Accompany it with the information you received as to the offer -to distribute corresponding source code. 
(This alternative is -allowed only for noncommercial distribution and only if you -received the program in object code or executable form with such -an offer, in accord with Subsection b above.) -@end enumerate - -The source code for a work means the preferred form of the work for -making modifications to it. For an executable work, complete source -code means all the source code for all modules it contains, plus any -associated interface definition files, plus the scripts used to -control compilation and installation of the executable. However, as a -special exception, the source code distributed need not include -anything that is normally distributed (in either source or binary -form) with the major components (compiler, kernel, and so on) of the -operating system on which the executable runs, unless that component -itself accompanies the executable. - -If distribution of executable or object code is made by offering -access to copy from a designated place, then offering equivalent -access to copy the source code from the same place counts as -distribution of the source code, even though third parties are not -compelled to copy the source along with the object code. - -@item -You may not copy, modify, sublicense, or distribute the Program -except as expressly provided under this License. Any attempt -otherwise to copy, modify, sublicense or distribute the Program is -void, and will automatically terminate your rights under this License. -However, parties who have received copies, or rights, from you under -this License will not have their licenses terminated so long as such -parties remain in full compliance. - -@item -You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Program or its derivative works. These actions are -prohibited by law if you do not accept this License. 
Therefore, by -modifying or distributing the Program (or any work based on the -Program), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Program or works based on it. - -@item -Each time you redistribute the Program (or any work based on the -Program), the recipient automatically receives a license from the -original licensor to copy, distribute or modify the Program subject to -these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. -You are not responsible for enforcing compliance by third parties to -this License. - -@item -If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Program at all. For example, if a patent -license would not permit royalty-free redistribution of the Program by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Program. - -If any portion of this section is held invalid or unenforceable under -any particular circumstance, the balance of the section is intended to -apply and the section as a whole is intended to apply in other -circumstances. 
- -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system, which is -implemented by public license practices. Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - -@item -If the distribution and/or use of the Program is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Program under this License -may add an explicit geographical distribution limitation excluding -those countries, so that distribution is permitted only in or among -countries not thus excluded. In such case, this License incorporates -the limitation as if written in the body of this License. - -@item -The Free Software Foundation may publish revised and/or new versions -of the General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - -Each version is given a distinguishing version number. If the Program -specifies a version number of this License which applies to it and ``any -later version'', you have the option of following the terms and conditions -either of that version or of any later version published by the Free -Software Foundation. If the Program does not specify a version number of -this License, you may choose any version ever published by the Free Software -Foundation. 
- -@item -If you wish to incorporate parts of the Program into other free -programs whose distribution conditions are different, write to the author -to ask for permission. For software which is copyrighted by the Free -Software Foundation, write to the Free Software Foundation; we sometimes -make exceptions for this. Our decision will be guided by the two goals -of preserving the free status of all derivatives of our free software and -of promoting the sharing and reuse of software generally. - -@iftex -@heading NO WARRANTY -@end iftex -@ifinfo -@center NO WARRANTY -@end ifinfo - -@item -BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY -FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN -OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES -PROVIDE THE PROGRAM ``AS IS'' WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED -OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS -TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE -PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, -REPAIR OR CORRECTION. - -@item -IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR -REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, -INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING -OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED -TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY -YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER -PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGES. 
-@end enumerate - -@iftex -@heading END OF TERMS AND CONDITIONS -@end iftex -@ifinfo -@center END OF TERMS AND CONDITIONS -@end ifinfo - -@page -@unnumberedsec Appendix: How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least -the ``copyright'' line and a pointer to where the full notice is found. - -@smallexample -@var{one line to give the program's name and a brief idea of what it does.} -Copyright (C) 19@var{yy} @var{name of author} - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -@end smallexample - -Also add information on how to contact you by electronic and paper mail. - -If the program is interactive, make it output a short notice like this -when it starts in an interactive mode: - -@smallexample -Gnomovision version 69, Copyright (C) 19@var{yy} @var{name of author} -Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 
-This is free software, and you are welcome to redistribute it -under certain conditions; type `show c' for details. -@end smallexample - -The hypothetical commands @samp{show w} and @samp{show c} should show -the appropriate parts of the General Public License. Of course, the -commands you use may be called something other than @samp{show w} and -@samp{show c}; they could even be mouse-clicks or menu items---whatever -suits your program. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a ``copyright disclaimer'' for the program, if -necessary. Here is a sample; alter the names: - -@example -Yoyodyne, Inc., hereby disclaims all copyright interest in the program -`Gnomovision' (which makes passes at compilers) written by James Hacker. - -@var{signature of Ty Coon}, 1 April 1989 -Ty Coon, President of Vice -@end example - -This General Public License does not permit incorporating your program into -proprietary programs. If your program is a subroutine library, you may -consider it more useful to permit linking proprietary applications with the -library. If this is what you want to do, use the GNU Library General -Public License instead of this License. 
- - -@node Index, , Copying, Top -@unnumbered Index - -@printindex cp - -@contents - -@bye diff --git a/gnu/lib/libregex/test/TAGS b/gnu/lib/libregex/test/TAGS deleted file mode 100644 index d3aad750dcba..000000000000 --- a/gnu/lib/libregex/test/TAGS +++ /dev/null @@ -1,373 +0,0 @@ - -.././regex.c,4137 -#define AT_STRINGS_BEG(3078,98376 -#define AT_STRINGS_END(3079,98449 -#define AT_WORD_BOUNDARY(3093,99002 -#define BUF_PUSH(887,24995 -#define BUF_PUSH_2(895,25208 -#define BUF_PUSH_3(904,25437 -#define DEBUG_POP(2336,74614 -#define DEBUG_PRINT1(471,14296 -#define DEBUG_PRINT1(785,21263 -#define DEBUG_PRINT2(472,14342 -#define DEBUG_PRINT3(473,14398 -#define DEBUG_PRINT3(787,21316 -#define DEBUG_PRINT4(474,14462 -#define DEBUG_PRINT_COMPILED_PATTERN(475,14534 -#define DEBUG_PRINT_COMPILED_PATTERN(789,21386 -#define DEBUG_PRINT_DOUBLE_STRING(477,14637 -#define DEBUG_PUSH(2338,74684 -#define DEBUG_STATEMENT(470,14267 -#define DOUBLE_FAIL_STACK(2299,73230 -#define EVER_MATCHED_SOMETHING(3028,96680 -#define EXTEND_BUFFER(941,26834 -#define EXTRACT_NUMBER(403,12499 -#define EXTRACT_NUMBER(422,12960 -#define EXTRACT_NUMBER_AND_INCR(430,13181 -#define EXTRACT_NUMBER_AND_INCR(448,13583 -#define FAIL_STACK_EMPTY(2271,72289 -#define FAIL_STACK_FULL(2273,72404 -#define FAIL_STACK_PTR_EMPTY(2272,72344 -#define FAIL_STACK_TOP(2274,72473 -#define FIRST_STRING_P(221,5848 -#define FREE_VAR(3100,99186 -#define FREE_VARIABLES(3101,99240 -#define FREE_VARIABLES(3116,99751 -#define GET_BUFFER_SPACE(882,24802 -#define GET_UNSIGNED_NUMBER(1017,29312 -#define INIT_FAIL_STACK(2279,72612 -#define INSERT_JUMP(923,26079 -#define INSERT_JUMP2(927,26236 -#define ISALNUM(147,3407 -#define ISALPHA(148,3455 -#define ISBLANK(135,3062 -#define ISBLANK(137,3116 -#define ISCNTRL(149,3503 -#define ISDIGIT(146,3359 -#define ISGRAPH(140,3185 -#define ISGRAPH(142,3239 -#define ISLOWER(150,3551 -#define ISPRINT(145,3311 -#define ISPUNCT(151,3599 -#define ISSPACE(152,3647 -#define ISUPPER(153,3695 -#define 
ISXDIGIT(154,3743 -#define IS_ACTIVE(3026,96578 -#define IS_CHAR_CLASS(1035,29793 -#define MATCHED_SOMETHING(3027,96621 -#define MAX(233,6292 -#define MIN(234,6334 -#define PATFETCH(852,23769 -#define PATFETCH_RAW(860,24020 -#define POINTER_TO_OFFSET(3050,97433 -#define POP_FAILURE_ITEM(2331,74426 -#define POP_FAILURE_POINT(2461,79538 -#define PREFETCH(3064,97916 -#define PUSH_FAILURE_ITEM(2327,74253 -#define PUSH_FAILURE_POINT(2352,75048 -#define PUSH_PATTERN_OP(2317,73841 -#define REGEX_REALLOCATE(185,4875 -#define REGEX_REALLOCATE(210,5495 -#define REGEX_TALLOC(227,6137 -#define REG_MATCH_NULL_STRING_P(3025,96511 -#define REG_UNSET(3055,97649 -#define RETALLOC(226,6058 -#define SET_LIST_BIT(1011,29089 -#define SET_REGS_MATCHED(3034,96936 -#define SIGN_EXTEND_CHAR(166,4109 -#define SIGN_EXTEND_CHAR(169,4217 -#define STORE_JUMP(915,25800 -#define STORE_JUMP2(919,25917 -#define STORE_NUMBER(384,11919 -#define STORE_NUMBER_AND_INCR(394,12242 -#define STREQ(231,6244 -#define SYNTAX(120,2790 -#define TALLOC(225,6003 -#define TRANSLATE(873,24503 -#define WORDCHAR_P(3086,98755 -alt_match_null_string_p 4466,149039 -#define assert(782,21217 -at_begline_loc_p 2131,67979 -at_endline_loc_p 2150,68557 -#define bcmp(54,1656 -bcmp_translate 4591,151831 -#define bcopy(57,1726 -typedef char boolean;236,6377 -#define bzero(60,1793 -common_op_match_null_string_p 4503,149895 -compile_range 2200,69997 -} compile_stack_elt_t;990,28602 -} compile_stack_type;998,28748 -extract_number 411,12714 -extract_number_and_incr 438,13370 -} fail_stack_type;2269,72269 -group_in_compile_stack 2172,69174 -group_match_null_string_p 4357,145267 -init_syntax_once 94,2365 -insert_op1 2091,67107 -insert_op2 2110,67475 -#define isascii(131,3018 -typedef int pattern_offset_t;981,28388 -print_compiled_pattern 726,19792 -print_double_string 753,20605 -print_fastmap 486,14835 -print_partial_compiled_pattern 518,15475 -re_comp 4650,153479 -re_compile_fastmap 2532,82428 -re_compile_pattern 4617,152520 -re_exec 
4688,154373 -re_match 3136,100557 -re_match_2 3161,101399 -} re_opcode_t;378,11781 -re_search 2844,90872 -re_search_2 2877,91998 -re_set_registers 2817,90247 -re_set_syntax 808,22087 -regcomp 4736,155972 -regerror 4876,160188 -regex_compile 1062,30922 -regexec 4811,158371 -regfree 4920,161247 -} register_info_type;3023,96488 -typedef unsigned regnum_t;974,28172 -store_op1 2063,66535 -store_op2 2076,66768 -typedef const unsigned 2262,72103 - -.././regex.h,230 -#define _RE_ARGS(394,14981 -#define _RE_ARGS(398,15036 -} reg_errcode_t;270,10874 -typedef unsigned reg_syntax_t;38,1503 -typedef struct re_pattern_buffer regex_t;346,13556 -} regmatch_t;382,14634 -typedef int regoff_t;354,13814 - -getpagesize.h,84 -#define getpagesize(12,137 -#define getpagesize(15,191 -#define getpagesize(20,302 - -test.h,436 -#define BRACES_TO_OPS(107,3169 -#define INVALID_PATTERN(110,3328 -#define MATCH_SELF(114,3429 -#define PARENS_TO_OPS(108,3248 -#define SAFE_STRLEN(14,201 -#define TEST_POSITIONED_MATCH(116,3470 -#define TEST_REGISTERS(104,3011 -#define TEST_REGISTERS_2(97,2703 -#define TEST_SEARCH(127,3875 -#define TEST_SEARCH_2(123,3720 -#define TEST_TRUNCATED_MATCH(120,3608 -typedef enum { false = 0, true = 1 } boolean;16,255 -} test_type;33,572 - -alloca.c,128 -alloca 141,3996 -find_stack_direction 85,2553 -} header;127,3538 -typedef void *pointer;51,1721 -typedef char *pointer;53,1778 - -bsd-interf.c,51 -test_berk_search 8,106 -test_bsd_interface 33,738 - -debugmalloc.c,395 -#define TRACE(8,143 -#define TRACE1(9,197 -#define TRACE2(10,254 -#define TRACE3(11,319 -#define TRACE4(12,392 -#define USER_ALLOC(61,1440 -typedef char *address;15,480 -} *chunk;54,1225 -chunk_delete 115,2778 -chunk_insert 96,2294 -chunk_to_mem 79,1916 -free 261,5604 -free_list_available 175,3947 -malloc 203,4343 -mem_to_chunk 68,1703 -realloc 242,5309 -validate_list 153,3478 -xsbrk 21,545 - -emacsmalloc.c,574 -#define ASSERT(178,5884 -#define ASSERT(181,5985 -#define CHAIN(166,5430 -#define bcmp(73,2821 
-#define bcopy(72,2777 -#define bzero(74,2868 -calloc 603,15983 -free 484,13255 -get_lim_data 736,18517 -get_lim_data 752,18767 -get_lim_data 759,18860 -getpool 374,10263 -malloc 413,11133 -malloc_init 218,6863 -malloc_mem_free 707,17940 -malloc_mem_used 688,17683 -malloc_stats 663,17320 -malloc_usable_size 233,7147 -memalign 618,16164 -morecore 244,7380 -realloc 541,14424 -#define start_of_data(110,3486 -#define start_of_data(115,3546 -sys_sbrk 815,20804 -valloc 645,17031 - -fileregex.c,13 -main 11,156 - -g++malloc.c,1543 -#define UPDATE_STATS(33,1090 -#define UPDATE_STATS(35,1131 -static inline int aligned_OK(343,11189 -void* calloc(1039,28692 -void cfree(1048,28894 -static inline void* chunk2mem(619,19336 -#define clear_inuse(592,18767 -static inline void consollink(716,21398 -static void do_free_stats(544,18016 -static void do_malloc_stats(534,17741 -766,22304 -extern 762,22235 - for 1260,34165 -void free(1028,28553 -static inline void frontlink(732,21717 -static unsigned int gcd(557,18251 - if 1212,32427 - if 1216,32582 - if 1220,32737 - if 1224,32880 - if 1229,33094 - if 1233,33251 - if 1238,33463 - if 1242,33609 - if 1247,33739 -#define inuse(590,18680 -static inline unsigned int lcm(580,18540 -void* malloc(939,26370 -static mchunkptr malloc_find_space(858,24561 -void malloc_stats(1201,32256 -unsigned int malloc_usable_size(1054,28936 -static volatile void malloc_user_error(286,9757 -static void malloc_user_error(288,9804 -typedef struct malloc_bin* mbinptr;320,10636 -typedef struct malloc_chunk* mchunkptr;309,10247 -static inline mchunkptr mem2chunk(643,19759 -void* memalign(1118,30363 -#define next_chunk(600,18910 -#define prev_chunk(604,19023 -void* realloc(1071,29263 -static inline unsigned int request2size(335,10993 -mchunkptr sanity_check(628,19486 -#define set_inuse(591,18723 -static inline void set_size(609,19149 -static inline mbinptr size2bin(499,16914 -static inline void split(685,20463 -static 768,22312 -static inline void unlink(671,20263 -void* 
valloc(1194,32107 -typedef volatile void 760,22184 -764,22271 - -iregex.c,54 -main 20,390 -print_regs 141,2638 -scanstring 87,1839 - -main.c,13 -main 12,242 - -malloc-test.c,112 -#define BITS_BLOCK(12,168 -#define BITS_MASK(13,228 -} bits_list_type;6,56 -init_bits_list 16,311 -main(32,621 - -other.c,18 -test_others 6,96 - -printchar.c,15 -printchar 2,5 - -psx-basic.c,23 -test_posix_basic 7,84 - -psx-extend.c,26 -test_posix_extended 7,88 - -psx-generic.c,26 -test_posix_generic 8,117 - -psx-group.c,20 -test_grouping 7,92 - -psx-interf.c,416 -fill_pmatch 174,4802 -get_error_string 18,260 -init_pattern_buffer 49,1434 -test_compile 67,1925 -test_eflags 245,6876 -test_error_code_allocation 562,16619 -test_error_code_message 524,15247 -test_ignore_case 303,8525 -test_newline 330,9199 -test_nsub 117,3319 -test_pmatch 188,5121 -test_posix_interface 614,18719 -test_posix_match 359,9938 -test_regcomp 138,3725 -test_regerror 592,17621 -test_regexec 394,10783 - -psx-interv.c,21 -test_intervals 6,93 - -test.c,607 -#define SET_FASTMAP(447,13999 -#define bcmp(18,362 -#define bcopy(19,415 -#define bzero(20,473 -compile_and_print_pattern 666,19653 -concat 97,2673 -delimiters_to_ops 571,17477 -general_test 115,2996 -invalid_pattern 542,16821 -#define memcmp(26,611 -#define memcpy(27,660 -print_pattern_info 635,18998 -set_all_registers 58,1390 -test_all_registers 506,15567 -test_case_fold 682,19993 -test_fastmap 460,14363 -test_fastmap_search 474,14668 -test_match 776,22235 -test_match_2 766,22040 -test_match_n_times 715,20798 -test_search_return 408,13011 -valid_nonposix_pattern 646,19239 -valid_pattern 557,17182 - -tregress.c,208 -#define SIMPLE_MATCH(74,1463 -#define SIMPLE_NONMATCH(75,1528 -do_match 78,1599 -itoa 10,199 -simple_compile 44,882 -simple_fail 21,353 -simple_fastmap 55,1115 -simple_search 100,2020 -test_regress 124,2513 - -upcase.c,0 - -xmalloc.c,14 -xmalloc 9,87 |