diff options
Diffstat (limited to 'contrib/texinfo/makeinfo/html.c')
-rw-r--r-- | contrib/texinfo/makeinfo/html.c | 591 |
1 files changed, 385 insertions, 206 deletions
diff --git a/contrib/texinfo/makeinfo/html.c b/contrib/texinfo/makeinfo/html.c index 0c06c6c6fb1f..be2c136d41ff 100644 --- a/contrib/texinfo/makeinfo/html.c +++ b/contrib/texinfo/makeinfo/html.c @@ -1,7 +1,8 @@ /* html.c -- html-related utilities. - $Id: html.c,v 1.18 2003/06/02 12:32:29 karl Exp $ + $Id: html.c,v 1.28 2004/12/06 01:13:06 karl Exp $ - Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc. + Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004 Free Software + Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,115 +20,13 @@ #include "system.h" #include "cmds.h" +#include "files.h" #include "html.h" #include "lang.h" #include "makeinfo.h" +#include "node.h" #include "sectioning.h" -HSTACK *htmlstack = NULL; - -static char *process_css_file (/* char * */); - -/* See html.h. */ -int html_output_head_p = 0; -int html_title_written = 0; - - -void -html_output_head () -{ - static const char *html_title = NULL; - - if (html_output_head_p) - return; - html_output_head_p = 1; - - /* The <title> should not have markup, so use text_expansion. */ - if (!html_title) - html_title = title ? text_expansion (title) : _("Untitled"); - - add_word_args ("<html lang=\"%s\">\n<head>\n<title>%s</title>\n", - language_table[language_code].abbrev, html_title); - - add_word ("<meta http-equiv=\"Content-Type\" content=\"text/html"); - if (document_encoding_code != no_encoding) - add_word_args ("; charset=%s", - encoding_table[document_encoding_code].encname); - add_word ("\">\n"); - - if (!document_description) - document_description = html_title; - - add_word_args ("<meta name=\"description\" content=\"%s\">\n", - document_description); - add_word_args ("<meta name=\"generator\" content=\"makeinfo %s\">\n", - VERSION); -#if 0 - /* let's not do this now, since it causes mozilla to put up a - navigation bar. */ - add_word ("<link href=\"http://www.gnu.org/software/texinfo/\" \ -rel=\"generator-home\">\n"); -#endif - - if (copying_text) - { /* copying_text has already been fully expanded in - begin_insertion (by full_expansion), so use insert_ rather than - add_. It is not ideal that we include the html markup here within - <head>, but the alternative is to have yet more and different - expansions of the copying text. Yuck. */ - insert_string ("<!--\n"); - insert_string (copying_text); - insert_string ("-->\n"); - } - - /* Put the style definitions in a comment for the sake of browsers - that don't support <style>. */ - add_word ("<meta http-equiv=\"Content-Style-Type\" content=\"text/css\">\n"); - add_word ("<style type=\"text/css\"><!--\n"); - - { - char *css_inline = NULL; - - if (css_include) - /* This writes out any @import commands from the --css-file, - and returns any actual css code following the imports. */ - css_inline = process_css_file (css_include); - - /* This seems cleaner than adding <br>'s at the end of each line for - these "roman" displays. It's hardly the end of the world if the - browser doesn't do <style>s, in any case; they'll just come out in - typewriter. */ -#define CSS_FONT_INHERIT "font-family:inherit" - add_word_args (" pre.display { %s }\n", CSS_FONT_INHERIT); - add_word_args (" pre.format { %s }\n", CSS_FONT_INHERIT); - - /* Alternatively, we could do <font size=-1> in insertion.c, but this - way makes it easier to override. */ -#define CSS_FONT_SMALLER "font-size:smaller" - add_word_args (" pre.smalldisplay { %s; %s }\n", CSS_FONT_INHERIT, - CSS_FONT_SMALLER); - add_word_args (" pre.smallformat { %s; %s }\n", CSS_FONT_INHERIT, - CSS_FONT_SMALLER); - add_word_args (" pre.smallexample { %s }\n", CSS_FONT_SMALLER); - add_word_args (" pre.smalllisp { %s }\n", CSS_FONT_SMALLER); - - /* Write out any css code from the user's --css-file. */ - if (css_inline) - add_word (css_inline); - - add_word ("--></style>\n"); - } - - add_word ("</head>\n<body>\n"); - - if (title && !html_title_written && titlepage_cmd_present) - { - add_word_args ("<h1 class=\"settitle\">%s</h1>\n", html_title); - html_title_written = 1; - } -} - - /* Append CHAR to BUFFER, (re)allocating as necessary. We don't handle null characters. */ @@ -139,9 +38,8 @@ typedef struct char *buffer; } buffer_type; - static buffer_type * -init_buffer () +init_buffer (void) { buffer_type *buf = xmalloc (sizeof (buffer_type)); buf->length = 0; @@ -151,11 +49,8 @@ init_buffer () return buf; } - static void -append_char (buf, c) - buffer_type *buf; - int c; +append_char (buffer_type *buf, int c) { buf->length++; if (buf->length >= buf->size) @@ -167,17 +62,15 @@ append_char (buf, c) buf->buffer[buf->length] = 0; } - /* Read the cascading style-sheet file FILENAME. Write out any @import commands, which must come first, by the definition of css. If the file contains any actual css code following the @imports, return it; else return NULL. */ - static char * -process_css_file (filename) - char *filename; +process_css_file (char *filename) { - int c, lastchar; + int c; + int lastchar = 0; FILE *f; buffer_type *import_text = init_buffer (); buffer_type *inline_text = init_buffer (); @@ -185,6 +78,8 @@ process_css_file (filename) enum { null_state, comment_state, import_state, inline_state } state = null_state, prev_state; + prev_state = null_state; + /* read from stdin if `-' is the filename. */ f = STREQ (filename, "-") ? stdin : fopen (filename, "r"); if (!f) @@ -204,12 +99,22 @@ process_css_file (filename) { case null_state: /* between things */ if (c == '@') - { - /* If there's some other @command, just call it an - import, it's all the same to us. So don't bother - looking for the `import'. */ - append_char (import_text, c); - state = import_state; + { /* Only @import and @charset should switch into + import_state, other @-commands, such as @media, should + put us into inline_state. I don't think any other css + @-commands start with `i' or `c', although of course + this will break when such a command is defined. */ + int nextchar = getc (f); + if (nextchar == 'i' || nextchar == 'c') + { + append_char (import_text, c); + state = import_state; + } + else + { + ungetc (nextchar, f); /* wasn't an @import */ + state = inline_state; + } } else if (c == '/') { /* possible start of a comment */ @@ -277,23 +182,176 @@ process_css_file (filename) /* We're wasting the buffer struct memory, but so what. */ return inline_text->buffer; } + +HSTACK *htmlstack = NULL; + +/* See html.h. */ +int html_output_head_p = 0; +int html_title_written = 0; + +void +html_output_head (void) +{ + static const char *html_title = NULL; + char *encoding; + + if (html_output_head_p) + return; + html_output_head_p = 1; + + encoding = current_document_encoding (); + + /* The <title> should not have markup, so use text_expansion. */ + if (!html_title) + html_title = escape_string (title ? + text_expansion (title) : (char *) _("Untitled")); + + /* Make sure this is the very first string of the output document. */ + output_paragraph_offset = 0; + + add_html_block_elt_args ("<html lang=\"%s\">\n<head>\n", + language_table[language_code].abbrev); + + /* When splitting, add current node's name to title if it's available and not + Top. */ + if (splitting && current_node && !STREQ (current_node, "Top")) + add_word_args ("<title>%s - %s</title>\n", + escape_string (xstrdup (current_node)), html_title); + else + add_word_args ("<title>%s</title>\n", html_title); + + add_word ("<meta http-equiv=\"Content-Type\" content=\"text/html"); + if (encoding && *encoding) + add_word_args ("; charset=%s", encoding); + + add_word ("\">\n"); + + if (!document_description) + document_description = html_title; + + add_word_args ("<meta name=\"description\" content=\"%s\">\n", + document_description); + add_word_args ("<meta name=\"generator\" content=\"makeinfo %s\">\n", + VERSION); + + /* Navigation bar links. */ + if (!splitting) + add_word ("<link title=\"Top\" rel=\"top\" href=\"#Top\">\n"); + else if (tag_table) + { + /* Always put a top link. */ + add_word ("<link title=\"Top\" rel=\"start\" href=\"index.html#Top\">\n"); + + /* We already have a top link, avoid duplication. */ + if (tag_table->up && !STREQ (tag_table->up, "Top")) + add_link (tag_table->up, "rel=\"up\""); + + if (tag_table->prev) + add_link (tag_table->prev, "rel=\"prev\""); + + if (tag_table->next) + add_link (tag_table->next, "rel=\"next\""); + + /* fixxme: Look for a way to put links to various indices in the + document. Also possible candidates to be added here are First and + Last links. */ + } + else + { + /* We are splitting, but we neither have a tag_table. So this must be + index.html. So put a link to Top. */ + add_word ("<link title=\"Top\" rel=\"start\" href=\"#Top\">\n"); + } + + add_word ("<link href=\"http://www.gnu.org/software/texinfo/\" \ +rel=\"generator-home\" title=\"Texinfo Homepage\">\n"); + + if (copying_text) + { /* It is not ideal that we include the html markup here within + <head>, so we use text_expansion. */ + insert_string ("<!--\n"); + insert_string (text_expansion (copying_text)); + insert_string ("-->\n"); + } + + /* Put the style definitions in a comment for the sake of browsers + that don't support <style>. */ + add_word ("<meta http-equiv=\"Content-Style-Type\" content=\"text/css\">\n"); + add_word ("<style type=\"text/css\"><!--\n"); + + { + char *css_inline = NULL; + + if (css_include) + /* This writes out any @import commands from the --css-file, + and returns any actual css code following the imports. */ + css_inline = process_css_file (css_include); + + /* This seems cleaner than adding <br>'s at the end of each line for + these "roman" displays. It's hardly the end of the world if the + browser doesn't do <style>s, in any case; they'll just come out in + typewriter. */ +#define CSS_FONT_INHERIT "font-family:inherit" + add_word_args (" pre.display { %s }\n", CSS_FONT_INHERIT); + add_word_args (" pre.format { %s }\n", CSS_FONT_INHERIT); + + /* Alternatively, we could do <font size=-1> in insertion.c, but this + way makes it easier to override. */ +#define CSS_FONT_SMALLER "font-size:smaller" + add_word_args (" pre.smalldisplay { %s; %s }\n", CSS_FONT_INHERIT, + CSS_FONT_SMALLER); + add_word_args (" pre.smallformat { %s; %s }\n", CSS_FONT_INHERIT, + CSS_FONT_SMALLER); + add_word_args (" pre.smallexample { %s }\n", CSS_FONT_SMALLER); + add_word_args (" pre.smalllisp { %s }\n", CSS_FONT_SMALLER); + + /* Since HTML doesn't have a sc element, we use span with a bit of + CSS spice instead. */ +#define CSS_FONT_SMALL_CAPS "font-variant:small-caps" + add_word_args (" span.sc { %s }\n", CSS_FONT_SMALL_CAPS); + /* Roman (default) font class, closest we can come. */ +#define CSS_FONT_ROMAN "font-family:serif; font-weight:normal;" + add_word_args (" span.roman { %s } \n", CSS_FONT_ROMAN); + /* Sans serif font class. */ +#define CSS_FONT_SANSSERIF "font-family:sans-serif; font-weight:normal;" + add_word_args (" span.sansserif { %s } \n", CSS_FONT_SANSSERIF); + + /* Write out any css code from the user's --css-file. */ + if (css_inline) + insert_string (css_inline); + + add_word ("--></style>\n"); + } + + add_word ("</head>\n<body>\n"); + + if (title && !html_title_written && titlepage_cmd_present) + { + add_word_args ("<h1 class=\"settitle\">%s</h1>\n", html_title); + html_title_written = 1; + } + + free (encoding); +} /* Escape HTML special characters in the string if necessary, returning a pointer to a possibly newly-allocated one. */ char * -escape_string (string) - char * string; +escape_string (char *string) { - int i=0, newlen=0; - char * newstring; + char *newstring; + int i = 0, newlen = 0; do { /* Find how much to allocate. */ switch (string[i]) { + case '"': + newlen += 6; /* `"' */ + break; case '&': newlen += 5; /* `&' */ break; @@ -315,6 +373,10 @@ escape_string (string) { switch (string[i]) { + case '"': + strcpy (newstring, """); + newstring += 6; + break; case '&': strcpy (newstring, "&"); newstring += 5; @@ -336,24 +398,22 @@ escape_string (string) free (string); return newstring - newlen; } - - /* Save current tag. */ -void -push_tag (tag) - char *tag; +static void +push_tag (char *tag, char *attribs) { HSTACK *newstack = xmalloc (sizeof (HSTACK)); newstack->tag = tag; + newstack->attribs = xstrdup (attribs); newstack->next = htmlstack; htmlstack = newstack; } /* Get last tag. */ -void -pop_tag () +static void +pop_tag (void) { HSTACK *tos = htmlstack; @@ -363,77 +423,162 @@ pop_tag () return; } + free (htmlstack->attribs); + htmlstack = htmlstack->next; free (tos); } +/* Check if tag is an empty or a whitespace only element. + If so, remove it, keeping whitespace intact. */ +int +rollback_empty_tag (char *tag) +{ + int check_position = output_paragraph_offset; + int taglen = strlen (tag); + int rollback_happened = 0; + char *contents = ""; + char *contents_canon_white = ""; + + /* If output_paragraph is empty, we cannot rollback :-\ */ + if (output_paragraph_offset <= 0) + return 0; + + /* Find the end of the previous tag. */ + while (output_paragraph[check_position-1] != '>' && check_position > 0) + check_position--; + + /* Save stuff between tag's end to output_paragraph's end. */ + if (check_position != output_paragraph_offset) + { + contents = xmalloc (output_paragraph_offset - check_position + 1); + memcpy (contents, output_paragraph + check_position, + output_paragraph_offset - check_position); + + contents[output_paragraph_offset - check_position] = '\0'; + + contents_canon_white = xstrdup (contents); + canon_white (contents_canon_white); + } + + /* Find the start of the previous tag. */ + while (output_paragraph[check_position-1] != '<' && check_position > 0) + check_position--; + + /* Check to see if this is the tag. */ + if (strncmp ((char *) output_paragraph + check_position, tag, taglen) == 0 + && (whitespace (output_paragraph[check_position + taglen]) + || output_paragraph[check_position + taglen] == '>')) + { + if (!contents_canon_white || !*contents_canon_white) + { + /* Empty content after whitespace removal, so roll it back. */ + output_paragraph_offset = check_position - 1; + rollback_happened = 1; + + /* Original contents may not be empty (whitespace.) */ + if (contents && *contents) + { + insert_string (contents); + free (contents); + } + } + } + + return rollback_happened; +} + /* Open or close TAG according to START_OR_END. */ void -insert_html_tag (start_or_end, tag) +#if defined (VA_FPRINTF) && __STDC__ +insert_html_tag_with_attribute (int start_or_end, char *tag, char *format, ...) +#else +insert_html_tag_with_attribute (start_or_end, tag, format, va_alist) int start_or_end; char *tag; + char *format; + va_dcl +#endif { char *old_tag = NULL; + char *old_attribs = NULL; + char formatted_attribs[2000]; /* xx no fixed limits */ int do_return = 0; - - if (!paragraph_is_open && (start_or_end == START)) - { - /* Need to compensate for the <p> we are about to insert, or - else cm_xxx functions that call us will get wrong text - between START and END. */ - adjust_braces_following (output_paragraph_offset, 3); - add_word ("<p>"); - } + extern int in_html_elt; if (start_or_end != START) pop_tag (); if (htmlstack) - old_tag = htmlstack->tag; + { + old_tag = htmlstack->tag; + old_attribs = htmlstack->attribs; + } + + if (format) + { +#ifdef VA_SPRINTF + va_list ap; +#endif + + VA_START (ap, format); +#ifdef VA_SPRINTF + VA_SPRINTF (formatted_attribs, format, ap); +#else + sprintf (formatted_attribs, format, a1, a2, a3, a4, a5, a6, a7, a8); +#endif + va_end (ap); + } + else + formatted_attribs[0] = '\0'; + /* Exception: can nest multiple spans. */ if (htmlstack - && (strcmp (htmlstack->tag, tag) == 0)) + && STREQ (htmlstack->tag, tag) + && !(STREQ (tag, "span") && STREQ (old_attribs, formatted_attribs))) do_return = 1; if (start_or_end == START) - push_tag (tag); + push_tag (tag, formatted_attribs); if (do_return) return; + in_html_elt++; + /* texinfo.tex doesn't support more than one font attribute at the same time. */ - if ((start_or_end == START) && old_tag && *old_tag) - { - add_word ("</"); - add_word (old_tag); - add_char ('>'); - } + if ((start_or_end == START) && old_tag && *old_tag + && !rollback_empty_tag (old_tag)) + add_word_args ("</%s>", old_tag); if (*tag) { - add_char ('<'); - if (start_or_end != START) - add_char ('/'); - add_word (tag); - add_char ('>'); + if (start_or_end == START) + add_word_args (format ? "<%s %s>" : "<%s>", tag, formatted_attribs); + else if (!rollback_empty_tag (tag)) + /* Insert close tag only if we didn't rollback, + in which case the opening tag is removed. */ + add_word_args ("</%s>", tag); } if ((start_or_end != START) && old_tag && *old_tag) - { - add_char ('<'); - add_word (old_tag); - add_char ('>'); - } -} + add_word_args (strlen (old_attribs) > 0 ? "<%s %s>" : "<%s>", + old_tag, old_attribs); + in_html_elt--; +} +void +insert_html_tag (int start_or_end, char *tag) +{ + insert_html_tag_with_attribute (start_or_end, tag, NULL); +} /* Output an HTML <link> to the filename for NODE, including the other string as extra attributes. */ void -add_link (nodename, attributes) - char *nodename, *attributes; +add_link (char *nodename, char *attributes) { if (nodename) { @@ -441,39 +586,64 @@ add_link (nodename, attributes) add_word_args ("%s", attributes); add_word_args (" href=\""); add_anchor_name (nodename, 1); - add_word ("\">\n"); + add_word_args ("\" title=\"%s\">\n", nodename); } } /* Output NAME with characters escaped as appropriate for an anchor - name, i.e., escape URL special characters as %<n>. */ + name, i.e., escape URL special characters with our _00hh convention + if OLD is zero. (See the manual for details on the new scheme.) + + If OLD is nonzero, generate the node name with the 4.6-and-earlier + convention of %hh (and more special characters output as-is, notably + - and *). This is only so that external references to old names can + still work with HTML generated by the new makeinfo; the gcc folks + needed this. Our own HTML does not refer to these names. */ + void -add_escaped_anchor_name (name) - char *name; +add_escaped_anchor_name (char *name, int old) { + canon_white (name); + + if (!old && !strchr ("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", + *name)) + { /* XHTML does not allow anything but an ASCII letter to start an + identifier. Therefore kludge in this constant string if we + have a nonletter. */ + add_word ("g_t"); + } + for (; *name; name++) { - if (*name == '&') - add_word ("&"); - else if (! URL_SAFE_CHAR (*name)) + if (cr_or_whitespace (*name)) + add_char ('-'); + + else if (!old && !URL_SAFE_CHAR (*name)) /* Cast so characters with the high bit set are treated as >128, for example o-umlaut should be 246, not -10. */ + add_word_args ("_00%x", (unsigned char) *name); + + else if (old && !URL_SAFE_CHAR (*name) && !OLD_URL_SAFE_CHAR (*name)) + /* Different output convention, but still cast as above. */ add_word_args ("%%%x", (unsigned char) *name); + else add_char (*name); } } /* Insert the text for the name of a reference in an HTML anchor - appropriate for NODENAME. If HREF is nonzero, it will be - appropriate for a href= attribute, rather than name= i.e., including - the `#' if it's an internal reference. */ + appropriate for NODENAME. + + If HREF is zero, generate text for name= in the new node name + conversion convention. + If HREF is negative, generate text for name= in the old convention. + If HREF is positive, generate the name for an href= attribute, i.e., + including the `#' if it's an internal reference. */ void -add_anchor_name (nodename, href) - char *nodename; - int href; +add_anchor_name (char *nodename, int href) { - if (href) + if (href > 0) { if (splitting) add_url_name (nodename, href); @@ -486,38 +656,52 @@ add_anchor_name (nodename, href) if (strcasecmp (nodename, "(dir)") == 0) /* Strip the parens, but keep the original letter-case. */ add_word_args ("%.3s", nodename + 1); + else if (strcasecmp (nodename, "top") == 0) + add_word ("Top"); else - add_escaped_anchor_name (nodename); + add_escaped_anchor_name (nodename, href < 0); } /* Insert the text for the name of a reference in an HTML url, aprropriate for NODENAME */ void -add_url_name (nodename, href) - char *nodename; - int href; +add_url_name (char *nodename, int href) { add_nodename_to_filename (nodename, href); } -/* Only allow [-0-9a-zA-Z_.] when nodifying filenames. This may - result in filename clashes; e.g., +/* Convert non [A-Za-z0-9] to _00xx, where xx means the hexadecimal + representation of the ASCII character. Also convert spaces and + newlines to dashes. */ +static void +fix_filename (char *filename) +{ + int i; + int len = strlen (filename); + char *oldname = xstrdup (filename); - @node Foo ],,, - @node Foo [,,, + *filename = '\0'; - both map to Foo--.html. If that happens, cm_node will put all - the nodes whose file names clash on the same file. */ -void -fix_filename (filename) - char *filename; -{ - char *p; - for (p = filename; *p; p++) + for (i = 0; i < len; i++) { - if (!(isalnum (*p) || strchr ("-._", *p))) - *p = '-'; + if (cr_or_whitespace (oldname[i])) + strcat (filename, "-"); + else if (URL_SAFE_CHAR (oldname[i])) + strncat (filename, (char *) oldname + i, 1); + else + { + char *hexchar = xmalloc (6 * sizeof (char)); + sprintf (hexchar, "_00%x", (unsigned char) oldname[i]); + strcat (filename, hexchar); + free (hexchar); + } + + /* Check if we are nearing boundaries. */ + if (strlen (filename) >= PATH_MAX - 20) + break; } + + free (oldname); } /* As we can't look-up a (forward-referenced) nodes' html filename @@ -525,9 +709,7 @@ fix_filename (filename) nodenames are unique, and generate the html filename from the nodename, that's always known. */ static char * -nodename_to_filename_1 (nodename, href) - char *nodename; - int href; +nodename_to_filename_1 (char *nodename, int href) { char *p; char *filename; @@ -623,9 +805,7 @@ nodename_to_filename_1 (nodename, href) /* If necessary, ie, if current filename != filename of node, output the node name. */ void -add_nodename_to_filename (nodename, href) - char *nodename; - int href; +add_nodename_to_filename (char *nodename, int href) { /* for now, don't check: always output filename */ char *filename = nodename_to_filename_1 (nodename, href); @@ -634,8 +814,7 @@ add_nodename_to_filename (nodename, href) } char * -nodename_to_filename (nodename) - char *nodename; +nodename_to_filename (char *nodename) { /* The callers of nodename_to_filename use the result to produce <a href=, so call nodename_to_filename_1 with last arg non-zero. */ |