aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Brooks Davis <brooks@FreeBSD.org> 2024-11-25 21:05:36 +0000
committer Brooks Davis <brooks@FreeBSD.org> 2024-11-25 21:05:36 +0000
commit 20de55b07cf91a0b07ec94e655f5740f56cdd541 (patch)
tree 66184d353c7a1ecd33b26464cc055b503abba7c5
parent e9bf778aefc154b6bdefd2772f7fd85819f465b1 (diff)
Vendor import of mandoc as of 2024-09-22 (refs: vendor/mandoc/20240922, vendor/mandoc)
-rw-r--r-- LICENSE 5
-rw-r--r-- Makefile 13
-rw-r--r-- Makefile.depend 89
-rw-r--r-- NEWS 4
-rw-r--r-- TODO 105
-rw-r--r-- catman.c 4
-rw-r--r-- cgi.c 121
-rw-r--r-- chars.c 5
-rw-r--r-- compat_strsep.c 4
-rwxr-xr-x configure 13
-rw-r--r-- configure.local.example 14
-rw-r--r-- dba_read.c 4
-rw-r--r-- demandoc.1 5
-rw-r--r-- demandoc.c 13
-rw-r--r-- eqn.c 28
-rw-r--r-- eqn_parse.h 6
-rw-r--r-- html.c 17
-rw-r--r-- html.h 6
-rw-r--r-- main.c 43
-rw-r--r-- makewhatis.8 8
-rw-r--r-- man.1 8
-rw-r--r-- man.7 34
-rw-r--r-- man.c 43
-rw-r--r-- man.cgi.8 14
-rw-r--r-- man_html.c 112
-rw-r--r-- man_macro.c 24
-rw-r--r-- man_term.c 108
-rw-r--r-- man_validate.c 68
-rw-r--r-- mandoc.1 206
-rw-r--r-- mandoc.c 394
-rw-r--r-- mandoc.css 51
-rw-r--r-- mandoc.h 32
-rw-r--r-- mandoc_aux.c 5
-rw-r--r-- mandoc_aux.h 8
-rw-r--r-- mandoc_char.7 21
-rw-r--r-- mandoc_dbg.c 342
-rw-r--r-- mandoc_dbg.h 55
-rw-r--r-- mandoc_dbg_init.3 280
-rw-r--r-- mandoc_escape.3 172
-rw-r--r-- mandoc_headers.3 37
-rw-r--r-- mandoc_html.3 8
-rw-r--r-- mandoc_msg.c 13
-rw-r--r-- mandocd.8 8
-rw-r--r-- mandocd.c 15
-rw-r--r-- mandocdb.c 75
-rw-r--r-- manpath.c 87
-rw-r--r-- mansearch.c 6
-rw-r--r-- mdoc.7 75
-rw-r--r-- mdoc_html.c 81
-rw-r--r-- mdoc_macro.c 12
-rw-r--r-- mdoc_man.c 4
-rw-r--r-- mdoc_markdown.c 6
-rw-r--r-- mdoc_state.c 53
-rw-r--r-- mdoc_term.c 29
-rw-r--r-- mdoc_validate.c 16
-rw-r--r-- out.c 39
-rw-r--r-- out.h 7
-rw-r--r-- read.c 9
-rw-r--r-- roff.7 43
-rw-r--r-- roff.c 858
-rw-r--r-- roff.h 27
-rw-r--r-- roff_escape.c 546
-rw-r--r-- roff_int.h 5
-rw-r--r-- roff_term.c 4
-rw-r--r-- st.c 4
-rw-r--r-- tag.c 74
-rw-r--r-- tbl.7 12
-rw-r--r-- tbl_html.c 15
-rw-r--r-- tbl_term.c 23
-rw-r--r-- term.c 137
-rw-r--r-- term.h 7
-rw-r--r-- term_ascii.c 9
-rw-r--r-- term_tab.c 24
-rw-r--r-- tree.c 14
74 files changed, 3212 insertions, 1654 deletions
diff --git a/LICENSE b/LICENSE
index 0a0fc1acd2ac..8b464f4e6aec 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,11 +1,11 @@
-$Id: LICENSE,v 1.22 2021/09/19 11:02:09 schwarze Exp $
+$Id: LICENSE,v 1.23 2022/06/25 12:44:25 schwarze Exp $
With the exceptions noted below, all non-trivial files contained
in the mandoc toolkit are protected by the Copyright of the following
developers:
+Copyright (c) 2010-2022 Ingo Schwarze <schwarze@openbsd.org>
Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
-Copyright (c) 2010-2021 Ingo Schwarze <schwarze@openbsd.org>
Copyright (c) 1999, 2004, 2017 Marc Espie <espie@openbsd.org>
Copyright (c) 2009, 2010, 2011, 2012 Joerg Sonnenberger <joerg@netbsd.org>
Copyright (c) 2013 Franco Fichtner <franco@lastsummer.de>
@@ -13,6 +13,7 @@ Copyright (c) 2014 Baptiste Daroussin <bapt@freebsd.org>
Copyright (c) 2016 Ed Maste <emaste@freebsd.org>
Copyright (c) 2017 Michael Stapelberg <stapelberg@debian.org>
Copyright (c) 2017 Anthony Bentley <bentley@openbsd.org>
+Copyright (c) 2022 Anna Vyalkova <cyber@sysrq.in>
Copyright (c) 1998, 2004, 2010, 2015 Todd C. Miller <Todd.Miller@courtesan.com>
Copyright (c) 2008, 2017 Otto Moerbeek <otto@drijf.net>
Copyright (c) 2004 Ted Unangst <tedu@openbsd.org>
diff --git a/Makefile b/Makefile
index 48c4741812b6..cd3f16652069 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
-# $Id: Makefile,v 1.540 2021/09/21 11:04:40 schwarze Exp $
+# $Id: Makefile,v 1.543 2023/10/19 11:45:42 schwarze Exp $
#
-# Copyright (c) 2011, 2013-2021 Ingo Schwarze <schwarze@openbsd.org>
+# Copyright (c) 2011, 2013-2022 Ingo Schwarze <schwarze@openbsd.org>
# Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
#
# Permission to use, copy, modify, and distribute this software for any
@@ -100,6 +100,7 @@ SRCS = arch.c \
man_validate.c \
mandoc.c \
mandoc_aux.c \
+ mandoc_dbg.c \
mandoc_msg.c \
mandoc_ohash.c \
mandoc_xr.c \
@@ -121,6 +122,7 @@ SRCS = arch.c \
preconv.c \
read.c \
roff.c \
+ roff_escape.c \
roff_html.c \
roff_term.c \
roff_validate.c \
@@ -186,6 +188,8 @@ DISTFILES = INSTALL \
mandoc.h \
mandoc_aux.h \
mandoc_char.7 \
+ mandoc_dbg.h \
+ mandoc_dbg_init.3 \
mandoc_escape.3 \
mandoc_headers.3 \
mandoc_html.3 \
@@ -232,6 +236,7 @@ LIBMDOC_OBJS = att.o \
LIBROFF_OBJS = eqn.o \
roff.o \
+ roff_escape.o \
roff_validate.o \
tbl.o \
tbl_data.o \
@@ -241,6 +246,7 @@ LIBROFF_OBJS = eqn.o \
LIBMANDOC_OBJS = $(LIBMAN_OBJS) \
$(LIBMDOC_OBJS) \
$(LIBROFF_OBJS) \
+ $(DEBUG_OBJS) \
arch.o \
chars.o \
mandoc.o \
@@ -333,6 +339,7 @@ WWW_MANS = apropos.1.html \
soelim.1.html \
man.cgi.3.html \
mandoc.3.html \
+ mandoc_dbg_init.3.html \
mandoc_escape.3.html \
mandoc_headers.3.html \
mandoc_html.3.html \
@@ -409,6 +416,7 @@ base-install: mandoc demandoc soelim
mkdir -p $(DESTDIR)$(MANDIR)/man5
mkdir -p $(DESTDIR)$(MANDIR)/man7
mkdir -p $(DESTDIR)$(MANDIR)/man8
+ mkdir -p $(DESTDIR)$(MISCDIR)
$(INSTALL_PROGRAM) mandoc demandoc $(DESTDIR)$(BINDIR)
$(INSTALL_PROGRAM) soelim $(DESTDIR)$(BINDIR)/$(BINM_SOELIM)
cd $(DESTDIR)$(BINDIR) && $(LN) mandoc $(BINM_MAN)
@@ -431,6 +439,7 @@ base-install: mandoc demandoc soelim
$(INSTALL_MAN) mandoc_char.7 $(DESTDIR)$(MANDIR)/man7
$(INSTALL_MAN) makewhatis.8 \
$(DESTDIR)$(MANDIR)/man8/$(BINM_MAKEWHATIS).8
+ $(INSTALL_DATA) mandoc.css $(DESTDIR)$(MISCDIR)
lib-install: libmandoc.a
mkdir -p $(DESTDIR)$(LIBDIR)
diff --git a/Makefile.depend b/Makefile.depend
index d5f6556c3e7e..5179e95d4715 100644
--- a/Makefile.depend
+++ b/Makefile.depend
@@ -1,8 +1,8 @@
arch.o: arch.c config.h roff.h
att.o: att.c config.h roff.h libmdoc.h
catman.o: catman.c config.h compat_fts.h
-cgi.o: cgi.c config.h mandoc_aux.h mandoc.h roff.h mdoc.h man.h mandoc_parse.h main.h manconf.h mansearch.h cgi.h
-chars.o: chars.c config.h mandoc.h mandoc_aux.h mandoc_ohash.h compat_ohash.h libmandoc.h
+cgi.o: cgi.c config.h mandoc_aux.h mandoc_dbg.h mandoc.h roff.h mdoc.h man.h mandoc_parse.h main.h manconf.h mansearch.h cgi.h
+chars.o: chars.c config.h mandoc.h mandoc_aux.h mandoc_dbg.h mandoc_ohash.h compat_ohash.h libmandoc.h
compat_err.o: compat_err.c config.h
compat_fts.o: compat_fts.c config.h compat_fts.h
compat_getline.o: compat_getline.c config.h
@@ -22,62 +22,63 @@ compat_strndup.o: compat_strndup.c config.h
compat_strsep.o: compat_strsep.c config.h
compat_strtonum.o: compat_strtonum.c config.h
compat_vasprintf.o: compat_vasprintf.c config.h
-dba.o: dba.c config.h mandoc_aux.h mandoc_ohash.h compat_ohash.h mansearch.h dba_write.h dba_array.h dba.h
-dba_array.o: dba_array.c config.h mandoc_aux.h dba_write.h dba_array.h
-dba_read.o: dba_read.c config.h mandoc_aux.h mansearch.h dba_array.h dba.h dbm.h
+dba.o: dba.c config.h mandoc_aux.h mandoc_dbg.h mandoc_ohash.h compat_ohash.h mansearch.h dba_write.h dba_array.h dba.h
+dba_array.o: dba_array.c config.h mandoc_aux.h mandoc_dbg.h dba_write.h dba_array.h
+dba_read.o: dba_read.c config.h mandoc_aux.h mandoc_dbg.h mansearch.h dba_array.h dba.h dbm.h
dba_write.o: dba_write.c config.h dba_write.h
dbm.o: dbm.c config.h mansearch.h dbm_map.h dbm.h
dbm_map.o: dbm_map.c config.h mansearch.h dbm_map.h dbm.h
-demandoc.o: demandoc.c config.h mandoc.h roff.h man.h mdoc.h mandoc_parse.h
-eqn.o: eqn.c config.h mandoc_aux.h mandoc.h roff.h eqn.h libmandoc.h eqn_parse.h
+demandoc.o: demandoc.c config.h mandoc.h mandoc_dbg.h roff.h man.h mdoc.h mandoc_parse.h
+eqn.o: eqn.c config.h mandoc_aux.h mandoc_dbg.h mandoc.h roff.h eqn.h libmandoc.h eqn_parse.h
eqn_html.o: eqn_html.c config.h mandoc.h roff.h eqn.h out.h html.h
eqn_term.o: eqn_term.c config.h eqn.h out.h term.h
-html.o: html.c config.h mandoc_aux.h mandoc_ohash.h compat_ohash.h mandoc.h roff.h out.h html.h manconf.h main.h
+html.o: html.c config.h mandoc_aux.h mandoc_dbg.h mandoc_ohash.h compat_ohash.h mandoc.h roff.h out.h html.h manconf.h main.h
lib.o: lib.c config.h roff.h libmdoc.h lib.in
-main.o: main.c config.h mandoc_aux.h mandoc.h mandoc_xr.h roff.h mdoc.h man.h mandoc_parse.h tag.h term_tag.h main.h manconf.h mansearch.h
-man.o: man.c config.h mandoc_aux.h mandoc.h roff.h man.h libmandoc.h roff_int.h libman.h
-man_html.o: man_html.c config.h mandoc_aux.h mandoc.h roff.h man.h out.h html.h main.h
-man_macro.o: man_macro.c config.h mandoc.h roff.h man.h libmandoc.h roff_int.h libman.h
-man_term.o: man_term.c config.h mandoc_aux.h mandoc.h roff.h man.h out.h term.h term_tag.h main.h
-man_validate.o: man_validate.c config.h mandoc_aux.h mandoc.h roff.h man.h libmandoc.h roff_int.h libman.h tag.h
-mandoc.o: mandoc.c config.h mandoc_aux.h mandoc.h roff.h libmandoc.h roff_int.h
-mandoc_aux.o: mandoc_aux.c config.h mandoc.h mandoc_aux.h
+main.o: main.c config.h mandoc_aux.h mandoc_dbg.h mandoc.h mandoc_xr.h roff.h mdoc.h man.h mandoc_parse.h tag.h term_tag.h main.h manconf.h mansearch.h
+man.o: man.c config.h mandoc_aux.h mandoc_dbg.h mandoc.h roff.h man.h libmandoc.h roff_int.h libman.h
+man_html.o: man_html.c config.h mandoc_aux.h mandoc_dbg.h mandoc.h roff.h man.h out.h html.h main.h
+man_macro.o: man_macro.c config.h mandoc_dbg.h mandoc.h roff.h man.h libmandoc.h roff_int.h libman.h
+man_term.o: man_term.c config.h mandoc_aux.h mandoc_dbg.h mandoc.h roff.h man.h out.h term.h term_tag.h main.h
+man_validate.o: man_validate.c config.h mandoc_aux.h mandoc_dbg.h mandoc.h roff.h man.h libmandoc.h roff_int.h libman.h tag.h
+mandoc.o: mandoc.c config.h mandoc_aux.h mandoc_dbg.h mandoc.h roff.h libmandoc.h roff_int.h
+mandoc_aux.o: mandoc_aux.c config.h mandoc.h mandoc_aux.h mandoc_dbg.h
+mandoc_dbg.o: mandoc_dbg.c config.h compat_ohash.h mandoc_aux.h mandoc_dbg.h mandoc.h
mandoc_msg.o: mandoc_msg.c config.h mandoc.h
-mandoc_ohash.o: mandoc_ohash.c config.h mandoc_aux.h mandoc_ohash.h compat_ohash.h
-mandoc_xr.o: mandoc_xr.c config.h mandoc_aux.h mandoc_ohash.h compat_ohash.h mandoc_xr.h
-mandocd.o: mandocd.c config.h mandoc.h roff.h mdoc.h man.h mandoc_parse.h main.h manconf.h
-mandocdb.o: mandocdb.c config.h compat_fts.h mandoc_aux.h mandoc_ohash.h compat_ohash.h mandoc.h roff.h mdoc.h man.h mandoc_parse.h manconf.h mansearch.h dba_array.h dba.h
-manpath.o: manpath.c config.h mandoc_aux.h mandoc.h manconf.h
-mansearch.o: mansearch.c config.h mandoc_aux.h mandoc_ohash.h compat_ohash.h manconf.h mansearch.h dbm.h
-mdoc.o: mdoc.c config.h mandoc_aux.h mandoc.h roff.h mdoc.h libmandoc.h roff_int.h libmdoc.h
-mdoc_argv.o: mdoc_argv.c config.h mandoc_aux.h mandoc.h roff.h mdoc.h libmandoc.h roff_int.h libmdoc.h
-mdoc_html.o: mdoc_html.c config.h mandoc_aux.h mandoc.h roff.h mdoc.h out.h html.h main.h
-mdoc_macro.o: mdoc_macro.c config.h mandoc.h roff.h mdoc.h libmandoc.h roff_int.h libmdoc.h
-mdoc_man.o: mdoc_man.c config.h mandoc_aux.h mandoc.h roff.h mdoc.h man.h out.h main.h
-mdoc_markdown.o: mdoc_markdown.c config.h mandoc_aux.h mandoc.h roff.h mdoc.h main.h
-mdoc_state.o: mdoc_state.c config.h mandoc.h roff.h mdoc.h libmandoc.h roff_int.h libmdoc.h
-mdoc_term.o: mdoc_term.c config.h mandoc_aux.h roff.h mdoc.h out.h term.h term_tag.h main.h
-mdoc_validate.o: mdoc_validate.c config.h mandoc_aux.h mandoc.h mandoc_xr.h roff.h mdoc.h libmandoc.h roff_int.h libmdoc.h tag.h
+mandoc_ohash.o: mandoc_ohash.c config.h mandoc_aux.h mandoc_dbg.h mandoc_ohash.h compat_ohash.h
+mandoc_xr.o: mandoc_xr.c config.h mandoc_aux.h mandoc_dbg.h mandoc_ohash.h compat_ohash.h mandoc_xr.h
+mandocd.o: mandocd.c config.h mandoc.h mandoc_dbg.h roff.h mdoc.h man.h mandoc_parse.h main.h manconf.h
+mandocdb.o: mandocdb.c config.h compat_fts.h mandoc_aux.h mandoc_dbg.h mandoc_ohash.h compat_ohash.h mandoc.h roff.h mdoc.h man.h mandoc_parse.h manconf.h mansearch.h dba_array.h dba.h
+manpath.o: manpath.c config.h mandoc_aux.h mandoc_dbg.h mandoc.h manconf.h
+mansearch.o: mansearch.c config.h mandoc_aux.h mandoc_dbg.h mandoc_ohash.h compat_ohash.h manconf.h mansearch.h dbm.h
+mdoc.o: mdoc.c config.h mandoc_aux.h mandoc_dbg.h mandoc.h roff.h mdoc.h libmandoc.h roff_int.h libmdoc.h
+mdoc_argv.o: mdoc_argv.c config.h mandoc_aux.h mandoc_dbg.h mandoc.h roff.h mdoc.h libmandoc.h roff_int.h libmdoc.h
+mdoc_html.o: mdoc_html.c config.h mandoc_aux.h mandoc_dbg.h mandoc.h roff.h mdoc.h out.h html.h main.h
+mdoc_macro.o: mdoc_macro.c config.h mandoc_dbg.h mandoc.h roff.h mdoc.h libmandoc.h roff_int.h libmdoc.h
+mdoc_man.o: mdoc_man.c config.h mandoc_aux.h mandoc_dbg.h mandoc.h roff.h mdoc.h man.h out.h main.h
+mdoc_markdown.o: mdoc_markdown.c config.h mandoc_aux.h mandoc_dbg.h mandoc.h roff.h mdoc.h main.h
+mdoc_state.o: mdoc_state.c config.h mandoc_dbg.h mandoc.h roff.h mdoc.h libmandoc.h roff_int.h libmdoc.h
+mdoc_term.o: mdoc_term.c config.h mandoc_aux.h mandoc_dbg.h roff.h mdoc.h out.h term.h term_tag.h main.h
+mdoc_validate.o: mdoc_validate.c config.h mandoc_aux.h mandoc_dbg.h mandoc.h mandoc_xr.h roff.h mdoc.h libmandoc.h roff_int.h libmdoc.h tag.h
msec.o: msec.c config.h mandoc.h libmandoc.h msec.in
-out.o: out.c config.h mandoc_aux.h mandoc.h tbl.h out.h
+out.o: out.c config.h mandoc_aux.h mandoc_dbg.h mandoc.h tbl.h out.h
preconv.o: preconv.c config.h mandoc.h roff.h mandoc_parse.h libmandoc.h
-read.o: read.c config.h mandoc_aux.h mandoc.h roff.h mdoc.h man.h mandoc_parse.h libmandoc.h roff_int.h tag.h
-roff.o: roff.c config.h mandoc_aux.h mandoc_ohash.h compat_ohash.h mandoc.h roff.h mandoc_parse.h libmandoc.h roff_int.h tbl_parse.h eqn_parse.h predefs.in
+read.o: read.c config.h mandoc_aux.h mandoc_dbg.h mandoc.h roff.h mdoc.h man.h mandoc_parse.h libmandoc.h roff_int.h tag.h
+roff.o: roff.c config.h mandoc_aux.h mandoc_dbg.h mandoc_ohash.h compat_ohash.h mandoc.h roff.h mandoc_parse.h libmandoc.h roff_int.h tbl_parse.h eqn_parse.h predefs.in
roff_html.o: roff_html.c config.h mandoc.h roff.h out.h html.h
roff_term.o: roff_term.c config.h mandoc.h roff.h out.h term.h
roff_validate.o: roff_validate.c config.h mandoc.h roff.h libmandoc.h roff_int.h
soelim.o: soelim.c config.h compat_stringlist.h
st.o: st.c config.h mandoc.h roff.h libmdoc.h
-tag.o: tag.c config.h mandoc_aux.h mandoc_ohash.h compat_ohash.h roff.h mdoc.h roff_int.h tag.h
-tbl.o: tbl.c config.h mandoc_aux.h mandoc.h tbl.h libmandoc.h tbl_parse.h tbl_int.h
-tbl_data.o: tbl_data.c config.h mandoc_aux.h mandoc.h tbl.h libmandoc.h tbl_int.h
-tbl_html.o: tbl_html.c config.h mandoc.h roff.h tbl.h out.h html.h
-tbl_layout.o: tbl_layout.c config.h mandoc_aux.h mandoc.h tbl.h libmandoc.h tbl_int.h
+tag.o: tag.c config.h mandoc_aux.h mandoc_dbg.h mandoc_ohash.h compat_ohash.h roff.h mdoc.h roff_int.h tag.h
+tbl.o: tbl.c config.h mandoc_aux.h mandoc_dbg.h mandoc.h tbl.h libmandoc.h tbl_parse.h tbl_int.h
+tbl_data.o: tbl_data.c config.h mandoc_aux.h mandoc_dbg.h mandoc.h tbl.h libmandoc.h tbl_int.h
+tbl_html.o: tbl_html.c config.h mandoc_dbg.h mandoc.h roff.h tbl.h out.h html.h
+tbl_layout.o: tbl_layout.c config.h mandoc_aux.h mandoc_dbg.h mandoc.h tbl.h libmandoc.h tbl_int.h
tbl_opts.o: tbl_opts.c config.h mandoc.h tbl.h libmandoc.h tbl_int.h
-tbl_term.o: tbl_term.c config.h mandoc.h tbl.h out.h term.h
-term.o: term.c config.h mandoc.h mandoc_aux.h out.h term.h main.h
-term_ascii.o: term_ascii.c config.h mandoc.h mandoc_aux.h out.h term.h manconf.h main.h
-term_ps.o: term_ps.c config.h mandoc_aux.h out.h term.h manconf.h main.h
-term_tab.o: term_tab.c config.h mandoc_aux.h out.h term.h
+tbl_term.o: tbl_term.c config.h mandoc_dbg.h mandoc.h tbl.h out.h term.h
+term.o: term.c config.h mandoc.h mandoc_aux.h mandoc_dbg.h out.h term.h main.h
+term_ascii.o: term_ascii.c config.h mandoc.h mandoc_aux.h mandoc_dbg.h out.h term.h manconf.h main.h
+term_ps.o: term_ps.c config.h mandoc_aux.h mandoc_dbg.h out.h term.h manconf.h main.h
+term_tab.o: term_tab.c config.h mandoc_aux.h mandoc_dbg.h out.h term.h
term_tag.o: term_tag.c config.h mandoc.h roff.h roff_int.h tag.h term_tag.h
tree.o: tree.c config.h mandoc.h roff.h mdoc.h man.h tbl.h eqn.h main.h
diff --git a/NEWS b/NEWS
index 634ffaf6ccfa..fdec026e9fec 100644
--- a/NEWS
+++ b/NEWS
@@ -1,4 +1,4 @@
-$Id: NEWS,v 1.40 2021/09/23 18:03:00 schwarze Exp $
+$Id: NEWS,v 1.41 2021/09/25 15:42:08 schwarze Exp $
This file lists the most important changes in the mandoc.bsd.lv distribution.
@@ -195,7 +195,7 @@ Changes in version 1.14.6, released on September 23, 2021
for an additional regression test
* Michal Nowak for reporting several code style issues
* TJ Townsend (OpenBSD) for help with CSS
- * Sevan Janiyan (NetBSD) and Robert Mustacchi (Illumos)
+ * Sevan Janiyan (Viewpoint Linux) and Robert Mustacchi (Illumos)
for extensive release testing
* Job Snijders, Kinichiro INOGUCHI, and Martijn van Duren (OpenBSD)
for checking patches
diff --git a/TODO b/TODO
index fe2059c9e5d5..970aa936c16c 100644
--- a/TODO
+++ b/TODO
@@ -1,6 +1,6 @@
************************************************************************
* Official mandoc TODO.
-* $Id: TODO,v 1.319 2021/09/21 17:58:13 schwarze Exp $
+* $Id: TODO,v 1.335 2024/09/21 12:08:54 schwarze Exp $
************************************************************************
Many issues are annotated for difficulty as follows:
@@ -68,6 +68,14 @@ are mere guesses, and some may be wrong.
but watch out for regressions in the high-level parsers
maybe it should not even remove comments? - consider T{\"
+- In the body of conditional requests, escape sequence expansion
+ must not be performed if the condition is false. This implies
+ the first part of a request line must be expanded before
+ request parsing (like it is now), but expansion in the second
+ part must be delayed.
+ to Nab 8 Aug 2023 20:05:32 +0200 Subject: if/ie d condition always true
+ loc ** exist *** algo *** size ** imp *
+
************************************************************************
* missing features
@@ -149,6 +157,11 @@ are mere guesses, and some may be wrong.
--- missing mdoc features ----------------------------------------------
+- support mixed case for section names
+ also, first section is not "NAME" should not appear more than once per page
+ Alejandro Colomar 28 Apr 2023 16:57:49 +0200
+ loc * exist * algo * size * imp ***
+
- .Sh and .Ss should be parsed and partially callable, see groff_mdoc(7)
reed at reedmedia dot net Sat, 21 Dec 2019 17:13:07 -0600
loc ** exist ** algo ** size ** imp *
@@ -292,7 +305,11 @@ are mere guesses, and some may be wrong.
--- missing misc features ----------------------------------------------
-- conisder whether man(1) fallback code in main.c/fs_*() can find files
+- use the default volume headers for sections with suffixes
+ certainly affects man(7); possibly mdoc(7)?; and also groff(1)
+ Alejandro Colomar 21 Aug 2022
+
+- consider whether man(1) fallback code in main.c/fs_*() can find files
like man3c/fopen.3c (illumos, Solaris) and man3p/fopen.3p (POSIX)
discussed with Robert Mustacchi 21 Sep 2021 10:39:40 -0700
loc * exist * algo ** size * imp **
@@ -302,6 +319,11 @@ are mere guesses, and some may be wrong.
mail to sternenseemann 19 Aug 2021 19:11:50 +0200
loc * exist ** algo ** size * imp **
+- handle Unicode letters in tags in both HTML and terminal output
+ thread "section headers with diacritics" starting with
+ Mario Blaettermann 24 Mar 2022 18:13:23 +0100
+ loc ** exist * algo * size * imp **
+
- -T man does not handle eqn(7) and tbl(7)
Stephen Gregoratto 16 Feb 2020 01:28:07 +1100
also https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=901636
@@ -325,6 +347,10 @@ are mere guesses, and some may be wrong.
(3) undefined, just output the character -> perhaps WARNING
loc *** exist ** algo ** size ** imp *** (parser reorg helps)
+- man.conf(5) alias aliasname dirname or just -Mb -Mx -Mp
+ mail to jmc@ Mar 23, 2015 03:53:14PM +0100
+ loc * exist * algo * size * imp **
+
- kettenis wants base roff, ms, and me Fri, 1 Jan 2010 22:13:15 +0100 (CET)
loc ** exist ** algo ** size *** imp *
@@ -443,6 +469,10 @@ are mere guesses, and some may be wrong.
reminded by jmc@ Thu, 23 Sep 2010 18:13:39 +0059
loc * exist ** algo *** size * imp ***
+- the man(7) single-font macros (e.g. .B) use .itc,
+ so ".B foo\c" followed by "bar" prints "bar" in bold
+ gbranden@ Sun, 5 Jun 2022 18:08:46 -0500
+
- a line starting with "\fB something" counts as starting with whitespace
and triggers a line break; found in audio/normalize-mp3(1)
This will become easier once escape sequences are represented
@@ -468,20 +498,46 @@ are mere guesses, and some may be wrong.
--- HTML issues --------------------------------------------------------
-- make the HTML scaffolding customozable with -O skip=...
+- support the idiom .TP .IP .TP for multi-paragraph list item bodies
+ to: Alejandro Colomar Thu, 19 Oct 2023 16:45:21 +0200
+ loc ** exist ** algo ** size ** imp **
+
+- .Nm without an argument and .Bx cause premature </pre>
+ Nab Sun, 5 Jun 2022 18:30:09 +0200
+
+- .Aq Mt could set and reset "white-space: nowrap";
+ Check whether other enclosure macros could profit from similar handling,
+ or whether that is covered by Unicode line-breaking classes WJ, ZW, GL, ZWJ.
+ John Gardner 25 Mar 2022 04:44:27 +1100
+
+- make the HTML scaffolding customizable with -O skip=...
mail to Oliver Corff 3 Jun 2021 17:28:02 +0200
more feedback from Oliver 3 Jun 2021 18:27:56 +0200
more feedback from Oliver 3 Jun 2021 23:37:18 +0200
+ would also be useful for
+ https://github.com/gbdev/rgbds-www/blob/master/
+ maintainer/support/man_postproc.awk
- .Bd -unfilled should not use monospaced font
anton@ 4 Mar 2021 08:19:35 +0100
loc ** exist * algo * size * imp **
-- HTML formatting of .nf should avoid <br/>
+- HTML formatting of .nf should avoid <br/>,
+ even when input lines start with whitespace,
and not close and re-open <pre> on .P
my mail to ports@ 27 Jun 2021 16:09:20 +0200
+ reported again by Mohamed Akram 25 Jun 2022 16:28:18 +0000
loc ** exist ** algo * size * imp **
+- tbl(7) HTML output does not implement column width specifications
+ reported by Ted Bullock 11 Jan 2022 16:00:44 -0700
+ loc * exist * algo ? size ? imp *
+
+- link from flags in the SYNOPSIS to their descriptions
+ https://github.com/gbdev/rgbds-www/blob/master/
+ maintainer/support/man_postproc.awk
+ loc * exist * algo ** size * imp *
+
- get rid of the last handful of style= attributes such that
Content-Security-Policy: can be enabled without unsafe-inline
suggested by bentley@ Nov 10, 2019 at 06:02:49AM -0700
@@ -504,19 +560,18 @@ are mere guesses, and some may be wrong.
does this affect other characters, other source macros...?
Jackson Pauls 29 Aug 2017 16:56:27 +0100
-- The tables used to render the three-part page headers actually force
- the width of the <body> to the max-width given for <html>.
- Not yet sure how to fix that...
- Observed by an Anonymous Coward on undeadly.org:
- http://undeadly.org/cgi?action=article&sid=20140925064244&pid=1
- loc * exist * algo ** size * imp ***
-
- generate <img> tags in HTML
idea from florian@ Tue, 7 Apr 2015 00:26:28 +0000
may be possible to implement with .Lk img://something.png alt_text
- check https://github.com/trentm/mdocml
+--- CSS issues ---------------------------------------------------------
+
+- use flexbox for .Bl-tag instead of the fragile float/clear mechanism
+ John Gardner 25 Mar 2022 04:44:27 +1100
+
+
************************************************************************
* formatting issues: gratuitous differences
************************************************************************
@@ -527,7 +582,17 @@ are mere guesses, and some may be wrong.
Steffen Nurpmeso Sat, 08 Nov 2014 13:34:59 +0100
loc * exist ** algo ** size * imp **
-- In .Bl -enum -width 0n, groff continues one the same line after
+- Multiple issues with .In below SYNOPSIS; groff behaviour is:
+ text line + .In -> no line break before #include
+ called .In -> no line break before angle bracket
+ .In + .In -> second one gets #include, too
+ two arguments -> line break before second
+ child macro -> line break before child
+ .In + text line -> line break before the text line
+ Evan Silberman Fri, 20 Sep 2024 16:48:19 -0700
+ loc ** exist ** algo * size * imp *
+
+- In .Bl -enum -width 0n, groff continues on the same line after
the number, mandoc breaks the line.
mail to kristaps@ Mon, 20 Jul 2009 02:21:39 +0200
loc * exist ** algo ** size * imp **
@@ -562,6 +627,10 @@ are mere guesses, and some may be wrong.
reported again by Nicolas Joly Thu, 1 Mar 2012 13:41:26 +0100 via wiz@ 5 Mar
reported again by Franco Fichtner Fri, 27 Sep 2013 21:02:28 +0200
reported again by Bruce Evans Fri, 17 Feb 2017 21:22:44 +0100 via bapt@
+ https://reviews.freebsd.org/D35245
+ even groff_mdoc(7) uses this: Nab Sun, 5 Jun 2022 22:16:37 +0200
+ When implementing this, try to avoid breaking existing manuals,
+ or at least fix them: Jan Stary Sun, 5 Jun 2022 22:48:05 +0200
loc *** exist *** algo *** size ** imp ***
An easy partial fix would be to just skip the first word if it starts
with a dot, including any following white space, when measuring.
@@ -576,6 +645,10 @@ are mere guesses, and some may be wrong.
with .ps and .nf/.fi produce execessive blank lines, see libJudy
and graphics/dcmtk. The parser reorg may help with this.
+- The man(7) .UR macro produces UTF-8 angle brackets in -Tutf8 output mode
+ with groff, but ASCII <> with mandoc
+ Alejandro Colomar Mon, 7 Aug 2023 17:13:29 +0200 Subject: hostname
+
- trailing whitespace must be ignored even when followed by a font escape,
see for example
makes
@@ -592,6 +665,14 @@ are mere guesses, and some may be wrong.
To: deraadt@ 25 Oct 2020 23:37:01 +0100
loc ** exist * algo * size ** imp ***
+- warn about \\ and \. in interpretation mode
+ gbranden@, groff issue #62776, 10 Nov 2023 01:57:32 -0500
+
+- warn about output lines exceeding 80 characters
+ Alejandro Colomar Aug 22, 2022
+ not trivial because -T lint does not call any formatter
+ loc *** exist * algo ** size ** imp **
+
- warn about duplicate .Sh/.Ss heads
gre(4): Rename duplicate sections 20 Apr 2018 15:27:33 +0200
loc * exist * algo * size * imp **
diff --git a/catman.c b/catman.c
index b1bab0f68c4b..e46613eb0e8c 100644
--- a/catman.c
+++ b/catman.c
@@ -1,4 +1,4 @@
-/* $Id: catman.c,v 1.22 2020/06/14 23:40:31 schwarze Exp $ */
+/* $Id: catman.c,v 1.23 2021/10/15 15:04:02 schwarze Exp $ */
/*
* Copyright (c) 2017 Michael Stapelberg <stapelberg@debian.org>
* Copyright (c) 2017 Ingo Schwarze <schwarze@openbsd.org>
@@ -62,7 +62,7 @@ run_mandocd(int sockfd, const char *outtype, const char* defos)
else
execlp("mandocd", "mandocd", "-T", outtype,
"-I", defos, sockfdstr, (char *)NULL);
- err(1, "exec");
+ err(1, "exec(mandocd)");
}
ssize_t
diff --git a/cgi.c b/cgi.c
index 91310ce404b4..57f3bb7a6e16 100644
--- a/cgi.c
+++ b/cgi.c
@@ -1,7 +1,8 @@
-/* $Id: cgi.c,v 1.175 2021/08/19 15:23:36 schwarze Exp $ */
+/* $Id: cgi.c,v 1.181 2023/04/28 19:11:03 schwarze Exp $ */
/*
- * Copyright (c) 2014-2019, 2021 Ingo Schwarze <schwarze@usta.de>
+ * Copyright (c) 2014-2019, 2021, 2022 Ingo Schwarze <schwarze@usta.de>
* Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2022 Anna Vyalkova <cyber@sysrq.in>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -86,10 +87,10 @@ static void pg_search(const struct req *);
static void pg_searchres(const struct req *,
struct manpage *, size_t);
static void pg_show(struct req *, const char *);
-static void resp_begin_html(int, const char *, const char *);
+static int resp_begin_html(int, const char *, const char *);
static void resp_begin_http(int, const char *);
static void resp_catman(const struct req *, const char *);
-static void resp_copy(const char *);
+static int resp_copy(const char *, const char *);
static void resp_end_html(void);
static void resp_format(const struct req *, const char *);
static void resp_searchform(const struct req *, enum focus);
@@ -352,22 +353,26 @@ resp_begin_http(int code, const char *msg)
fflush(stdout);
}
-static void
-resp_copy(const char *filename)
+static int
+resp_copy(const char *element, const char *filename)
{
char buf[4096];
ssize_t sz;
int fd;
- if ((fd = open(filename, O_RDONLY)) != -1) {
- fflush(stdout);
- while ((sz = read(fd, buf, sizeof(buf))) > 0)
- write(STDOUT_FILENO, buf, sz);
- close(fd);
- }
+ if ((fd = open(filename, O_RDONLY)) == -1)
+ return 0;
+
+ if (element != NULL)
+ printf("<%s>\n", element);
+ fflush(stdout);
+ while ((sz = read(fd, buf, sizeof(buf))) > 0)
+ write(STDOUT_FILENO, buf, sz);
+ close(fd);
+ return 1;
}
-static void
+static int
resp_begin_html(int code, const char *msg, const char *file)
{
const char *name, *sec, *cp;
@@ -413,14 +418,14 @@ resp_begin_html(int code, const char *msg, const char *file)
"<body>\n",
CUSTOMIZE_TITLE);
- resp_copy(MAN_DIR "/header.html");
+ return resp_copy("header", MAN_DIR "/header.html");
}
static void
resp_end_html(void)
{
-
- resp_copy(MAN_DIR "/footer.html");
+ if (resp_copy("footer", MAN_DIR "/footer.html"))
+ puts("</footer>");
puts("</body>\n"
"</html>");
@@ -431,7 +436,7 @@ resp_searchform(const struct req *req, enum focus focus)
{
int i;
- printf("<form action=\"/%s\" method=\"get\" "
+ printf("<form role=\"search\" action=\"/%s\" method=\"get\" "
"autocomplete=\"off\" autocapitalize=\"none\">\n"
" <fieldset>\n"
" <legend>Manual Page Search Parameters</legend>\n",
@@ -439,13 +444,14 @@ resp_searchform(const struct req *req, enum focus focus)
/* Write query input box. */
- printf(" <input type=\"search\" name=\"query\" value=\"");
+ printf(" <label>Search query:\n"
+ " <input type=\"search\" name=\"query\" value=\"");
if (req->q.query != NULL)
html_print(req->q.query);
- printf( "\" size=\"40\"");
+ printf("\" size=\"40\"");
if (focus == FOCUS_QUERY)
printf(" autofocus");
- puts(">");
+ puts(">\n </label>");
/* Write submission buttons. */
@@ -457,7 +463,7 @@ resp_searchform(const struct req *req, enum focus focus)
/* Write section selector. */
- puts(" <select name=\"sec\">");
+ puts(" <select name=\"sec\" aria-label=\"Manual section\">");
for (i = 0; i < sec_MAX; i++) {
printf(" <option value=\"%s\"", sec_numbers[i]);
if (NULL != req->q.sec &&
@@ -469,7 +475,7 @@ resp_searchform(const struct req *req, enum focus focus)
/* Write architecture selector. */
- printf( " <select name=\"arch\">\n"
+ printf( " <select name=\"arch\" aria-label=\"CPU architecture\">\n"
" <option value=\"default\"");
if (NULL == req->q.arch)
printf(" selected=\"selected\"");
@@ -486,7 +492,8 @@ resp_searchform(const struct req *req, enum focus focus)
/* Write manpath selector. */
if (req->psz > 1) {
- puts(" <select name=\"manpath\">");
+ puts(" <select name=\"manpath\""
+ " aria-label=\"Manual path\">");
for (i = 0; i < (int)req->psz; i++) {
printf(" <option");
if (strcmp(req->q.manpath, req->p[i]) == 0)
@@ -554,16 +561,21 @@ validate_filename(const char *file)
static void
pg_index(const struct req *req)
{
-
- resp_begin_html(200, NULL, NULL);
+ if (resp_begin_html(200, NULL, NULL) == 0)
+ puts("<header>");
resp_searchform(req, FOCUS_QUERY);
- printf("<p>\n"
+ printf("</header>\n"
+ "<main>\n"
+ "<p role=\"doc-notice\" aria-label=\"Usage\">\n"
"This web interface is documented in the\n"
- "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
+ "<a class=\"Xr\" href=\"/%s%sman.cgi.8\""
+ " aria-label=\"man dot CGI, section 8\">man.cgi(8)</a>\n"
"manual, and the\n"
- "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
+ "<a class=\"Xr\" href=\"/%s%sapropos.1\""
+ " aria-label=\"apropos, section 1\">apropos(1)</a>\n"
"manual explains the query syntax.\n"
- "</p>\n",
+ "</p>\n"
+ "</main>\n",
scriptname, *scriptname == '\0' ? "" : "/",
scriptname, *scriptname == '\0' ? "" : "/");
resp_end_html();
@@ -573,33 +585,40 @@ static void
pg_noresult(const struct req *req, int code, const char *http_msg,
const char *user_msg)
{
- resp_begin_html(code, http_msg, NULL);
+ if (resp_begin_html(code, http_msg, NULL) == 0)
+ puts("<header>");
resp_searchform(req, FOCUS_QUERY);
- puts("<p>");
+ puts("</header>");
+ puts("<main>");
+ puts("<p role=\"doc-notice\" aria-label=\"No result\">");
puts(user_msg);
puts("</p>");
+ puts("</main>");
resp_end_html();
}
static void
pg_error_badrequest(const char *msg)
{
-
- resp_begin_html(400, "Bad Request", NULL);
- puts("<h1>Bad Request</h1>\n"
- "<p>\n");
+ if (resp_begin_html(400, "Bad Request", NULL))
+ puts("</header>");
+ puts("<main>\n"
+ "<h1>Bad Request</h1>\n"
+ "<p role=\"doc-notice\" aria-label=\"Bad Request\">");
puts(msg);
printf("Try again from the\n"
"<a href=\"/%s\">main page</a>.\n"
- "</p>", scriptname);
+ "</p>\n"
+ "</main>\n", scriptname);
resp_end_html();
}
static void
pg_error_internal(void)
{
- resp_begin_html(500, "Internal Server Error", NULL);
- puts("<p>Internal Server Error</p>");
+ if (resp_begin_html(500, "Internal Server Error", NULL))
+ puts("</header>");
+ puts("<main><p role=\"doc-notice\">Internal Server Error</p></main>");
resp_end_html();
}
@@ -630,6 +649,7 @@ pg_searchres(const struct req *req, struct manpage *r, size_t sz)
size_t i, iuse;
int archprio, archpriouse;
int prio, priouse;
+ int have_header;
for (i = 0; i < sz; i++) {
if (validate_filename(r[i].file))
@@ -696,14 +716,18 @@ pg_searchres(const struct req *req, struct manpage *r, size_t sz)
priouse = prio;
iuse = i;
}
- resp_begin_html(200, NULL, r[iuse].file);
+ have_header = resp_begin_html(200, NULL, r[iuse].file);
} else
- resp_begin_html(200, NULL, NULL);
+ have_header = resp_begin_html(200, NULL, NULL);
+ if (have_header == 0)
+ puts("<header>");
resp_searchform(req,
req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
+ puts("</header>");
if (sz > 1) {
+ puts("<nav>");
puts("<table class=\"results\">");
for (i = 0; i < sz; i++) {
printf(" <tr>\n"
@@ -722,6 +746,7 @@ pg_searchres(const struct req *req, struct manpage *r, size_t sz)
" </tr>");
}
puts("</table>");
+ puts("</nav>");
}
if (req->q.equal || sz == 1) {
@@ -743,7 +768,9 @@ resp_catman(const struct req *req, const char *file)
int italic, bold;
if ((f = fopen(file, "r")) == NULL) {
- puts("<p>You specified an invalid manual file.</p>");
+ puts("<p role=\"doc-notice\">\n"
+ " You specified an invalid manual file.\n"
+ "</p>");
return;
}
@@ -879,8 +906,10 @@ resp_format(const struct req *req, const char *file)
int fd;
int usepath;
- if (-1 == (fd = open(file, O_RDONLY, 0))) {
- puts("<p>You specified an invalid manual file.</p>");
+ if (-1 == (fd = open(file, O_RDONLY))) {
+ puts("<p role=\"doc-notice\">\n"
+ " You specified an invalid manual file.\n"
+ "</p>");
return;
}
@@ -966,8 +995,10 @@ pg_show(struct req *req, const char *fullpath)
return;
}
- resp_begin_html(200, NULL, file);
+ if (resp_begin_html(200, NULL, file) == 0)
+ puts("<header>");
resp_searchform(req, FOCUS_NONE);
+ puts("</header>");
resp_show(req, file);
resp_end_html();
}
@@ -1066,7 +1097,7 @@ main(void)
#if HAVE_PLEDGE
/*
* The "rpath" pledge could be revoked after mparse_readfd()
- * if the file desciptor to "/footer.html" would be opened
+ * if the file descriptor to "/footer.html" would be opened
* up front, but it's probably not worth the complication
* of the code it would cause: it would require scattering
* pledge() calls in multiple low-level resp_*() functions.
diff --git a/chars.c b/chars.c
index d54fc458aea2..72fe8df67e83 100644
--- a/chars.c
+++ b/chars.c
@@ -1,4 +1,4 @@
-/* $Id: chars.c,v 1.79 2020/02/13 16:18:29 schwarze Exp $ */
+/* $Id: chars.c,v 1.81 2022/06/26 20:33:43 schwarze Exp $ */
/*
* Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2011, 2014, 2015, 2017, 2018, 2020
@@ -92,7 +92,6 @@ static struct ln lines[] = {
{ "en", "-", 0x2013 },
{ "hy", "-", 0x2010 },
{ "e", "\\", 0x005c },
- { ".", ".", 0x002e },
{ "r!", "!", 0x00a1 },
{ "r?", "?", 0x00bf },
@@ -365,7 +364,7 @@ static struct ln lines[] = {
{ "de", "<degree>", 0x00b0 },
{ "%0", "<permille>", 0x2030 },
{ "fm", "\'", 0x2032 },
- { "sd", "''", 0x2033 },
+ { "sd", "\"", 0x2033 },
{ "mc", "<micro>", 0x00b5 },
{ "Of", "_\ba", 0x00aa },
{ "Om", "_\bo", 0x00ba },
diff --git a/compat_strsep.c b/compat_strsep.c
index 9765ac823eeb..29865ba4a468 100644
--- a/compat_strsep.c
+++ b/compat_strsep.c
@@ -1,4 +1,4 @@
-/* $Id: compat_strsep.c,v 1.5 2020/06/15 01:37:15 schwarze Exp $ */
+/* $Id: compat_strsep.c,v 1.6 2022/06/21 10:34:14 schwarze Exp $ */
/* $OpenBSD: strsep.c,v 1.8 2015/08/31 02:53:57 guenther Exp $ */
/*-
@@ -31,6 +31,8 @@
*/
#include "config.h"
+#include <stddef.h>
+
/*
* Get next token from string *stringp, where tokens are possibly-empty
* strings separated by characters from delim.
diff --git a/configure b/configure
index 5cf4e081c2cb..7f5fa1976806 100755
--- a/configure
+++ b/configure
@@ -1,6 +1,6 @@
#!/bin/sh
#
-# $Id: configure,v 1.81 2021/09/20 10:19:51 schwarze Exp $
+# $Id: configure,v 1.83 2023/10/19 11:45:42 schwarze Exp $
#
# Copyright (c) 2014-2021 Ingo Schwarze <schwarze@openbsd.org>
#
@@ -37,6 +37,7 @@ SOURCEDIR=`dirname "${0}"`
MANPATH_BASE="/usr/share/man:/usr/X11R6/man"
MANPATH_DEFAULT="/usr/share/man:/usr/X11R6/man:/usr/local/man"
+DEBUG_MEMORY=0
OSENUM=
OSNAME=
UTF8_LOCALE=
@@ -99,6 +100,7 @@ NEED_GNU_SOURCE=0
NEED_OPENBSD_SOURCE=0
NEED_XPG4_2=0
+DEBUG_OBJS=
MANDOC_COBJS=
SOELIM_COBJS=
@@ -109,6 +111,7 @@ BIN_FROM_SBIN=
INCLUDEDIR=
LIBDIR=
MANDIR=
+MISCDIR=
READ_ALLOWED_PATH=
WWWPREFIX="/var/www"
@@ -334,6 +337,7 @@ runtest vasprintf VASPRINTF "" -D_GNU_SOURCE || true
# --- fts ---
if [ "${1}" = "-depend" ]; then
+ DEBUG_MEMORY=1
HAVE_FTS=0
HAVE_FTS_COMPARE_CONST=0
echo "tested fts: HAVE_FTS=0 (for make depend)" 1>&2
@@ -461,6 +465,10 @@ echo
echo "#define MAN_CONF_FILE \"/etc/${MANM_MANCONF}\""
echo "#define MANPATH_BASE \"${MANPATH_BASE}\""
echo "#define MANPATH_DEFAULT \"${MANPATH_DEFAULT}\""
+if [ ${DEBUG_MEMORY} -ne 0 ]; then
+ echo "#define DEBUG_MEMORY ${DEBUG_MEMORY}"
+ DEBUG_OBJS=mandoc_dbg.o
+fi
echo "#define OSENUM ${OSENUM}"
[ -n "${OSNAME}" ] && echo "#define OSNAME \"${OSNAME}\""
[ -n "${UTF8_LOCALE}" ] && echo "#define UTF8_LOCALE \"${UTF8_LOCALE}\""
@@ -613,6 +621,7 @@ exec > Makefile.local
[ -z "${INCLUDEDIR}" ] && INCLUDEDIR="${PREFIX}/include/mandoc"
[ -z "${LIBDIR}" ] && LIBDIR="${PREFIX}/lib/mandoc"
[ -z "${MANDIR}" ] && MANDIR="${PREFIX}/man"
+[ -z "${MISCDIR}" ] && MISCDIR="${PREFIX}/share/misc"
[ -z "${HTDOCDIR}" ] && HTDOCDIR="${WWWPREFIX}/htdocs"
[ -z "${CGIBINDIR}" ] && CGIBINDIR="${WWWPREFIX}/cgi-bin"
@@ -640,6 +649,7 @@ CC = ${CC}
CFLAGS = ${CFLAGS}
LDADD = ${LDADD}
LDFLAGS = ${LDFLAGS}
+DEBUG_OBJS = ${DEBUG_OBJS}
MANDOC_COBJS = ${MANDOC_COBJS}
SOELIM_COBJS = ${SOELIM_COBJS}
STATIC = ${STATIC}
@@ -650,6 +660,7 @@ BIN_FROM_SBIN = ${BIN_FROM_SBIN}
INCLUDEDIR = ${INCLUDEDIR}
LIBDIR = ${LIBDIR}
MANDIR = ${MANDIR}
+MISCDIR = ${MISCDIR}
WWWPREFIX = ${WWWPREFIX}
HTDOCDIR = ${HTDOCDIR}
CGIBINDIR = ${CGIBINDIR}
diff --git a/configure.local.example b/configure.local.example
index 1050f4a1d9ab..fa6271a0c393 100644
--- a/configure.local.example
+++ b/configure.local.example
@@ -1,6 +1,6 @@
-# $Id: configure.local.example,v 1.43 2021/09/20 13:25:42 schwarze Exp $
+# $Id: configure.local.example,v 1.45 2023/10/19 11:45:42 schwarze Exp $
#
-# Copyright (c) 2014-2021 Ingo Schwarze <schwarze@openbsd.org>
+# Copyright (c) 2014-2022 Ingo Schwarze <schwarze@openbsd.org>
#
# Permission to use, copy, modify, and distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
@@ -108,11 +108,15 @@ OSNAME="OpenBSD 7.0"
# there is no need to copy the whole block.
# Even if you set PREFIX to something else, the other variables
# pick it up without copying them all over.
+# MISCDIR is only used for installing the file mandoc.css.
+# That is important because users of "mandoc -T html" often need it
+# even if they are not using man.cgi(8), see mandoc(1) for details.
PREFIX="/usr/local"
BINDIR="${PREFIX}/bin"
SBINDIR="${PREFIX}/sbin"
MANDIR="${PREFIX}/man"
+MISCDIR="${PREFIX}/share/misc"
# If BINDIR and SBINDIR are not subdirectories of the same parent
# directory or if the basename(1) of BINDIR differs from "bin",
@@ -312,6 +316,12 @@ BINM_CATMAN=mcatman # default is "catman"
CFLAGS="-g"
+# Hunt for memory leaks.
+# Do not use for production builds.
+# See mandoc_dbg_init(3) for more information.
+
+DEBUG_MEMORY=1
+
# In rare cases, it may be required to skip individual automatic tests.
# Each of the following variables can be set to 0 (test will not be run
# and will be regarded as failed) or 1 (test will not be run and will
diff --git a/dba_read.c b/dba_read.c
index 4fc3ee5eddcb..4c7d78c11f88 100644
--- a/dba_read.c
+++ b/dba_read.c
@@ -1,4 +1,4 @@
-/* $Id: dba_read.c,v 1.5 2020/06/22 19:20:40 schwarze Exp $ */
+/* $Id: dba_read.c,v 1.6 2023/04/28 19:11:03 schwarze Exp $ */
/*
* Copyright (c) 2016 Ingo Schwarze <schwarze@openbsd.org>
*
@@ -17,7 +17,7 @@
* Function to read the mandoc database from disk into RAM,
* such that data can be added or removed.
* The interface is defined in "dba.h".
- * This file is seperate from dba.c because this also uses "dbm.h".
+ * This file is separate from dba.c because this also uses "dbm.h".
*/
#include "config.h"
diff --git a/demandoc.1 b/demandoc.1
index bca69eff1b5b..02dba8ae5c32 100644
--- a/demandoc.1
+++ b/demandoc.1
@@ -1,4 +1,4 @@
-.\" $Id: demandoc.1,v 1.8 2014/09/12 00:10:26 schwarze Exp $
+.\" $Id: demandoc.1,v 1.9 2022/03/20 15:41:47 schwarze Exp $
.\"
.\" Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
.\"
@@ -14,7 +14,7 @@
.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
.\"
-.Dd $Mdocdate: September 12 2014 $
+.Dd $Mdocdate: March 20 2022 $
.Dt DEMANDOC 1
.Os
.Sh NAME
@@ -80,7 +80,6 @@ error accessing input files.
Such errors cause
.Nm
to exit at once, possibly in the middle of parsing or formatting a file.
-The output databases are corrupt and should be removed .
.El
.Sh EXAMPLES
The traditional usage of
diff --git a/demandoc.c b/demandoc.c
index 57d0cc5f4120..15f57d6d4d8a 100644
--- a/demandoc.c
+++ b/demandoc.c
@@ -1,4 +1,4 @@
-/* $Id: demandoc.c,v 1.33 2019/03/03 11:01:15 schwarze Exp $ */
+/* $Id: demandoc.c,v 1.34 2022/04/14 16:43:43 schwarze Exp $ */
/*
* Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -26,6 +26,10 @@
#include <unistd.h>
#include "mandoc.h"
+#if DEBUG_MEMORY
+#define DEBUG_NODEF
+#include "mandoc_dbg.h"
+#endif
#include "roff.h"
#include "man.h"
#include "mdoc.h"
@@ -47,6 +51,10 @@ main(int argc, char *argv[])
int ch, fd, i, list;
extern int optind;
+#if DEBUG_MEMORY
+ mandoc_dbg_init(argc, argv);
+#endif
+
if (argc < 1)
progname = "demandoc";
else if ((progname = strrchr(argv[0], '/')) == NULL)
@@ -97,6 +105,9 @@ main(int argc, char *argv[])
mparse_free(mp);
mchars_free();
+#if DEBUG_MEMORY
+ mandoc_dbg_finish();
+#endif
return (int)MANDOCLEVEL_OK;
}
diff --git a/eqn.c b/eqn.c
index 27f5cac396cb..addbae4776be 100644
--- a/eqn.c
+++ b/eqn.c
@@ -1,7 +1,8 @@
-/* $Id: eqn.c,v 1.84 2020/01/08 12:16:24 schwarze Exp $ */
+/* $Id: eqn.c,v 1.86 2023/04/28 19:11:03 schwarze Exp $ */
/*
+ * Copyright (c) 2014, 2015, 2017, 2018, 2020, 2022
+ * Ingo Schwarze <schwarze@openbsd.org>
* Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2014,2015,2017,2018,2020 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -355,7 +356,7 @@ eqn_def_find(struct eqn_node *ep)
/*
* Parse a token from the input text. The modes are:
* MODE_QUOTED: Use *ep->start as the delimiter; the token ends
- * before its next occurence. Do not interpret the token in any
+ * before its next occurrence. Do not interpret the token in any
* way and return EQN_TOK_QUOTED. All other modes behave like
* MODE_QUOTED when *ep->start is '"'.
* MODE_NOSUB: If *ep->start is a curly brace, the token ends after it;
@@ -375,19 +376,17 @@ eqn_def_find(struct eqn_node *ep)
static enum eqn_tok
eqn_next(struct eqn_node *ep, enum parse_mode mode)
{
- static int last_len, lim;
-
struct eqn_def *def;
size_t start;
- int diff, i, quoted;
+ int diff, i, newlen, quoted;
enum eqn_tok tok;
/*
* Reset the recursion counter after advancing
- * beyond the end of the previous substitution.
+ * beyond the end of the rightmost substitution.
*/
- if (ep->end - ep->data >= last_len)
- lim = 0;
+ if (ep->end - ep->data >= ep->sublen)
+ ep->subcnt = 0;
ep->start = ep->end;
quoted = mode == MODE_QUOTED;
@@ -434,10 +433,10 @@ eqn_next(struct eqn_node *ep, enum parse_mode mode)
return EQN_TOK__MAX;
if ((def = eqn_def_find(ep)) == NULL)
break;
- if (++lim > EQN_NEST_MAX) {
+ if (++ep->subcnt > EQN_NEST_MAX) {
mandoc_msg(MANDOCERR_ROFFLOOP,
ep->node->line, ep->node->pos, NULL);
- return EQN_TOK_EOF;
+ break;
}
/* Replace a defined name with its string value. */
@@ -446,12 +445,15 @@ eqn_next(struct eqn_node *ep, enum parse_mode mode)
ep->sz += diff;
ep->data = mandoc_realloc(ep->data, ep->sz + 1);
ep->start = ep->data + start;
+ ep->sublen += diff;
}
if (diff)
memmove(ep->start + def->valsz, ep->start + ep->toksz,
strlen(ep->start + ep->toksz) + 1);
memcpy(ep->start, def->val, def->valsz);
- last_len = ep->start - ep->data + def->valsz;
+ newlen = ep->start - ep->data + def->valsz;
+ if (ep->sublen < newlen)
+ ep->sublen = newlen;
}
if (mode != MODE_TOK)
return quoted ? EQN_TOK_QUOTED : EQN_TOK__MAX;
@@ -678,6 +680,8 @@ eqn_parse(struct eqn_node *ep)
return;
ep->start = ep->end = ep->data;
+ ep->sublen = 0;
+ ep->subcnt = 0;
next_tok:
tok = eqn_next(ep, MODE_TOK);
diff --git a/eqn_parse.h b/eqn_parse.h
index a2a4e6fd7d37..63d7c395f8e8 100644
--- a/eqn_parse.h
+++ b/eqn_parse.h
@@ -1,7 +1,7 @@
-/* $Id: eqn_parse.h,v 1.3 2018/12/14 06:33:14 schwarze Exp $ */
+/* $Id: eqn_parse.h,v 1.4 2022/04/13 20:26:19 schwarze Exp $ */
/*
+ * Copyright (c) 2014, 2017, 2018, 2022 Ingo Schwarze <schwarze@openbsd.org>
* Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2014, 2017, 2018 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -32,6 +32,8 @@ struct eqn_node {
size_t defsz; /* Number of definitions. */
size_t sz; /* Length of the source code. */
size_t toksz; /* Length of the current token. */
+ int sublen; /* End of rightmost substitution, so far. */
+ int subcnt; /* Number of recursive substitutions. */
int gsize; /* Default point size. */
int delim; /* In-line delimiters enabled. */
char odelim; /* In-line opening delimiter. */
diff --git a/html.c b/html.c
index 71c9c711e825..53e426ffb102 100644
--- a/html.c
+++ b/html.c
@@ -1,7 +1,8 @@
-/* $Id: html.c,v 1.275 2021/09/09 14:47:24 schwarze Exp $ */
+/* $Id: html.c,v 1.279 2022/08/09 11:23:11 schwarze Exp $ */
/*
* Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2011-2015, 2017-2021 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2022 Anna Vyalkova <cyber@sysrq.in>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -67,8 +68,10 @@ static const struct htmldata htmltags[TAG_MAX] = {
{"style", HTML_NLALL | HTML_INDENT},
{"title", HTML_NLAROUND},
{"body", HTML_NLALL},
+ {"main", HTML_NLALL},
{"div", HTML_NLAROUND},
{"section", HTML_NLALL},
+ {"nav", HTML_NLALL},
{"table", HTML_NLALL | HTML_INDENT},
{"tr", HTML_NLALL | HTML_INDENT},
{"td", HTML_NLAROUND},
@@ -78,8 +81,8 @@ static const struct htmldata htmltags[TAG_MAX] = {
{"dl", HTML_NLALL | HTML_INDENT},
{"dt", HTML_NLAROUND},
{"dd", HTML_NLAROUND | HTML_INDENT},
- {"h1", HTML_TOPHRASE | HTML_NLAROUND},
{"h2", HTML_TOPHRASE | HTML_NLAROUND},
+ {"h3", HTML_TOPHRASE | HTML_NLAROUND},
{"p", HTML_TOPHRASE | HTML_NLAROUND | HTML_INDENT},
{"pre", HTML_TOPHRASE | HTML_NLAROUND | HTML_NOINDENT},
{"a", HTML_INPHRASE | HTML_TOPHRASE},
@@ -400,10 +403,13 @@ html_make_id(const struct roff_node *n, int unique)
* In addition, reserve '~' for ordinal suffixes.
*/
- for (cp = buf; *cp != '\0'; cp++)
- if (isalnum((unsigned char)*cp) == 0 &&
+ for (cp = buf; *cp != '\0'; cp++) {
+ if (*cp == ASCII_HYPH)
+ *cp = '-';
+ else if (isalnum((unsigned char)*cp) == 0 &&
strchr("!$&'()*+,-./:;=?@_", *cp) == NULL)
*cp = '_';
+ }
if (unique == 0)
return buf;
@@ -708,6 +714,9 @@ print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
case 'i':
attr = "id";
break;
+ case 'r':
+ attr = "role";
+ break;
case '?':
attr = arg1;
arg1 = va_arg(ap, char *);
diff --git a/html.h b/html.h
index 3d201403342d..49ae0f72960c 100644
--- a/html.h
+++ b/html.h
@@ -1,4 +1,4 @@
-/* $Id: html.h,v 1.109 2021/09/09 14:47:24 schwarze Exp $ */
+/* $Id: html.h,v 1.112 2022/07/06 14:34:59 schwarze Exp $ */
/*
* Copyright (c) 2017, 2018, 2019, 2020 Ingo Schwarze <schwarze@openbsd.org>
* Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
@@ -27,8 +27,10 @@ enum htmltag {
TAG_STYLE,
TAG_TITLE,
TAG_BODY,
+ TAG_MAIN,
TAG_DIV,
TAG_SECTION,
+ TAG_NAV,
TAG_TABLE,
TAG_TR,
TAG_TD,
@@ -38,8 +40,8 @@ enum htmltag {
TAG_DL,
TAG_DT,
TAG_DD,
- TAG_H1,
TAG_H2,
+ TAG_H3,
TAG_P,
TAG_PRE,
TAG_A,
diff --git a/main.c b/main.c
index c5a7cff918ab..aa997041b234 100644
--- a/main.c
+++ b/main.c
@@ -1,4 +1,4 @@
-/* $Id: main.c,v 1.358 2021/09/04 22:38:46 schwarze Exp $ */
+/* $Id: main.c,v 1.361 2022/04/14 16:43:43 schwarze Exp $ */
/*
* Copyright (c) 2010-2012, 2014-2021 Ingo Schwarze <schwarze@openbsd.org>
* Copyright (c) 2008-2012 Kristaps Dzonsons <kristaps@bsd.lv>
@@ -149,11 +149,14 @@ main(int argc, char *argv[])
enum mandoc_os os_e; /* Check base system conventions. */
enum outmode outmode; /* According to command line. */
+#if DEBUG_MEMORY
+ mandoc_dbg_init(argc, argv);
+#endif
#if HAVE_PROGNAME
progname = getprogname();
#else
if (argc < 1)
- progname = mandoc_strdup("mandoc");
+ progname = "mandoc";
else if ((progname = strrchr(argv[0], '/')) == NULL)
progname = argv[0];
else
@@ -516,6 +519,9 @@ main(int argc, char *argv[])
memcpy(res + ressz, resn,
sizeof(*resn) * resnsz);
ressz += resnsz;
+ free(resn);
+ resn = NULL;
+ resnsz = 0;
continue;
}
@@ -554,6 +560,10 @@ main(int argc, char *argv[])
res = mandoc_reallocarray(res, ressz + 1,
sizeof(*res));
memcpy(res + ressz++, resn + ib, sizeof(*resn));
+ memset(resn + ib, 0, sizeof(*resn));
+ mansearch_free(resn, resnsz);
+ resn = NULL;
+ resnsz = 0;
}
/* apropos(1), whatis(1): Process the full search expression. */
@@ -664,6 +674,9 @@ out:
} else if (outst.had_output && outst.outtype != OUTT_LINT)
mandoc_msg_summary();
+#if DEBUG_MEMORY
+ mandoc_dbg_finish();
+#endif
return (int)mandoc_msg_getrc();
}
@@ -1279,6 +1292,7 @@ spawn_pager(struct outstate *outst, char *tag_target)
char *argv[MAX_PAGER_ARGS];
const char *pager;
char *cp;
+ size_t wordlen;
#if HAVE_LESS_T
size_t cmdlen;
#endif
@@ -1293,7 +1307,6 @@ spawn_pager(struct outstate *outst, char *tag_target)
pager = getenv("PAGER");
if (pager == NULL || *pager == '\0')
pager = BINM_PAGER;
- cp = mandoc_strdup(pager);
/*
* Parse the pager command into words.
@@ -1301,16 +1314,12 @@ spawn_pager(struct outstate *outst, char *tag_target)
*/
argc = 0;
- while (argc + 5 < MAX_PAGER_ARGS) {
- argv[argc++] = cp;
- cp = strchr(cp, ' ');
- if (cp == NULL)
- break;
- *cp++ = '\0';
- while (*cp == ' ')
- cp++;
- if (*cp == '\0')
- break;
+ while (*pager != '\0' && argc + 5 < MAX_PAGER_ARGS) {
+ wordlen = strcspn(pager, " ");
+ argv[argc++] = mandoc_strndup(pager, wordlen);
+ pager += wordlen;
+ while (*pager == ' ')
+ pager++;
}
/* For less(1), use the tag file. */
@@ -1322,10 +1331,10 @@ spawn_pager(struct outstate *outst, char *tag_target)
cp = argv[0] + cmdlen - 4;
if (strcmp(cp, "less") == 0) {
argv[argc++] = mandoc_strdup("-T");
- argv[argc++] = outst->tag_files->tfn;
+ argv[argc++] = mandoc_strdup(outst->tag_files->tfn);
if (tag_target != NULL) {
argv[argc++] = mandoc_strdup("-t");
- argv[argc++] = tag_target;
+ argv[argc++] = mandoc_strdup(tag_target);
use_ofn = 0;
}
}
@@ -1336,7 +1345,7 @@ spawn_pager(struct outstate *outst, char *tag_target)
mandoc_asprintf(&argv[argc], "file://%s#%s",
outst->tag_files->ofn, tag_target);
else
- argv[argc] = outst->tag_files->ofn;
+ argv[argc] = mandoc_strdup(outst->tag_files->ofn);
argc++;
}
argv[argc] = NULL;
@@ -1348,6 +1357,8 @@ spawn_pager(struct outstate *outst, char *tag_target)
case 0:
break;
default:
+ while (argc > 0)
+ free(argv[--argc]);
(void)setpgid(pager_pid, 0);
(void)tcsetpgrp(STDOUT_FILENO, pager_pid);
#if HAVE_PLEDGE
diff --git a/makewhatis.8 b/makewhatis.8
index 9f307a351d3d..3c79d727bbee 100644
--- a/makewhatis.8
+++ b/makewhatis.8
@@ -1,4 +1,4 @@
-.\" $Id: makewhatis.8,v 1.6 2017/05/17 22:27:12 schwarze Exp $
+.\" $Id: makewhatis.8,v 1.7 2022/03/16 23:26:14 schwarze Exp $
.\"
.\" Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
.\" Copyright (c) 2011, 2012, 2014, 2017 Ingo Schwarze <schwarze@openbsd.org>
@@ -15,7 +15,7 @@
.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
.\"
-.Dd $Mdocdate: May 17 2017 $
+.Dd $Mdocdate: March 16 2022 $
.Dt MAKEWHATIS 8
.Os
.Sh NAME
@@ -52,9 +52,7 @@ manuals and indexes them in a database for fast retrieval by
.Xr apropos 1 ,
.Xr whatis 1 ,
and
-.Xr man 1 Ns 's
-.Fl k
-option.
+.Xr man 1 .
.Pp
By default,
.Nm
diff --git a/man.1 b/man.1
index d3a54c6a235c..888cabb4502b 100644
--- a/man.1
+++ b/man.1
@@ -1,4 +1,4 @@
-.\" $Id: man.1,v 1.40 2020/07/20 16:57:30 schwarze Exp $
+.\" $Id: man.1,v 1.41 2022/08/04 11:32:23 schwarze Exp $
.\"
.\" Copyright (c) 1989, 1990, 1993
.\" The Regents of the University of California. All rights reserved.
@@ -31,7 +31,7 @@
.\"
.\" @(#)man.1 8.2 (Berkeley) 1/2/94
.\"
-.Dd $Mdocdate: July 20 2020 $
+.Dd $Mdocdate: August 4 2022 $
.Dt MAN 1
.Os
.Sh NAME
@@ -391,7 +391,9 @@ option first appeared in
and
.Fl k
in
-.Bx 4 ;
+.Pa /usr/usb/man
+in
+.Bx 4.0 ;
.Fl M
in
.Bx 4.3 ;
diff --git a/man.7 b/man.7
index cca9c1fe3520..4d27c76ba110 100644
--- a/man.7
+++ b/man.7
@@ -1,7 +1,7 @@
-.\" $Id: man.7,v 1.148 2021/08/05 14:31:14 schwarze Exp $
+.\" $Id: man.7,v 1.150 2023/10/23 22:57:54 schwarze Exp $
.\"
.\" Copyright (c) 2009, 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
-.\" Copyright (c) 2011-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org>
+.\" Copyright (c) 2011-2015,2017-2020,2023 Ingo Schwarze <schwarze@openbsd.org>
.\" Copyright (c) 2017 Anthony Bentley <bentley@openbsd.org>
.\" Copyright (c) 2010 Joerg Sonnenberger <joerg@netbsd.org>
.\"
@@ -17,7 +17,7 @@
.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
.\"
-.Dd $Mdocdate: August 5 2021 $
+.Dd $Mdocdate: October 23 2023 $
.Dt MAN 7
.Os
.Sh NAME
@@ -97,10 +97,11 @@ but can be found in the alphabetical reference below.
.It Ic SH Ta section header (one line)
.It Ic SS Ta subsection header (one line)
.It Ic PP Ta start an undecorated paragraph (no arguments)
-.It Ic RS , RE Ta reset the left margin: Op Ar width
.It Ic IP Ta indented paragraph: Op Ar head Op Ar width
.It Ic TP Ta tagged paragraph: Op Ar width
.It Ic PD Ta set vertical paragraph distance: Op Ar height
+.It Ic EX , EE Ta display an example (no arguments)
+.It Ic RS , RE Ta reset the left margin: Op Ar width
.It Ic in Ta additional indent: Op Ar width
.El
.Ss Physical markup
@@ -162,7 +163,9 @@ This has no effect unless the tabulator positions were changed with the
.Ic ta
request.
.It Ic EE
-This is a non-standard Version 9
+End an example block started with
+.Ic EX .
+This is a Version 9
.At
extension later adopted by GNU.
In
@@ -172,7 +175,8 @@ it does the same as the
.Ic fi
request (switch to fill mode).
.It Ic EX
-This is a non-standard Version 9
+Begin a block to display an example.
+This is a Version 9
.At
extension later adopted by GNU.
In
@@ -234,10 +238,10 @@ A synonym for
.It Ic ME
End a mailto block started with
.Ic MT .
-This is a non-standard GNU extension.
+This is a GNU extension.
.It Ic MT
Begin a mailto block.
-This is a non-standard GNU extension.
+This is a GNU extension.
It has the following syntax:
.Bd -unfilled -offset indent
.Pf . Ic MT Ar address
@@ -246,7 +250,7 @@ link description to be shown
.Ed
.It Ic OP
Optional command-line argument.
-This is a non-standard DWB extension.
+This is a rarely used DWB extension.
It has the following syntax:
.Pp
.D1 Pf . Ic OP Ar key Op Ar value
@@ -373,8 +377,7 @@ Begin a synopsis block with the following syntax:
.Pf . Ic YS
.Ed
.Pp
-This is a non-standard GNU extension
-and very rarely used even in GNU manual pages.
+This is a GNU extension and rarely used even in GNU manual pages.
Formatting is similar to
.Ic IP .
.It Ic TH
@@ -437,8 +440,7 @@ unspecified, the saved or default width is used.
Like
.Ic TP ,
except that no vertical spacing is inserted before the paragraph.
-This is a non-standard GNU extension
-and very rarely used even in GNU manual pages.
+This is a GNU extension.
.It Ic UC
Sets the volume for the footer for compatibility with man pages from
.Bx
@@ -449,10 +451,10 @@ This macro is an extension that first appeared in
.It Ic UE
End a uniform resource identifier block started with
.Ic UR .
-This is a non-standard GNU extension.
+This is a GNU extension.
.It Ic UR
Begin a uniform resource identifier block.
-This is a non-standard GNU extension.
+This is a GNU extension.
It has the following syntax:
.Bd -unfilled -offset indent
.Pf . Ic UR Ar uri
@@ -462,7 +464,7 @@ link description to be shown
.It Ic YS
End a synopsis block started with
.Ic SY .
-This is a non-standard GNU extension.
+This is a GNU extension.
.It Ic in
Indent relative to the current indentation:
.Pp
diff --git a/man.c b/man.c
index f0e4002b2a2c..f651efe3de8b 100644
--- a/man.c
+++ b/man.c
@@ -1,7 +1,7 @@
-/* $Id: man.c,v 1.187 2019/01/05 00:36:50 schwarze Exp $ */
+/* $Id: man.c,v 1.189 2022/08/16 23:01:09 schwarze Exp $ */
/*
+ * Copyright (c) 2013-2015,2017-2019,2022 Ingo Schwarze <schwarze@openbsd.org>
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2013-2015, 2017-2019 Ingo Schwarze <schwarze@openbsd.org>
* Copyright (c) 2011 Joerg Sonnenberger <joerg@netbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
@@ -70,19 +70,13 @@ man_hasc(char *start)
return (ep - cp) % 2 ? NULL : ep;
}
+/*
+ * Rewind all open next-line scopes.
+ */
void
man_descope(struct roff_man *man, int line, int offs, char *start)
{
- /* Trailing \c keeps next-line scope open. */
-
- if (start != NULL && man_hasc(start) != NULL)
- return;
-
- /*
- * Co-ordinate what happens with having a next-line scope open:
- * first close out the element scopes (if applicable),
- * then close out the block scope (also if applicable).
- */
+ /* First close out all next-line element scopes, if any. */
if (man->flags & MAN_ELINE) {
while (man->last->parent->type != ROFFT_ROOT &&
@@ -90,6 +84,14 @@ man_descope(struct roff_man *man, int line, int offs, char *start)
man_unscope(man, man->last->parent);
man->flags &= ~MAN_ELINE;
}
+
+ /* Trailing \c keeps next-line block scope open. */
+
+ if (start != NULL && man_hasc(start) != NULL)
+ return;
+
+ /* Close out the next-line block scope, if there is one. */
+
if ( ! (man->flags & MAN_BLINE))
return;
man_unscope(man, man->last->parent);
@@ -274,6 +276,10 @@ man_pmacro(struct roff_man *man, int ln, char *buf, int offs)
return 1;
}
+/*
+ * Rewind open next-line scopes
+ * unless the tok request or macro is allowed inside them.
+ */
void
man_breakscope(struct roff_man *man, int tok)
{
@@ -294,10 +300,15 @@ man_breakscope(struct roff_man *man, int tok)
(man_macro(n->tok)->flags & (MAN_NSCOPED | MAN_ESCOPED))
== MAN_NSCOPED)
n = n->parent;
-
- mandoc_msg(MANDOCERR_BLK_LINE, n->line, n->pos,
- "%s breaks %s", roff_name[tok], roff_name[n->tok]);
-
+ for (;;) {
+ mandoc_msg(MANDOCERR_BLK_LINE, n->line, n->pos,
+ "%s breaks %s", roff_name[tok], roff_name[n->tok]);
+ if (n->parent->type != ROFFT_ELEM ||
+ (man_macro(n->parent->tok)->flags &
+ MAN_ESCOPED) == 0)
+ break;
+ n = n->parent;
+ }
roff_node_delete(man, n);
man->flags &= ~MAN_ELINE;
}
diff --git a/man.cgi.8 b/man.cgi.8
index b8cd623c598d..a57fd156eb02 100644
--- a/man.cgi.8
+++ b/man.cgi.8
@@ -1,4 +1,4 @@
-.\" $Id: man.cgi.8,v 1.23 2018/05/20 21:48:44 schwarze Exp $
+.\" $Id: man.cgi.8,v 1.24 2022/07/06 15:47:28 schwarze Exp $
.\"
.\" Copyright (c) 2014, 2015, 2016 Ingo Schwarze <schwarze@openbsd.org>
.\"
@@ -14,7 +14,7 @@
.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
.\"
-.Dd $Mdocdate: May 20 2018 $
+.Dd $Mdocdate: July 6 2022 $
.Dt MAN.CGI 8
.Os
.Sh NAME
@@ -381,11 +381,13 @@ or any character not contained in the
.Nm
reports an internal server error and exits without doing anything.
.It Pa /man/header.html
-An optional file containing static HTML code to be inserted right
-after opening the <BODY> element.
+An optional file containing static HTML code to be wrapped in
+a <HEADER> element and inserted right after opening the <BODY> element.
+For example, it can contain an <H1> element
+specifying the name of the website.
.It Pa /man/footer.html
-An optional file containing static HTML code to be inserted right
-before closing the <BODY> element.
+An optional file containing static HTML code to be wrapped in
+a <FOOTER> element and inserted right before closing the <BODY> element.
.It Pa /man/OpenBSD-current/man1/mandoc.1
An example
.Xr mdoc 7
diff --git a/man_html.c b/man_html.c
index 147c20e46443..6784171af1e6 100644
--- a/man_html.c
+++ b/man_html.c
@@ -1,6 +1,6 @@
-/* $Id: man_html.c,v 1.179 2020/10/16 17:22:43 schwarze Exp $ */
+/* $Id: man_html.c,v 1.187 2023/10/24 20:53:12 schwarze Exp $ */
/*
- * Copyright (c) 2013-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2013-15,2017-20,2022-23 Ingo Schwarze <schwarze@openbsd.org>
* Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
*
* Permission to use, copy, modify, and distribute this software for any
@@ -53,6 +53,7 @@ static char list_continues(const struct roff_node *,
static int man_B_pre(MAN_ARGS);
static int man_IP_pre(MAN_ARGS);
static int man_I_pre(MAN_ARGS);
+static int man_MR_pre(MAN_ARGS);
static int man_OP_pre(MAN_ARGS);
static int man_PP_pre(MAN_ARGS);
static int man_RS_pre(MAN_ARGS);
@@ -60,7 +61,6 @@ static int man_SH_pre(MAN_ARGS);
static int man_SM_pre(MAN_ARGS);
static int man_SY_pre(MAN_ARGS);
static int man_UR_pre(MAN_ARGS);
-static int man_abort_pre(MAN_ARGS);
static int man_alt_pre(MAN_ARGS);
static int man_ign_pre(MAN_ARGS);
static int man_in_pre(MAN_ARGS);
@@ -75,9 +75,9 @@ static const struct man_html_act man_html_acts[MAN_MAX - MAN_TH] = {
{ man_SH_pre, NULL }, /* SS */
{ man_IP_pre, NULL }, /* TP */
{ man_IP_pre, NULL }, /* TQ */
- { man_abort_pre, NULL }, /* LP */
+ { man_PP_pre, NULL }, /* LP */
{ man_PP_pre, NULL }, /* PP */
- { man_abort_pre, NULL }, /* P */
+ { man_PP_pre, NULL }, /* P */
{ man_IP_pre, NULL }, /* IP */
{ man_PP_pre, NULL }, /* HP */
{ man_SM_pre, NULL }, /* SM */
@@ -107,6 +107,7 @@ static const struct man_html_act man_html_acts[MAN_MAX - MAN_TH] = {
{ NULL, NULL }, /* UE */
{ man_UR_pre, NULL }, /* MT */
{ NULL, NULL }, /* ME */
+ { man_MR_pre, NULL }, /* MR */
};
@@ -123,16 +124,16 @@ html_man(void *arg, const struct roff_meta *man)
if ((h->oflags & HTML_FRAGMENT) == 0) {
print_gen_decls(h);
print_otag(h, TAG_HTML, "");
- if (n != NULL && n->type == ROFFT_COMMENT)
- print_gen_comment(h, n);
t = print_otag(h, TAG_HEAD, "");
print_man_head(man, h);
print_tagq(h, t);
+ if (n != NULL && n->type == ROFFT_COMMENT)
+ print_gen_comment(h, n);
print_otag(h, TAG_BODY, "");
}
man_root_pre(man, h);
- t = print_otag(h, TAG_DIV, "c", "manual-text");
+ t = print_otag(h, TAG_MAIN, "c", "manual-text");
print_man_nodelist(man, n, h);
print_tagq(h, t);
man_root_post(man, h);
@@ -263,26 +264,26 @@ print_man_node(MAN_ARGS)
static void
man_root_pre(const struct roff_meta *man, struct html *h)
{
- struct tag *t, *tt;
+ struct tag *t;
char *title;
assert(man->title);
assert(man->msec);
mandoc_asprintf(&title, "%s(%s)", man->title, man->msec);
- t = print_otag(h, TAG_TABLE, "c", "head");
- tt = print_otag(h, TAG_TR, "");
+ t = print_otag(h, TAG_DIV, "cr?", "head", "doc-pageheader",
+ "aria-label", "Manual header line");
- print_otag(h, TAG_TD, "c", "head-ltitle");
+ print_otag(h, TAG_SPAN, "c", "head-ltitle");
print_text(h, title);
- print_stagq(h, tt);
+ print_stagq(h, t);
- print_otag(h, TAG_TD, "c", "head-vol");
+ print_otag(h, TAG_SPAN, "c", "head-vol");
if (man->vol != NULL)
print_text(h, man->vol);
- print_stagq(h, tt);
+ print_stagq(h, t);
- print_otag(h, TAG_TD, "c", "head-rtitle");
+ print_otag(h, TAG_SPAN, "c", "head-rtitle");
print_text(h, title);
print_tagq(h, t);
free(title);
@@ -291,16 +292,19 @@ man_root_pre(const struct roff_meta *man, struct html *h)
static void
man_root_post(const struct roff_meta *man, struct html *h)
{
- struct tag *t, *tt;
+ struct tag *t;
+
+ t = print_otag(h, TAG_DIV, "cr?", "foot", "doc-pagefooter",
+ "aria-label", "Manual footer line");
- t = print_otag(h, TAG_TABLE, "c", "foot");
- tt = print_otag(h, TAG_TR, "");
+ print_otag(h, TAG_SPAN, "c", "foot-left");
+ print_stagq(h, t);
- print_otag(h, TAG_TD, "c", "foot-date");
+ print_otag(h, TAG_SPAN, "c", "foot-date");
print_text(h, man->date);
- print_stagq(h, tt);
+ print_stagq(h, t);
- print_otag(h, TAG_TD, "c", "foot-os");
+ print_otag(h, TAG_SPAN, "c", "foot-os");
if (man->os != NULL)
print_text(h, man->os);
print_tagq(h, t);
@@ -313,10 +317,10 @@ man_SH_pre(MAN_ARGS)
enum htmltag tag;
if (n->tok == MAN_SH) {
- tag = TAG_H1;
+ tag = TAG_H2;
class = "Sh";
} else {
- tag = TAG_H2;
+ tag = TAG_H3;
class = "Ss";
}
switch (n->type) {
@@ -403,7 +407,7 @@ man_PP_pre(MAN_ARGS)
if (n->child != NULL &&
(n->child->flags & NODE_NOFILL) == 0)
print_otag(h, TAG_P, "c",
- n->tok == MAN_PP ? "Pp" : "Pp HP");
+ n->tok == MAN_HP ? "Pp HP" : "Pp");
break;
default:
abort();
@@ -431,10 +435,12 @@ list_continues(const struct roff_node *n1, const struct roff_node *n2)
s2 = n2 == NULL ? "" : n2->string;
c1 = strcmp(s1, "*") == 0 ? '*' :
strcmp(s1, "\\-") == 0 ? '-' :
- strcmp(s1, "\\(bu") == 0 ? 'b' : ' ';
+ strcmp(s1, "\\(bu") == 0 ? 'b' :
+ strcmp(s1, "\\[bu]") == 0 ? 'b' : ' ';
c2 = strcmp(s2, "*") == 0 ? '*' :
strcmp(s2, "\\-") == 0 ? '-' :
- strcmp(s2, "\\(bu") == 0 ? 'b' : ' ';
+ strcmp(s2, "\\(bu") == 0 ? 'b' :
+ strcmp(s2, "\\[bu]") == 0 ? 'b' : ' ';
return c1 != c2 ? '\0' : c1 == 'b' ? '*' : c1;
}
@@ -515,6 +521,52 @@ man_IP_pre(MAN_ARGS)
}
static int
+man_MR_pre(MAN_ARGS)
+{
+ struct tag *t;
+ const char *name, *section, *suffix;
+ char *label;
+
+ html_setfont(h, ESCAPE_FONTROMAN);
+ name = section = suffix = label = NULL;
+ if (n->child != NULL) {
+ name = n->child->string;
+ if (n->child->next != NULL) {
+ section = n->child->next->string;
+ mandoc_asprintf(&label,
+ "%s, section %s", name, section);
+ if (n->child->next->next != NULL)
+ suffix = n->child->next->next->string;
+ }
+ }
+
+ if (name != NULL && section != NULL && h->base_man1 != NULL)
+ t = print_otag(h, TAG_A, "chM?", "Xr",
+ name, section, "aria-label", label);
+ else
+ t = print_otag(h, TAG_A, "c?", "Xr", "aria-label", label);
+
+ free(label);
+ if (name != NULL) {
+ print_text(h, name);
+ h->flags |= HTML_NOSPACE;
+ }
+ print_text(h, "(");
+ h->flags |= HTML_NOSPACE;
+ if (section != NULL) {
+ print_text(h, section);
+ h->flags |= HTML_NOSPACE;
+ }
+ print_text(h, ")");
+ print_tagq(h, t);
+ if (suffix != NULL) {
+ h->flags |= HTML_NOSPACE;
+ print_text(h, suffix);
+ }
+ return 0;
+}
+
+static int
man_OP_pre(MAN_ARGS)
{
struct tag *tt;
@@ -632,9 +684,3 @@ man_UR_pre(MAN_ARGS)
print_man_nodelist(man, n->child, h);
return 0;
}
-
-static int
-man_abort_pre(MAN_ARGS)
-{
- abort();
-}
diff --git a/man_macro.c b/man_macro.c
index b3c3a3cb488b..40120cacbe39 100644
--- a/man_macro.c
+++ b/man_macro.c
@@ -1,7 +1,7 @@
-/* $Id: man_macro.c,v 1.145 2020/09/09 17:01:10 schwarze Exp $ */
+/* $Id: man_macro.c,v 1.150 2023/11/13 19:13:01 schwarze Exp $ */
/*
+ * Copyright (c) 2012-2015,2017-2020,2022 Ingo Schwarze <schwarze@openbsd.org>
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2012-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org>
* Copyright (c) 2013 Franco Fichtner <franco@lastsummer.de>
*
* Permission to use, copy, modify, and distribute this software for any
@@ -26,6 +26,9 @@
#include <stdlib.h>
#include <string.h>
+#if DEBUG_MEMORY
+#include "mandoc_dbg.h"
+#endif
#include "mandoc.h"
#include "roff.h"
#include "man.h"
@@ -65,10 +68,10 @@ static const struct man_macro man_macros[MAN_MAX - MAN_TH] = {
{ in_line_eoln, 0 }, /* RI */
{ blk_close, MAN_XSCOPE }, /* RE */
{ blk_exp, MAN_XSCOPE }, /* RS */
- { in_line_eoln, 0 }, /* DT */
- { in_line_eoln, 0 }, /* UC */
+ { in_line_eoln, MAN_NSCOPED }, /* DT */
+ { in_line_eoln, MAN_NSCOPED }, /* UC */
{ in_line_eoln, MAN_NSCOPED }, /* PD */
- { in_line_eoln, 0 }, /* AT */
+ { in_line_eoln, MAN_NSCOPED }, /* AT */
{ in_line_eoln, MAN_NSCOPED }, /* in */
{ blk_imp, MAN_XSCOPE }, /* SY */
{ blk_close, MAN_XSCOPE }, /* YS */
@@ -79,6 +82,7 @@ static const struct man_macro man_macros[MAN_MAX - MAN_TH] = {
{ blk_close, MAN_XSCOPE }, /* UE */
{ blk_exp, MAN_XSCOPE }, /* MT */
{ blk_close, MAN_XSCOPE }, /* ME */
+ { in_line_eoln, 0 }, /* MR */
};
@@ -108,7 +112,8 @@ man_unscope(struct roff_man *man, const struct roff_node *to)
n->line, n->pos,
"EOF breaks %s", roff_name[n->tok]);
if (man->flags & MAN_ELINE) {
- if ((man_macro(n->parent->tok)->flags &
+ if (n->parent->type == ROFFT_ROOT ||
+ (man_macro(n->parent->tok)->flags &
MAN_ESCOPED) == 0)
man->flags &= ~MAN_ELINE;
} else {
@@ -312,7 +317,7 @@ blk_exp(MACRO_PROT_ARGS)
if (tok == MAN_RS) {
if (roff_getreg(man->roff, "an-margin") == 0)
roff_setreg(man->roff, "an-margin",
- 7 * 24, '=');
+ 5 * 24, '=');
if ((head->aux = strtod(p, NULL) * 24.0) > 0)
roff_setreg(man->roff, "an-margin",
head->aux, '+');
@@ -393,6 +398,11 @@ in_line_eoln(MACRO_PROT_ARGS)
else if (tok == MAN_EE)
man->flags &= ~ROFF_NOFILL;
+#if DEBUG_MEMORY
+ if (tok == MAN_TH)
+ mandoc_dbg_name(buf);
+#endif
+
for (;;) {
if (buf[*pos] != '\0' && man->last != n && tok == MAN_PD) {
mandoc_msg(MANDOCERR_ARG_EXCESS, line, *pos,
diff --git a/man_term.c b/man_term.c
index d289f2d12bfb..706fab8cd4d1 100644
--- a/man_term.c
+++ b/man_term.c
@@ -1,6 +1,6 @@
-/* $Id: man_term.c,v 1.236 2021/06/28 19:50:15 schwarze Exp $ */
+/* $Id: man_term.c,v 1.244 2023/11/13 19:13:01 schwarze Exp $ */
/*
- * Copyright (c) 2010-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2010-15,2017-20,2022-23 Ingo Schwarze <schwarze@openbsd.org>
* Copyright (c) 2008-2012 Kristaps Dzonsons <kristaps@bsd.lv>
*
* Permission to use, copy, modify, and distribute this software for any
@@ -74,6 +74,7 @@ static int pre_DT(DECL_ARGS);
static int pre_HP(DECL_ARGS);
static int pre_I(DECL_ARGS);
static int pre_IP(DECL_ARGS);
+static int pre_MR(DECL_ARGS);
static int pre_OP(DECL_ARGS);
static int pre_PD(DECL_ARGS);
static int pre_PP(DECL_ARGS);
@@ -83,7 +84,6 @@ static int pre_SS(DECL_ARGS);
static int pre_SY(DECL_ARGS);
static int pre_TP(DECL_ARGS);
static int pre_UR(DECL_ARGS);
-static int pre_abort(DECL_ARGS);
static int pre_alternate(DECL_ARGS);
static int pre_ign(DECL_ARGS);
static int pre_in(DECL_ARGS);
@@ -103,9 +103,9 @@ static const struct man_term_act man_term_acts[MAN_MAX - MAN_TH] = {
{ pre_SS, post_SH, 0 }, /* SS */
{ pre_TP, post_TP, 0 }, /* TP */
{ pre_TP, post_TP, 0 }, /* TQ */
- { pre_abort, NULL, 0 }, /* LP */
+ { pre_PP, NULL, 0 }, /* LP */
{ pre_PP, NULL, 0 }, /* PP */
- { pre_abort, NULL, 0 }, /* P */
+ { pre_PP, NULL, 0 }, /* P */
{ pre_IP, post_IP, 0 }, /* IP */
{ pre_HP, post_HP, 0 }, /* HP */
{ NULL, NULL, 0 }, /* SM */
@@ -121,10 +121,10 @@ static const struct man_term_act man_term_acts[MAN_MAX - MAN_TH] = {
{ pre_alternate, NULL, 0 }, /* RI */
{ NULL, NULL, 0 }, /* RE */
{ pre_RS, post_RS, 0 }, /* RS */
- { pre_DT, NULL, 0 }, /* DT */
+ { pre_DT, NULL, MAN_NOTEXT }, /* DT */
{ pre_ign, NULL, MAN_NOTEXT }, /* UC */
{ pre_PD, NULL, MAN_NOTEXT }, /* PD */
- { pre_ign, NULL, 0 }, /* AT */
+ { pre_ign, NULL, MAN_NOTEXT }, /* AT */
{ pre_in, NULL, MAN_NOTEXT }, /* in */
{ pre_SY, post_SY, 0 }, /* SY */
{ NULL, NULL, 0 }, /* YS */
@@ -135,6 +135,7 @@ static const struct man_term_act man_term_acts[MAN_MAX - MAN_TH] = {
{ NULL, NULL, 0 }, /* UE */
{ pre_UR, post_UR, 0 }, /* MT */
{ NULL, NULL, 0 }, /* ME */
+ { pre_MR, NULL, 0 }, /* MR */
};
static const struct man_term_act *man_term_act(enum roff_tok);
@@ -152,19 +153,15 @@ terminal_man(void *arg, const struct roff_meta *man)
struct mtermp mt;
struct termp *p;
struct roff_node *n, *nc, *nn;
- size_t save_defindent;
p = (struct termp *)arg;
- save_defindent = p->defindent;
- if (p->synopsisonly == 0 && p->defindent == 0)
- p->defindent = 7;
p->tcol->rmargin = p->maxrmargin = p->defrmargin;
term_tab_set(p, NULL);
term_tab_set(p, "T");
term_tab_set(p, ".5i");
memset(&mt, 0, sizeof(mt));
- mt.lmargin[mt.lmargincur] = term_len(p, p->defindent);
+ mt.lmargin[mt.lmargincur] = term_len(p, 7);
mt.offset = term_len(p, p->defindent);
mt.pardist = 1;
@@ -194,7 +191,6 @@ terminal_man(void *arg, const struct roff_meta *man)
print_man_nodelist(p, &mt, n, man);
term_end(p);
}
- p->defindent = save_defindent;
}
/*
@@ -225,13 +221,6 @@ print_bvspace(struct termp *p, struct roff_node *n, int pardist)
term_vspace(p);
}
-
-static int
-pre_abort(DECL_ARGS)
-{
- abort();
-}
-
static int
pre_ign(DECL_ARGS)
{
@@ -336,6 +325,29 @@ pre_B(DECL_ARGS)
}
static int
+pre_MR(DECL_ARGS)
+{
+ term_fontrepl(p, TERMFONT_NONE);
+ n = n->child;
+ if (n != NULL) {
+ term_word(p, n->string); /* name */
+ p->flags |= TERMP_NOSPACE;
+ }
+ term_word(p, "(");
+ p->flags |= TERMP_NOSPACE;
+ if (n != NULL && (n = n->next) != NULL) {
+ term_word(p, n->string); /* section */
+ p->flags |= TERMP_NOSPACE;
+ }
+ term_word(p, ")");
+ if (n != NULL && (n = n->next) != NULL) {
+ p->flags |= TERMP_NOSPACE;
+ term_word(p, n->string); /* suffix */
+ }
+ return 0;
+}
+
+static int
pre_OP(DECL_ARGS)
{
term_word(p, "[");
@@ -487,7 +499,7 @@ pre_PP(DECL_ARGS)
{
switch (n->type) {
case ROFFT_BLOCK:
- mt->lmargin[mt->lmargincur] = term_len(p, p->defindent);
+ mt->lmargin[mt->lmargincur] = term_len(p, 7);
print_bvspace(p, n, mt->pardist);
break;
case ROFFT_HEAD:
@@ -517,7 +529,7 @@ pre_IP(DECL_ARGS)
p->trailspace = 1;
break;
case ROFFT_BODY:
- p->flags |= TERMP_NOSPACE;
+ p->flags |= TERMP_NOSPACE | TERMP_NONEWLINE;
break;
default:
abort();
@@ -591,7 +603,7 @@ pre_TP(DECL_ARGS)
p->trailspace = 1;
break;
case ROFFT_BODY:
- p->flags |= TERMP_NOSPACE;
+ p->flags |= TERMP_NOSPACE | TERMP_NONEWLINE;
break;
default:
abort();
@@ -663,7 +675,7 @@ pre_SS(DECL_ARGS)
switch (n->type) {
case ROFFT_BLOCK:
- mt->lmargin[mt->lmargincur] = term_len(p, p->defindent);
+ mt->lmargin[mt->lmargincur] = term_len(p, 7);
mt->offset = term_len(p, p->defindent);
/*
@@ -704,7 +716,7 @@ pre_SH(DECL_ARGS)
switch (n->type) {
case ROFFT_BLOCK:
- mt->lmargin[mt->lmargincur] = term_len(p, p->defindent);
+ mt->lmargin[mt->lmargincur] = term_len(p, 7);
mt->offset = term_len(p, p->defindent);
/*
@@ -788,7 +800,7 @@ pre_RS(DECL_ARGS)
if (++mt->lmarginsz < MAXMARGINS)
mt->lmargincur = mt->lmarginsz;
- mt->lmargin[mt->lmargincur] = term_len(p, p->defindent);
+ mt->lmargin[mt->lmargincur] = term_len(p, 7);
return 1;
}
@@ -900,6 +912,22 @@ print_man_node(DECL_ARGS)
const struct man_term_act *act;
int c;
+ /*
+ * In no-fill mode, break the output line at the beginning
+ * of new input lines except after \c, and nowhere else.
+ */
+
+ if (n->flags & NODE_NOFILL) {
+ if (n->flags & NODE_LINE &&
+ (p->flags & TERMP_NONEWLINE) == 0)
+ term_newln(p);
+ p->flags |= TERMP_BRNEVER;
+ } else {
+ if (n->flags & NODE_LINE)
+ term_tab_ref(p);
+ p->flags &= ~TERMP_BRNEVER;
+ }
+
if (n->flags & NODE_ID)
term_tag_write(n, p->line);
@@ -935,7 +963,7 @@ print_man_node(DECL_ARGS)
return;
case ROFFT_TBL:
if (p->tbl.cols == NULL)
- term_vspace(p);
+ term_newln(p);
term_tbl(p, n->span);
return;
default:
@@ -964,27 +992,11 @@ print_man_node(DECL_ARGS)
term_fontrepl(p, TERMFONT_NONE);
out:
- /*
- * If we're in a literal context, make sure that words
- * together on the same line stay together. This is a
- * POST-printing call, so we check the NEXT word. Since
- * -man doesn't have nested macros, we don't need to be
- * more specific than this.
- */
- if (n->flags & NODE_NOFILL &&
- ! (p->flags & (TERMP_NOBREAK | TERMP_NONEWLINE)) &&
- (n->next == NULL || n->next->flags & NODE_LINE)) {
- p->flags |= TERMP_BRNEVER | TERMP_NOSPACE;
- if (n->string != NULL && *n->string != '\0')
- term_flushln(p);
- else
- term_newln(p);
- p->flags &= ~TERMP_BRNEVER;
- if (p->tcol->rmargin < p->maxrmargin &&
- n->parent->tok == MAN_HP) {
- p->tcol->offset = p->tcol->rmargin;
- p->tcol->rmargin = p->maxrmargin;
- }
+ if (n->parent->tok == MAN_HP && n->parent->type == ROFFT_BODY &&
+ n->prev == NULL && n->flags & NODE_NOFILL) {
+ term_newln(p);
+ p->tcol->offset = p->tcol->rmargin;
+ p->tcol->rmargin = p->maxrmargin;
}
if (n->flags & NODE_EOS)
p->flags |= TERMP_SENTENCE;
diff --git a/man_validate.c b/man_validate.c
index 404b223f2b54..857adba2798f 100644
--- a/man_validate.c
+++ b/man_validate.c
@@ -1,6 +1,6 @@
-/* $Id: man_validate.c,v 1.156 2021/08/10 12:55:03 schwarze Exp $ */
+/* $Id: man_validate.c,v 1.159 2023/10/24 20:53:12 schwarze Exp $ */
/*
- * Copyright (c) 2010, 2012-2020 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2010, 2012-2020, 2023 Ingo Schwarze <schwarze@openbsd.org>
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
*
* Permission to use, copy, modify, and distribute this software for any
@@ -33,6 +33,7 @@
#include "mandoc_aux.h"
#include "mandoc.h"
+#include "mandoc_xr.h"
#include "roff.h"
#include "man.h"
#include "libmandoc.h"
@@ -44,7 +45,6 @@
typedef void (*v_check)(CHKARGS);
-static void check_abort(CHKARGS) __attribute__((__noreturn__));
static void check_par(CHKARGS);
static void check_part(CHKARGS);
static void check_root(CHKARGS);
@@ -55,6 +55,7 @@ static void post_AT(CHKARGS);
static void post_EE(CHKARGS);
static void post_EX(CHKARGS);
static void post_IP(CHKARGS);
+static void post_MR(CHKARGS);
static void post_OP(CHKARGS);
static void post_SH(CHKARGS);
static void post_TH(CHKARGS);
@@ -69,9 +70,9 @@ static const v_check man_valids[MAN_MAX - MAN_TH] = {
post_SH, /* SS */
post_TP, /* TP */
post_TP, /* TQ */
- check_abort,/* LP */
+ check_par, /* LP */
check_par, /* PP */
- check_abort,/* P */
+ check_par, /* P */
post_IP, /* IP */
NULL, /* HP */
NULL, /* SM */
@@ -101,6 +102,7 @@ static const v_check man_valids[MAN_MAX - MAN_TH] = {
NULL, /* UE */
post_UR, /* MT */
NULL, /* ME */
+ post_MR, /* MR */
};
@@ -112,25 +114,11 @@ man_validate(struct roff_man *man)
const v_check *cp;
/*
- * Translate obsolete macros such that later code
- * does not need to look for them.
- */
-
- n = man->last;
- switch (n->tok) {
- case MAN_LP:
- case MAN_P:
- n->tok = MAN_PP;
- break;
- default:
- break;
- }
-
- /*
* Iterate over all children, recursing into each one
* in turn, depth-first.
*/
+ n = man->last;
man->last = man->last->child;
while (man->last != NULL) {
man_validate(man);
@@ -200,12 +188,6 @@ check_root(CHKARGS)
"(OpenBSD)" : "(NetBSD)");
}
-static void
-check_abort(CHKARGS)
-{
- abort();
-}
-
/*
* Skip leading whitespace, dashes, backslashes, and font escapes,
* then create a tag if the first following byte is a letter.
@@ -340,7 +322,8 @@ post_SH(CHKARGS)
return;
}
- if (nc->tok == MAN_PP && nc->body->child != NULL) {
+ if ((nc->tok == MAN_LP || nc->tok == MAN_PP || nc->tok == MAN_P) &&
+ nc->body->child != NULL) {
while (nc->body->last != NULL) {
man->next = ROFF_NEXT_CHILD;
roff_node_relink(man, nc->body->last);
@@ -348,7 +331,8 @@ post_SH(CHKARGS)
}
}
- if (nc->tok == MAN_PP || nc->tok == ROFF_sp || nc->tok == ROFF_br) {
+ if (nc->tok == MAN_LP || nc->tok == MAN_PP || nc->tok == MAN_P ||
+ nc->tok == ROFF_sp || nc->tok == ROFF_br) {
mandoc_msg(MANDOCERR_PAR_SKIP, nc->line, nc->pos,
"%s after %s", roff_name[nc->tok], roff_name[n->tok]);
roff_node_delete(man, nc);
@@ -373,13 +357,11 @@ post_UR(CHKARGS)
if (n->type == ROFFT_HEAD && n->child == NULL)
mandoc_msg(MANDOCERR_UR_NOHEAD, n->line, n->pos,
"%s", roff_name[n->tok]);
- check_part(man, n);
}
static void
check_part(CHKARGS)
{
-
if (n->type == ROFFT_BODY && n->child == NULL)
mandoc_msg(MANDOCERR_BLK_EMPTY, n->line, n->pos,
"%s", roff_name[n->tok]);
@@ -568,6 +550,32 @@ post_TH(CHKARGS)
}
static void
+post_MR(CHKARGS)
+{
+ struct roff_node *nch;
+
+ if ((nch = n->child) == NULL) {
+ mandoc_msg(MANDOCERR_NM_NONAME, n->line, n->pos, "MR");
+ return;
+ }
+ if (nch->next == NULL) {
+ mandoc_msg(MANDOCERR_XR_NOSEC,
+ n->line, n->pos, "MR %s", nch->string);
+ return;
+ }
+ if (mandoc_xr_add(nch->next->string, nch->string, nch->line, nch->pos))
+ mandoc_msg(MANDOCERR_XR_SELF, nch->line, nch->pos,
+ "MR %s %s", nch->string, nch->next->string);
+ if ((nch = nch->next->next) == NULL || nch->next == NULL)
+ return;
+
+ mandoc_msg(MANDOCERR_ARG_EXCESS, nch->next->line, nch->next->pos,
+ "MR ... %s", nch->next->string);
+ while (nch->next != NULL)
+ roff_node_delete(man, nch->next);
+}
+
+static void
post_UC(CHKARGS)
{
static const char * const bsd_versions[] = {
diff --git a/mandoc.1 b/mandoc.1
index f7490963b4bd..b1e0cf118336 100644
--- a/mandoc.1
+++ b/mandoc.1
@@ -1,6 +1,6 @@
-.\" $OpenBSD: mandoc.1,v 1.166 2020/02/15 15:28:01 schwarze Exp $
+.\" $Id: mandoc.1,v 1.267 2023/11/13 19:13:01 schwarze Exp $
.\"
-.\" Copyright (c) 2012, 2014-2021 Ingo Schwarze <schwarze@openbsd.org>
+.\" Copyright (c) 2012, 2014-2023 Ingo Schwarze <schwarze@openbsd.org>
.\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
.\"
.\" Permission to use, copy, modify, and distribute this software for any
@@ -15,7 +15,7 @@
.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
.\"
-.Dd $Mdocdate: August 14 2021 $
+.Dd $Mdocdate: November 13 2023 $
.Dt MANDOC 1
.Os
.Sh NAME
@@ -287,10 +287,7 @@ arguments are accepted:
.It Cm indent Ns = Ns Ar indent
The left margin for normal text is set to
.Ar indent
-blank characters instead of the default of five for
-.Xr mdoc 7
-and seven for
-.Xr man 7 .
+blank characters instead of the default of five.
Increasing this is not recommended; it may result in degraded formatting,
for example overfull lines or ugly line breaks.
When output is to a pager on a terminal that is less than 66 columns
@@ -302,8 +299,7 @@ input files in
.Xr mdoc 7
output style.
This prints the operating system name rather than the page title
-on the right side of the footer line, and it implies
-.Fl O Cm indent Ns =5 .
+on the right side of the footer line.
One useful application is for checking that
.Fl T Cm man
output formats in the same way as the
@@ -342,21 +338,9 @@ and may exceed the output width.
Output produced by
.Fl T Cm html
conforms to HTML5 using optional self-closing tags.
-Default styles use only CSS1.
Equations rendered from
.Xr eqn 7
blocks use MathML.
-.Pp
-The file
-.Pa /usr/share/misc/mandoc.css
-documents style-sheet classes available for customising output.
-If a style-sheet is not specified with
-.Fl O Cm style ,
-.Fl T Cm html
-defaults to simple output (via an embedded style-sheet)
-readable in any graphical or text-based web
-browser.
-.Pp
Non-ASCII characters are rendered
as hexadecimal Unicode character references.
.Pp
@@ -407,9 +391,49 @@ otherwise, the second format is used.
.It Cm style Ns = Ns Ar style.css
The file
.Ar style.css
-is used for an external style-sheet.
+is used as an external stylesheet.
This must be a valid absolute or
relative URI.
+.Pp
+Using the file
+.Pa mandoc.css
+that is distributed with
+.Nm
+is recommended.
+It provides an appearance similar to terminal output with some additional
+features specific to
+.Nm
+HTML output, in particular making anchor locations that support
+deep linking stand out visually by putting a dotted line under them,
+providing tooltips showing the semantic function of elements (macro
+names), providing some simple aspects of responsive web design, and
+providing simple support for users who prefer a dark color scheme.
+.Pp
+Using a custom CSS file is possible, but writing it requires
+proficiency in all of the languages HTML 5, CSS 4, and
+.Xr mdoc 7
+and familiarity with the
+.Nm Ns -specific
+classes used in
+.Pa mandoc.css .
+Besides, while the file
+.Pa mandoc.css
+is always adapted to the HTML output generated by the
+.Nm
+version it is distributed with, maintaining a custom CSS file usually
+requires adaptations each time
+.Nm
+is upgraded to a new version.
+.Pp
+If a stylesheet is not specified with
+.Fl O Cm style ,
+.Fl T Cm html
+embeds a minimal stylesheet into the HTML output, mostly to select
+adequate font-style and font-weight attributes for various macros.
+The result is readable in any graphical or text-based web browser,
+but does not aim to look similar to terminal output.
+Instead, formatting is mostly left to browser defaults
+and to user settings in the browser configuration.
.It Cm tag Ns Op = Ns Ar term
Same syntax and semantics as for
.Sx ASCII Output .
@@ -487,10 +511,10 @@ Use
to translate
.Xr mdoc 7
input to the markdown format conforming to
-.Lk http://daringfireball.net/projects/markdown/syntax.text\
+.Lk https://daringfireball.net/projects/markdown/syntax.text\
"John Gruber's 2004 specification" .
The output also almost conforms to the
-.Lk http://commonmark.org/ CommonMark
+.Lk https://commonmark.org/ CommonMark
specification.
.Pp
The character set used for the markdown output is ASCII.
@@ -735,11 +759,11 @@ output mode implies
.Sh EXAMPLES
To page manuals to the terminal:
.Pp
-.Dl $ mandoc -l mandoc.1 man.1 apropos.1 makewhatis.8
+.Dl $ mandoc -a mandoc.1 man.1 apropos.1 makewhatis.8
.Pp
To produce HTML manuals with
.Pa /usr/share/misc/mandoc.css
-as the style-sheet:
+as the stylesheet:
.Pp
.Dl $ mandoc \-T html -O style=/usr/share/misc/mandoc.css mdoc.7 > mdoc.7.html
.Pp
@@ -766,7 +790,7 @@ Messages displayed by
follow this format:
.Bd -ragged -offset indent
.Nm :
-.Ar file : Ns Ar line : Ns Ar column : level : message : macro arguments
+.Ar file : Ns Ar line : Ns Ar column : level : message : macro argument ...
.Pq Ar os
.Ed
.Pp
@@ -786,9 +810,7 @@ and
strings are explained below.
The name of the
.Ar macro
-triggering the message and its
-.Ar arguments
-are omitted where meaningless.
+triggering the message and its arguments are omitted where meaningless.
The
.Ar os
operating system specifier is omitted for messages that are relevant
@@ -835,7 +857,7 @@ message levels, the
.Cm style
level tries to reduce the probability that issues go unnoticed,
so it may occasionally issue bogus suggestions.
-Please use your good judgement to decide whether any particular
+Use your judgement to decide whether any particular
.Cm style
suggestion really justifies a change to the input file.
.It Cm base
@@ -1269,9 +1291,11 @@ The same standard section title occurs more than once.
A standard section header occurs in a section of the manual
where it normally isn't useful.
.It Sy "cross reference to self"
-.Pq mdoc
+.Pq mdoc , man
An
.Ic \&Xr
+or
+.Ic \&MR
macro refers to a name and section matching the section of the present
manual page and a name mentioned in an
.Ic \&Nm
@@ -1474,10 +1498,8 @@ A
.Ic \&Bl ,
.Ic \&D1 ,
.Ic \&Dl ,
-.Ic \&MT ,
-.Ic \&RS ,
or
-.Ic \&UR
+.Ic \&RS
block contains nothing in its body and will produce no output.
.It Sy "empty argument, using 0n"
.Pq mdoc
@@ -1592,12 +1614,16 @@ macro is immediately followed by an
macro on the next input line.
Such an empty block does not produce any output.
.It Sy "missing section argument"
-.Pq mdoc
+.Pq mdoc , man
An
.Ic \&Xr
+or
+.Ic \&MR
macro lacks its second, section number argument.
-The first argument, i.e. the name, is printed, but without subsequent
-parentheses.
+The first argument, i.e. the name, is printed, but without a section number.
+In the case of
+.Ic \&Xr ,
+the parentheses are also omitted.
.It Sy "missing -std argument, adding it"
.Pq mdoc
An
@@ -1754,6 +1780,15 @@ request or a
layout modifier has an unknown
.Ar font
argument.
+.It Sy "ignoring distance argument"
+.Pq roff
+In addition to the margin character, an
+.Ic \&mc
+request has a second argument supposed to represent a distance, but the
+.Nm
+implementation of
+.Ic \&mc
+always ignores the second argument.
.It Sy "odd number of characters in request"
.Pq roff
A
@@ -1788,23 +1823,10 @@ it is hard to predict which tab stop position the tab will advance to.
.Pq mdoc
A new sentence starts in the middle of a text line.
Start it on a new input line to help formatters produce correct spacing.
-.It Sy "invalid escape sequence"
+.It Sy "invalid escape sequence argument"
.Pq roff
-An escape sequence has an invalid opening argument delimiter, lacks the
-closing argument delimiter, the argument is of an invalid form, or it is
-a character escape sequence with an invalid name.
-If the argument is incomplete,
-.Ic \e*
-and
-.Ic \en
-expand to an empty string,
-.Ic \eB
-to the digit
-.Sq 0 ,
-and
-.Ic \ew
-to the length of the incomplete argument.
-All other invalid escape sequences are ignored.
+The argument of an escape sequence is of an invalid form.
+Invalid escape sequences are ignored.
.It Sy "undefined escape, printing literally"
.Pq roff
In an escape sequence, the first character
@@ -2082,6 +2104,13 @@ and expands to the empty string.
.Pq roff
The argument of the escape sequence \e$ is not a digit;
the escape sequence expands to the empty string.
+.It Sy "negative argument, using 0"
+.Pq roff
+A
+.Ic \&shift
+request has a negative argument
+or an argument that is negative due to integer overflow.
+Macro argument numbering remains unchanged.
.It Sy "NOT IMPLEMENTED: Bd -file"
.Pq mdoc
For security reasons, the
@@ -2117,11 +2146,20 @@ The first argument of a
request is neither a single ASCII character
nor a single character escape sequence.
The request is ignored including all its arguments.
+.It Sy "skipping unusable escape sequence"
+.Pq roff
+The first argument of an
+.Ic mc
+request is neither a single ASCII character
+nor a single character escape sequence.
+All arguments are ignored and printing of a margin character is disabled.
.It Sy "missing manual name, using \(dq\(dq"
-.Pq mdoc
+.Pq mdoc , man
The first call to
.Ic \&Nm ,
-or any call in the NAME section, lacks the required argument.
+or any call in the NAME section, lacks the required argument, or
+.Ic \&MR
+is called without any argument.
.It Sy "uname(3) system call failed, using UNKNOWN"
.Pq mdoc
The
@@ -2249,6 +2287,8 @@ or a request of the
family with more than two arguments
.It
.Ic \&Dt
+or
+.Ic \&MR
with more than three arguments
.It
.Ic \&TH
@@ -2262,6 +2302,60 @@ with invalid arguments
.El
The excess arguments are ignored.
.El
+.Ss "Errors related to escape sequences"
+.Bl -ohang
+.It Sy "incomplete escape sequence"
+.Pq roff
+The end of the input line is encountered
+while parsing the argument of an escape sequence.
+In this case,
+.Ic \e*
+and
+.Ic \en
+expand to an empty string,
+.Ic \eB
+to the digit
+.Sq 0 ,
+and
+.Ic \ew
+to the length of the incomplete argument.
+All other incomplete escape sequences are ignored.
+.It Sy "invalid special character"
+.Pq roff
+A special character escape sequence is invalid,
+for example a Unicode sequence pointing to a surrogate
+or beyond the Unicode range, a \e[char...] escape sequence
+representing a control character or pointing beyond the
+.Vt unsigned char
+range, or an invalid variable-length form
+of a single-byte character escape sequence, for example writing
+.Qq \e[e]
+or
+.Qq \e[~]
+instead of
+.Qq \ee
+or
+.Qq \e~ ,
+respectively.
+The escape sequence is ignored.
+.It Sy "unknown special character"
+.Pq roff
+The name given in a special character escape sequence is not known to
+.Nm .
+The escape sequence is ignored.
+.It Sy "invalid escape argument delimiter"
+.Pq roff
+An escape sequence that expects a numerical argument
+attempts to employ one of the characters
+.Qq " %&()*+-./0123456789:<=>"
+as an argument delimiter.
+The escape sequence is ignored, including the invalid opening delimiter,
+and the rest of the argument may appear as output text.
+While various characters can be used as argument delimiters,
+using the apostrophe-quote character
+.Pq Sq \(aq
+is recommended for readability and robustness.
+.El
.Ss Unsupported features
.Bl -ohang
.It Sy "input too large"
diff --git a/mandoc.c b/mandoc.c
index 6adf1a4318b2..ad00a01a01f7 100644
--- a/mandoc.c
+++ b/mandoc.c
@@ -1,7 +1,8 @@
-/* $Id: mandoc.c,v 1.119 2021/08/10 12:55:03 schwarze Exp $ */
+/* $Id: mandoc.c,v 1.121 2022/05/19 15:37:47 schwarze Exp $ */
/*
- * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2011-2015, 2017-2021 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2010, 2011, 2015, 2017, 2018, 2019, 2020, 2021
+ * Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -14,6 +15,11 @@
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Utility functions to handle end of sentence punctuation
+ * and dates and times, for use by mdoc(7) and man(7) parsers.
+ * Utility functions to handle fonts and numbers,
+ * for use by mandoc(1) parsers and formatters.
*/
#include "config.h"
@@ -91,388 +97,6 @@ mandoc_font(const char *cp, int sz)
}
}
-enum mandoc_esc
-mandoc_escape(const char **end, const char **start, int *sz)
-{
- const char *local_start;
- int local_sz, c, i;
- char term;
- enum mandoc_esc gly;
-
- /*
- * When the caller doesn't provide return storage,
- * use local storage.
- */
-
- if (NULL == start)
- start = &local_start;
- if (NULL == sz)
- sz = &local_sz;
-
- /*
- * Treat "\E" just like "\";
- * it only makes a difference in copy mode.
- */
-
- if (**end == 'E')
- ++*end;
-
- /*
- * Beyond the backslash, at least one input character
- * is part of the escape sequence. With one exception
- * (see below), that character won't be returned.
- */
-
- gly = ESCAPE_ERROR;
- *start = ++*end;
- *sz = 0;
- term = '\0';
-
- switch ((*start)[-1]) {
- /*
- * First the glyphs. There are several different forms of
- * these, but each eventually returns a substring of the glyph
- * name.
- */
- case '(':
- gly = ESCAPE_SPECIAL;
- *sz = 2;
- break;
- case '[':
- if (**start == ' ') {
- ++*end;
- return ESCAPE_ERROR;
- }
- gly = ESCAPE_SPECIAL;
- term = ']';
- break;
- case 'C':
- if ('\'' != **start)
- return ESCAPE_ERROR;
- *start = ++*end;
- gly = ESCAPE_SPECIAL;
- term = '\'';
- break;
-
- /*
- * Escapes taking no arguments at all.
- */
- case '!':
- case '?':
- return ESCAPE_UNSUPP;
- case '%':
- case '&':
- case ')':
- case ',':
- case '/':
- case '^':
- case 'a':
- case 'd':
- case 'r':
- case 't':
- case 'u':
- case '{':
- case '|':
- case '}':
- return ESCAPE_IGNORE;
- case 'c':
- return ESCAPE_NOSPACE;
- case 'p':
- return ESCAPE_BREAK;
-
- /*
- * The \z escape is supposed to output the following
- * character without advancing the cursor position.
- * Since we are mostly dealing with terminal mode,
- * let us just skip the next character.
- */
- case 'z':
- return ESCAPE_SKIPCHAR;
-
- /*
- * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where
- * 'X' is the trigger. These have opaque sub-strings.
- */
- case 'F':
- case 'f':
- case 'g':
- case 'k':
- case 'M':
- case 'm':
- case 'n':
- case 'O':
- case 'V':
- case 'Y':
- case '*':
- switch ((*start)[-1]) {
- case 'f':
- gly = ESCAPE_FONT;
- break;
- case '*':
- gly = ESCAPE_DEVICE;
- break;
- default:
- gly = ESCAPE_IGNORE;
- break;
- }
- switch (**start) {
- case '(':
- if ((*start)[-1] == 'O')
- gly = ESCAPE_ERROR;
- *start = ++*end;
- *sz = 2;
- break;
- case '[':
- if ((*start)[-1] == 'O')
- gly = (*start)[1] == '5' ?
- ESCAPE_UNSUPP : ESCAPE_ERROR;
- *start = ++*end;
- term = ']';
- break;
- default:
- if ((*start)[-1] == 'O') {
- switch (**start) {
- case '0':
- gly = ESCAPE_UNSUPP;
- break;
- case '1':
- case '2':
- case '3':
- case '4':
- break;
- default:
- gly = ESCAPE_ERROR;
- break;
- }
- }
- *sz = 1;
- break;
- }
- break;
-
- /*
- * These escapes are of the form \X'Y', where 'X' is the trigger
- * and 'Y' is any string. These have opaque sub-strings.
- * The \B and \w escapes are handled in roff.c, roff_res().
- */
- case 'A':
- case 'b':
- case 'D':
- case 'R':
- case 'X':
- case 'Z':
- gly = ESCAPE_IGNORE;
- /* FALLTHROUGH */
- case 'o':
- if (**start == '\0')
- return ESCAPE_ERROR;
- if (gly == ESCAPE_ERROR)
- gly = ESCAPE_OVERSTRIKE;
- term = **start;
- *start = ++*end;
- break;
-
- /*
- * These escapes are of the form \X'N', where 'X' is the trigger
- * and 'N' resolves to a numerical expression.
- */
- case 'h':
- case 'H':
- case 'L':
- case 'l':
- case 'S':
- case 'v':
- case 'x':
- if (strchr(" %&()*+-./0123456789:<=>", **start)) {
- if ('\0' != **start)
- ++*end;
- return ESCAPE_ERROR;
- }
- switch ((*start)[-1]) {
- case 'h':
- gly = ESCAPE_HORIZ;
- break;
- case 'l':
- gly = ESCAPE_HLINE;
- break;
- default:
- gly = ESCAPE_IGNORE;
- break;
- }
- term = **start;
- *start = ++*end;
- break;
-
- /*
- * Special handling for the numbered character escape.
- * XXX Do any other escapes need similar handling?
- */
- case 'N':
- if ('\0' == **start)
- return ESCAPE_ERROR;
- (*end)++;
- if (isdigit((unsigned char)**start)) {
- *sz = 1;
- return ESCAPE_IGNORE;
- }
- (*start)++;
- while (isdigit((unsigned char)**end))
- (*end)++;
- *sz = *end - *start;
- if ('\0' != **end)
- (*end)++;
- return ESCAPE_NUMBERED;
-
- /*
- * Sizes get a special category of their own.
- */
- case 's':
- gly = ESCAPE_IGNORE;
-
- /* See +/- counts as a sign. */
- if ('+' == **end || '-' == **end || ASCII_HYPH == **end)
- *start = ++*end;
-
- switch (**end) {
- case '(':
- *start = ++*end;
- *sz = 2;
- break;
- case '[':
- *start = ++*end;
- term = ']';
- break;
- case '\'':
- *start = ++*end;
- term = '\'';
- break;
- case '3':
- case '2':
- case '1':
- *sz = (*end)[-1] == 's' &&
- isdigit((unsigned char)(*end)[1]) ? 2 : 1;
- break;
- default:
- *sz = 1;
- break;
- }
-
- break;
-
- /*
- * Several special characters can be encoded as
- * one-byte escape sequences without using \[].
- */
- case ' ':
- case '\'':
- case '-':
- case '.':
- case '0':
- case ':':
- case '_':
- case '`':
- case 'e':
- case '~':
- gly = ESCAPE_SPECIAL;
- /* FALLTHROUGH */
- default:
- if (gly == ESCAPE_ERROR)
- gly = ESCAPE_UNDEF;
- *start = --*end;
- *sz = 1;
- break;
- }
-
- /*
- * Read up to the terminating character,
- * paying attention to nested escapes.
- */
-
- if ('\0' != term) {
- while (**end != term) {
- switch (**end) {
- case '\0':
- return ESCAPE_ERROR;
- case '\\':
- (*end)++;
- if (ESCAPE_ERROR ==
- mandoc_escape(end, NULL, NULL))
- return ESCAPE_ERROR;
- break;
- default:
- (*end)++;
- break;
- }
- }
- *sz = (*end)++ - *start;
-
- /*
- * The file chars.c only provides one common list
- * of character names, but \[-] == \- is the only
- * one of the characters with one-byte names that
- * allows enclosing the name in brackets.
- */
- if (gly == ESCAPE_SPECIAL && *sz == 1 && **start != '-')
- return ESCAPE_ERROR;
- } else {
- assert(*sz > 0);
- if ((size_t)*sz > strlen(*start))
- return ESCAPE_ERROR;
- *end += *sz;
- }
-
- /* Run post-processors. */
-
- switch (gly) {
- case ESCAPE_FONT:
- gly = mandoc_font(*start, *sz);
- break;
- case ESCAPE_SPECIAL:
- if (**start == 'c') {
- if (*sz < 6 || *sz > 7 ||
- strncmp(*start, "char", 4) != 0 ||
- (int)strspn(*start + 4, "0123456789") + 4 < *sz)
- break;
- c = 0;
- for (i = 4; i < *sz; i++)
- c = 10 * c + ((*start)[i] - '0');
- if (c < 0x21 || (c > 0x7e && c < 0xa0) || c > 0xff)
- break;
- *start += 4;
- *sz -= 4;
- gly = ESCAPE_NUMBERED;
- break;
- }
-
- /*
- * Unicode escapes are defined in groff as \[u0000]
- * to \[u10FFFF], where the contained value must be
- * a valid Unicode codepoint. Here, however, only
- * check the length and range.
- */
- if (**start != 'u' || *sz < 5 || *sz > 7)
- break;
- if (*sz == 7 && ((*start)[1] != '1' || (*start)[2] != '0'))
- break;
- if (*sz == 6 && (*start)[1] == '0')
- break;
- if (*sz == 5 && (*start)[1] == 'D' &&
- strchr("89ABCDEF", (*start)[2]) != NULL)
- break;
- if ((int)strspn(*start + 1, "0123456789ABCDEFabcdef")
- + 1 == *sz)
- gly = ESCAPE_UNICODE;
- break;
- case ESCAPE_DEVICE:
- assert(*sz == 2 && (*start)[0] == '.' && (*start)[1] == 'T');
- break;
- default:
- break;
- }
-
- return gly;
-}
-
static int
a2time(time_t *t, const char *fmt, const char *p)
{
diff --git a/mandoc.css b/mandoc.css
index ceac503a79f6..1dae127059d6 100644
--- a/mandoc.css
+++ b/mandoc.css
@@ -1,4 +1,4 @@
-/* $Id: mandoc.css,v 1.48 2021/03/30 19:26:20 schwarze Exp $ */
+/* $Id: mandoc.css,v 1.52 2022/07/06 14:34:59 schwarze Exp $ */
/*
* Standard style sheet for mandoc(1) -Thtml and man.cgi(8).
*
@@ -16,7 +16,7 @@ html { max-width: 65em;
body { background: var(--bg);
color: var(--fg);
font-family: Helvetica,Arial,sans-serif; }
-h1 { font-size: 110%; }
+h1, h2 { font-size: 110%; }
table { margin-top: 0em;
margin-bottom: 0em;
border-collapse: collapse; }
@@ -53,31 +53,39 @@ table.results { margin-top: 1em;
/* Header and footer lines. */
-table.head { width: 100%;
+div[role=doc-pageheader] {
+ display: flex;
border-bottom: 1px dotted #808080;
margin-bottom: 1em;
font-size: smaller; }
-td.head-vol { text-align: center; }
-td.head-rtitle {
+.head-ltitle { flex: 1; }
+.head-vol { flex: 0 1 auto;
+ text-align: center; }
+.head-rtitle { flex: 1;
text-align: right; }
-table.foot { width: 100%;
+div[role=doc-pagefooter] {
+ display: flex;
+ justify-content: space-between;
border-top: 1px dotted #808080;
margin-top: 1em;
font-size: smaller; }
-td.foot-os { text-align: right; }
+.foot-left { flex: 1; }
+.foot-date { flex: 0 1 auto;
+ text-align: center; }
+.foot-os { flex: 1;
+ text-align: right; }
/* Sections and paragraphs. */
-.manual-text {
- margin-left: 3.8em; }
+main { margin-left: 3.8em; }
.Nd { }
section.Sh { }
-h1.Sh { margin-top: 1.2em;
+h2.Sh { margin-top: 1.2em;
margin-bottom: 0.6em;
margin-left: -3.2em; }
section.Ss { }
-h2.Ss { margin-top: 1.2em;
+h3.Ss { margin-top: 1.2em;
margin-bottom: 0.6em;
margin-left: -1.2em;
font-size: 105%; }
@@ -195,7 +203,7 @@ code.Nm { font-style: normal;
font-family: inherit; }
.Ar { font-style: italic;
font-weight: normal; }
-.Op { display: inline; }
+.Op { display: inline flow; }
.Ic { font-style: normal;
font-weight: bold;
font-family: inherit; }
@@ -250,7 +258,7 @@ a.In { }
/* Physical markup. */
-.Bf { display: inline; }
+.Bf { display: inline flow; }
.No { font-style: normal;
font-weight: normal; }
.Em { font-style: italic;
@@ -263,11 +271,11 @@ a.In { }
/* Tooltip support. */
-h1.Sh, h2.Ss { position: relative; }
+h2.Sh, h3.Ss { position: relative; }
.An, .Ar, .Cd, .Cm, .Dv, .Em, .Er, .Ev, .Fa, .Fd, .Fl, .Fn, .Ft,
.Ic, code.In, .Lb, .Lk, .Ms, .Mt, .Nd, code.Nm, .Pa, .Rs,
.St, .Sx, .Sy, .Va, .Vt, .Xr {
- display: inline-block;
+ display: inline flow;
position: relative; }
.An::before { content: "An"; }
@@ -293,8 +301,8 @@ code.In::before { content: "In"; }
code.Nm::before { content: "Nm"; }
.Pa::before { content: "Pa"; }
.Rs::before { content: "Rs"; }
-h1.Sh::before { content: "Sh"; }
-h2.Ss::before { content: "Ss"; }
+h2.Sh::before { content: "Sh"; }
+h3.Ss::before { content: "Ss"; }
.St::before { content: "St"; }
.Sx::before { content: "Sx"; }
.Sy::before { content: "Sy"; }
@@ -308,7 +316,7 @@ h2.Ss::before { content: "Ss"; }
.Ic::before, code.In::before, .Lb::before, .Lk::before,
.Ms::before, .Mt::before, .Nd::before, code.Nm::before,
.Pa::before, .Rs::before,
-h1.Sh::before, h2.Ss::before, .St::before, .Sx::before, .Sy::before,
+h2.Sh::before, h3.Ss::before, .St::before, .Sx::before, .Sy::before,
.Va::before, .Vt::before, .Xr::before {
opacity: 0;
transition: .15s ease opacity;
@@ -329,7 +337,7 @@ h1.Sh::before, h2.Ss::before, .St::before, .Sx::before, .Sy::before,
.Ft:hover::before, .Ic:hover::before, code.In:hover::before,
.Lb:hover::before, .Lk:hover::before, .Ms:hover::before, .Mt:hover::before,
.Nd:hover::before, code.Nm:hover::before, .Pa:hover::before,
-.Rs:hover::before, h1.Sh:hover::before, h2.Ss:hover::before, .St:hover::before,
+.Rs:hover::before, h2.Sh:hover::before, h3.Ss:hover::before, .St:hover::before,
.Sx:hover::before, .Sy:hover::before, .Va:hover::before, .Vt:hover::before,
.Xr:hover::before {
opacity: 1;
@@ -338,9 +346,8 @@ h1.Sh::before, h2.Ss::before, .St::before, .Sx::before, .Sy::before,
/* Overrides to avoid excessive margins on small devices. */
@media (max-width: 37.5em) {
-.manual-text {
- margin-left: 0.5em; }
-h1.Sh, h2.Ss { margin-left: 0em; }
+main { margin-left: 0.5em; }
+h2.Sh, h3.Ss { margin-left: 0em; }
.Bd-indent { margin-left: 2em; }
.Bl-hang > dd {
margin-left: 2em; }
diff --git a/mandoc.h b/mandoc.h
index 9837ff2ae9df..9ab68327ecb4 100644
--- a/mandoc.h
+++ b/mandoc.h
@@ -1,6 +1,6 @@
-/* $Id: mandoc.h,v 1.274 2021/08/14 13:53:08 schwarze Exp $ */
+/* $Id: mandoc.h,v 1.282 2023/10/21 17:10:17 schwarze Exp $ */
/*
- * Copyright (c) 2012-2021 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2012-2022 Ingo Schwarze <schwarze@openbsd.org>
* Copyright (c) 2010, 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
*
* Permission to use, copy, modify, and distribute this software for any
@@ -20,8 +20,10 @@
*/
#define ASCII_NBRSP 31 /* non-breaking space */
-#define ASCII_HYPH 30 /* breakable hyphen */
-#define ASCII_BREAK 29 /* breakable zero-width space */
+#define ASCII_NBRZW 30 /* non-breaking zero-width space */
+#define ASCII_BREAK 29 /* breakable zero-width space */
+#define ASCII_HYPH 28 /* breakable hyphen */
+#define ASCII_TABREF 26 /* reset tab reference position */
/*
* Status level. This refers to both internal status (i.e., whilst
@@ -165,13 +167,14 @@ enum mandocerr {
MANDOCERR_SM_BAD, /* invalid Boolean argument: macro arg */
MANDOCERR_CHAR_FONT, /* argument contains two font escapes */
MANDOCERR_FT_BAD, /* unknown font, skipping request: ft font */
+ MANDOCERR_MC_DIST, /* ignoring distance argument: mc ... arg */
MANDOCERR_TR_ODD, /* odd number of characters in request: tr char */
/* related to plain text */
MANDOCERR_FI_BLANK, /* blank line in fill mode, using .sp */
MANDOCERR_FI_TAB, /* tab in filled text */
MANDOCERR_EOS, /* new sentence, new line */
- MANDOCERR_ESC_BAD, /* invalid escape sequence: esc */
+ MANDOCERR_ESC_ARG, /* invalid escape sequence argument: esc */
MANDOCERR_ESC_UNDEF, /* undefined escape, printing literally: char */
MANDOCERR_STR_UNDEF, /* undefined string, using "": name */
@@ -215,11 +218,13 @@ enum mandocerr {
MANDOCERR_NAMESC, /* escaped character not allowed in a name: name */
MANDOCERR_ARG_UNDEF, /* using macro argument outside macro */
MANDOCERR_ARG_NONUM, /* argument number is not numeric */
+ MANDOCERR_ARG_NEG, /* negative argument, using 0: request arg */
MANDOCERR_BD_FILE, /* NOT IMPLEMENTED: Bd -file */
MANDOCERR_BD_NOARG, /* skipping display without arguments: Bd */
MANDOCERR_BL_NOTYPE, /* missing list type, using -item: Bl */
MANDOCERR_CE_NONUM, /* argument is not numeric, using 1: ce ... */
MANDOCERR_CHAR_ARG, /* argument is not a character: char ... */
+ MANDOCERR_MC_ESC, /* skipping unusable escape sequence: mc arg */
MANDOCERR_NM_NONAME, /* missing manual name, using "": Nm */
MANDOCERR_OS_UNAME, /* uname(3) system call failed, using UNKNOWN */
MANDOCERR_ST_BAD, /* unknown standard specifier: St standard */
@@ -232,6 +237,12 @@ enum mandocerr {
MANDOCERR_ARG_EXCESS, /* skipping excess arguments: macro ... args */
MANDOCERR_DIVZERO, /* divide by zero */
+ /* related to escape sequences */
+ MANDOCERR_ESC_INCOMPLETE, /* incomplete escape sequence: esc */
+ MANDOCERR_ESC_BADCHAR, /* invalid special character: esc */
+ MANDOCERR_ESC_UNKCHAR, /* unknown special character: esc */
+ MANDOCERR_ESC_DELIM, /* invalid escape argument delimiter: esc */
+
MANDOCERR_UNSUPP, /* ===== start of unsupported features ===== */
MANDOCERR_TOOLARGE, /* input too large */
@@ -282,11 +293,12 @@ enum mandocerr {
};
enum mandoc_esc {
- ESCAPE_ERROR = 0, /* bail! unparsable escape */
- ESCAPE_UNSUPP, /* unsupported escape; ignore it */
- ESCAPE_IGNORE, /* escape to be ignored */
- ESCAPE_UNDEF, /* undefined escape; print literal character */
- ESCAPE_SPECIAL, /* a regular special character */
+ ESCAPE_EXPAND = 0, /* interpolation and iterative call needed */
+ ESCAPE_ERROR, /* non-fatal error: unparsable escape */
+ ESCAPE_UNSUPP, /* unsupported escape: warn and ignore */
+ ESCAPE_IGNORE, /* valid escape to be ignored */
+ ESCAPE_UNDEF, /* undefined escape: print literal character */
+ ESCAPE_SPECIAL, /* special character escape */
ESCAPE_FONT, /* a generic font mode */
ESCAPE_FONTBOLD, /* bold font mode */
ESCAPE_FONTITALIC, /* italic font mode */
diff --git a/mandoc_aux.c b/mandoc_aux.c
index 5d595ce0c292..47e150218eee 100644
--- a/mandoc_aux.c
+++ b/mandoc_aux.c
@@ -1,7 +1,7 @@
-/* $Id: mandoc_aux.c,v 1.11 2018/02/07 20:04:57 schwarze Exp $ */
+/* $Id: mandoc_aux.c,v 1.12 2022/04/14 16:43:44 schwarze Exp $ */
/*
+ * Copyright (c) 2014, 2015, 2017, 2018 Ingo Schwarze <schwarze@openbsd.org>
* Copyright (c) 2009, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -27,6 +27,7 @@
#include <stdio.h>
#include <string.h>
+#define DEBUG_NODEF 1
#include "mandoc.h"
#include "mandoc_aux.h"
diff --git a/mandoc_aux.h b/mandoc_aux.h
index 469e331eb4bf..342e3d721e24 100644
--- a/mandoc_aux.h
+++ b/mandoc_aux.h
@@ -1,7 +1,7 @@
-/* $Id: mandoc_aux.h,v 1.7 2017/06/12 19:05:47 schwarze Exp $ */
+/* $Id: mandoc_aux.h,v 1.8 2022/04/14 16:43:44 schwarze Exp $ */
/*
+ * Copyright (c) 2014, 2017, 2021 Ingo Schwarze <schwarze@openbsd.org>
* Copyright (c) 2009, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2014, 2017 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -25,3 +25,7 @@ void *mandoc_reallocarray(void *, size_t, size_t);
void *mandoc_recallocarray(void *, size_t, size_t, size_t);
char *mandoc_strdup(const char *);
char *mandoc_strndup(const char *, size_t);
+
+#if DEBUG_MEMORY
+#include "mandoc_dbg.h"
+#endif
diff --git a/mandoc_char.7 b/mandoc_char.7
index eb9e65acfc86..de546496e84b 100644
--- a/mandoc_char.7
+++ b/mandoc_char.7
@@ -1,8 +1,9 @@
-.\" $Id: mandoc_char.7,v 1.78 2020/10/31 11:45:16 schwarze Exp $
+.\" $Id: mandoc_char.7,v 1.79 2022/06/02 14:51:41 schwarze Exp $
.\"
.\" Copyright (c) 2003 Jason McIntyre <jmc@openbsd.org>
.\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
-.\" Copyright (c) 2011,2013,2015,2017-2020 Ingo Schwarze <schwarze@openbsd.org>
+.\" Copyright (c) 2011, 2013, 2015, 2017-2020, 2022
+.\" Ingo Schwarze <schwarze@openbsd.org>
.\"
.\" Permission to use, copy, modify, and distribute this software for any
.\" purpose with or without fee is hereby granted, provided that the above
@@ -16,7 +17,7 @@
.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
.\"
-.Dd $Mdocdate: October 31 2020 $
+.Dd $Mdocdate: June 2 2022 $
.Dt MANDOC_CHAR 7
.Os
.Sh NAME
@@ -220,13 +221,18 @@ where it introduces a
request or a macro, and when appearing alone as a macro argument in
.Xr mdoc 7 .
In such situations, prepend a zero-width space
-.Pq Sq \e&.
+.Pq Sq \e&.\&
to make it behave like normal text.
.Pp
-Do not use the
+Do not use the character pair
.Sq \e.
-escape sequence.
-It does not prevent special handling of the period.
+to escape a period because
+.Sq \e.
+is not a character escape sequence, does not prevent special handling
+of the period under normal circumstances, and is only intended to
+be used in the very special situation of defining a user-defined
+macro that, when called, defines another user-defined macro, which
+no manual page is ever supposed to do.
.Ss Backslashes
To include a literal backslash
.Pq Sq \e
@@ -326,7 +332,6 @@ Punctuation:
.It \e(en Ta \(en Ta en-dash
.It \e(hy Ta \(hy Ta hyphen
.It \ee Ta \e Ta back-slash
-.It \e. Ta \. Ta period
.It \e(r! Ta \(r! Ta upside-down exclamation
.It \e(r? Ta \(r? Ta upside-down question
.El
diff --git a/mandoc_dbg.c b/mandoc_dbg.c
new file mode 100644
index 000000000000..7d5bf3b925df
--- /dev/null
+++ b/mandoc_dbg.c
@@ -0,0 +1,342 @@
+/* $Id: mandoc_dbg.c,v 1.1 2022/04/14 16:43:44 schwarze Exp $ */
+/*
+ * Copyright (c) 2021, 2022 Ingo Schwarze <schwarze@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include "config.h"
+
+#include <sys/types.h>
+
+#if HAVE_ERR
+#include <err.h>
+#endif
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#if HAVE_OHASH
+#include <ohash.h>
+#else
+#include "compat_ohash.h"
+#endif
+
+#define DEBUG_NODEF 1
+#include "mandoc_aux.h"
+#include "mandoc.h"
+
+/* Store information about one allocation. */
+struct dhash_entry {
+ const char *file; /* source file of the call that allocated */
+ int line; /* source line of that call */
+ const char *func; /* allocator name as logged, e.g. "malloc" */
+ size_t num; /* element count; 1 for non-array allocations */
+ size_t size; /* element size; strlen + 1 for string results */
+ void *ptr; /* address returned; this field is the hash key */
+};
+
+/* Store information about all allocations. */
+static struct ohash dhash_table; /* dhash_entry records, keyed by address */
+static FILE *dhash_fp; /* log stream: stderr or a file named in $DEBUG_MEMORY */
+static int dhash_aflag; /* 'A': log every allocation */
+static int dhash_fflag; /* 'F': log every free */
+static int dhash_lflag; /* 'L': list remaining entries in mandoc_dbg_finish() */
+static int dhash_nflag; /* 'N': log names passed to mandoc_dbg_name() */
+static int dhash_sflag; /* set whenever $DEBUG_MEMORY exists: log the summary */
+
+static void *dhash_alloc(size_t, void *);
+static void *dhash_calloc(size_t, size_t, void *);
+static void dhash_free(void *, void *);
+static unsigned int dhash_slot(void *);
+static void dhash_register(const char *, int, const char *,
+ size_t, size_t, void *, const char *);
+static void dhash_print(struct dhash_entry *);
+static void dhash_purge(const char *, int, const char *, void *);
+
+
+/* *** Debugging wrappers of public API functions. ************************ */
+
+int
+mandoc_dbg_asprintf(const char *file, int line,
+ char **dest, const char *fmt, ...)
+{ /* Tracked asprintf: file and line identify the caller in the log. */
+ va_list ap;
+ int ret;
+
+ va_start(ap, fmt);
+ ret = vasprintf(dest, fmt, ap); /* formats directly, not via mandoc_asprintf() */
+ va_end(ap);
+
+ if (ret == -1)
+ err((int)MANDOCLEVEL_SYSERR, NULL);
+
+ dhash_register(file, line, "asprintf", 1, strlen(*dest) + 1,
+ *dest, *dest); /* the new string is both the tracked address and the logged text */
+
+ return ret;
+}
+
+void *
+mandoc_dbg_calloc(size_t num, size_t size, const char *file, int line)
+{ /* Tracked calloc: allocate, then record the result for the caller at file:line. */
+ void *ptr = mandoc_calloc(num, size); /* the real function: DEBUG_NODEF is defined in this file */
+ dhash_register(file, line, "calloc", num, size, ptr, NULL);
+ return ptr;
+}
+
+void *
+mandoc_dbg_malloc(size_t size, const char *file, int line)
+{ /* Tracked malloc: allocate, then record the result for the caller at file:line. */
+ void *ptr = mandoc_malloc(size);
+ dhash_register(file, line, "malloc", 1, size, ptr, NULL); /* logged as one element of size bytes */
+ return ptr;
+}
+
+void *
+mandoc_dbg_realloc(void *ptr, size_t size, const char *file, int line)
+{ /* Tracked realloc: replace the table entry for ptr by one for the result. */
+ dhash_purge(file, line, "realloc", ptr); /* first, so a result at the same address is not a duplicate */
+ ptr = mandoc_realloc(ptr, size);
+ dhash_register(file, line, "realloc", 1, size, ptr, NULL);
+ return ptr;
+}
+
+void *
+mandoc_dbg_reallocarray(void *ptr, size_t num, size_t size,
+ const char *file, int line)
+{ /* Tracked reallocarray: replace the table entry for ptr by one for the result. */
+ dhash_purge(file, line, "reallocarray", ptr); /* first, so a result at the same address is not a duplicate */
+ ptr = mandoc_reallocarray(ptr, num, size);
+ dhash_register(file, line, "reallocarray", num, size, ptr, NULL);
+ return ptr;
+}
+
+void *
+mandoc_dbg_recallocarray(void *ptr, size_t oldnum, size_t num, size_t size,
+ const char *file, int line)
+{ /* Tracked recallocarray: replace the table entry for ptr by one for the result. */
+ dhash_purge(file, line, "recallocarray", ptr); /* first, so a result at the same address is not a duplicate */
+ ptr = mandoc_recallocarray(ptr, oldnum, num, size);
+ dhash_register(file, line, "recallocarray", num, size, ptr, NULL); /* only the new count is recorded, not oldnum */
+ return ptr;
+}
+
+char *
+mandoc_dbg_strdup(const char *ptr, const char *file, int line)
+{ /* Tracked strdup: copy the string, then record the copy. */
+ char *p = mandoc_strdup(ptr);
+ dhash_register(file, line, "strdup", 1, strlen(p) + 1, p, ptr); /* the source string is shown in the log line */
+ return p;
+}
+
+char *
+mandoc_dbg_strndup(const char *ptr, size_t sz, const char *file, int line)
+{ /* Tracked strndup: copy the string, then record the copy. */
+ char *p = mandoc_strndup(ptr, sz);
+ dhash_register(file, line, "strndup", 1, strlen(p) + 1, p, NULL); /* no text is logged, unlike strdup; presumably because ptr need not be NUL-terminated - confirm */
+ return p;
+}
+
+void
+mandoc_dbg_free(void *ptr, const char *file, int line)
+{ /* Tracked free: drop the table entry, then release the memory. */
+ dhash_purge(file, line, "free", ptr); /* returns at once for NULL */
+ free(ptr); /* the real free(3): DEBUG_NODEF is defined in this file, so no macro applies */
+}
+
+
+/* *** Memory allocation callbacks for the debugging table. *************** */
+
+static void *
+dhash_alloc(size_t sz, void *arg)
+{ /* Installed as info.alloc in mandoc_dbg_init(). */
+ return malloc(sz); /* arg is ignored */
+}
+
+static void *
+dhash_calloc(size_t n, size_t sz, void *arg)
+{ /* Installed as info.calloc in mandoc_dbg_init(). */
+ return calloc(n, sz); /* arg is ignored */
+}
+
+static void
+dhash_free(void *p, void *arg)
+{ /* Installed as info.free in mandoc_dbg_init(). */
+ free(p); /* arg is ignored */
+}
+
+
+/* *** Debugging utility functions. *************************************** */
+
+/*
+ * Initialize the debugging table, to be called from the top of main().
+ * If the environment variable DEBUG_MEMORY is unset, nothing else
+ * happens: the log stream is stderr and all logging flags stay off.
+ * Otherwise, the summary flag is set and the value is parsed as a
+ * sequence of the flag letters A, F, L, and N, optionally followed
+ * by a file name starting with a slash that is opened for appending
+ * and replaces stderr as the log stream.
+ * Any other character is a fatal error.
+ * Finally, a "P" line with the process ID and all of argv is logged.
+ */
+void
+mandoc_dbg_init(int argc, char *argv[])
+{
+ struct ohash_info info;
+ char *dhash_fn;
+ int argi;
+
+ info.alloc = dhash_alloc;
+ info.calloc = dhash_calloc;
+ info.free = dhash_free;
+ info.data = NULL;
+ info.key_offset = offsetof(struct dhash_entry, ptr); /* entries are keyed by address */
+ ohash_init(&dhash_table, 18, &info);
+
+ dhash_fp = stderr;
+ if ((dhash_fn = getenv("DEBUG_MEMORY")) == NULL)
+ return;
+
+ dhash_sflag = 1;
+ for(;; dhash_fn++) {
+ switch (*dhash_fn) {
+ case '\0': /* end of the value: leave the switch, then the loop */
+ break;
+ case 'A':
+ dhash_aflag = 1;
+ continue; /* go on to the next character */
+ case 'F':
+ dhash_fflag = 1;
+ continue;
+ case 'L':
+ dhash_lflag = 1;
+ continue;
+ case 'N':
+ dhash_nflag = 1;
+ continue;
+ case '/': /* the rest of the value is the log file name */
+ if ((dhash_fp = fopen(dhash_fn, "a+e")) == NULL)
+ err((int)MANDOCLEVEL_SYSERR, "%s", dhash_fn);
+ break;
+ default:
+ errx((int)MANDOCLEVEL_BADARG,
+ "invalid char '%c' in $DEBUG_MEMORY",
+ *dhash_fn);
+ }
+ break; /* reached from the '\0' and '/' cases only */
+ }
+ if (setvbuf(dhash_fp, NULL, _IOLBF, 0) != 0) /* request line buffering for the log */
+ err((int)MANDOCLEVEL_SYSERR, "setvbuf");
+
+ fprintf(dhash_fp, "P %d", getpid());
+ for (argi = 0; argi < argc; argi++)
+ fprintf(dhash_fp, " [%s]", argv[argi]);
+ fprintf(dhash_fp, "\n");
+}
+
+void
+mandoc_dbg_name(const char *name)
+{ /* Log an "N" line containing name; does nothing unless the N flag is set. */
+ if (dhash_nflag)
+ fprintf(dhash_fp, "N %s\n", name);
+}
+
+/*
+ * Hash a pointer and return the table slot currently used for it.
+ * The key is the pointer value itself, that is, its sizeof(ptr)
+ * bytes, not the memory it points to.
+ */
+static unsigned int
+dhash_slot(void *ptr)
+{
+ const char *ks, *ke;
+ uint32_t hv;
+
+ ks = (const char *)&ptr; /* first byte of the local copy of the pointer */
+ ke = ks + sizeof(ptr); /* one past its last byte */
+ hv = ohash_interval(ks, &ke);
+ return ohash_lookup_memory(&dhash_table, ks, sizeof(ptr), hv);
+}
+
+/*
+ * Record one allocation in the debugging table.
+ * With the A flag, or when the address is already in the table,
+ * an "A" line describing the call is logged; str, if not NULL,
+ * is appended to that line in double quotes.
+ * An address that is already in the table is fatal: the existing
+ * entry is printed, an "E" line is logged, and the program exits.
+ */
+static void
+dhash_register(const char *file, int line, const char *func,
+ size_t num, size_t size, void *ptr, const char *str)
+{
+ struct dhash_entry *e;
+ unsigned int slot;
+
+ slot = dhash_slot(ptr);
+ e = ohash_find(&dhash_table, slot); /* non-NULL means ptr is already tracked */
+ if (dhash_aflag || e != NULL) {
+ fprintf(dhash_fp, "A %s:%d %s(%zu, %zu) = %p",
+ file, line, func, num, size, ptr);
+ if (str != NULL)
+ fprintf(dhash_fp, " \"%s\"", str);
+ fprintf(dhash_fp, "\n");
+ }
+ if (e != NULL) {
+ dhash_print(e); /* show where the earlier allocation came from */
+ fprintf(dhash_fp, "E duplicate address %p\n", e->ptr);
+ errx((int)MANDOCLEVEL_BADARG, "duplicate address %p", e->ptr);
+ }
+
+ if ((e = malloc(sizeof(*e))) == NULL)
+ err(1, NULL); /* NOTE(review): literal status 1, unlike the MANDOCLEVEL_SYSERR used elsewhere in this file */
+ e->file = file; /* file and func are stored as pointers, not copied */
+ e->line = line;
+ e->func = func;
+ e->num = num;
+ e->size = size;
+ e->ptr = ptr;
+
+ ohash_insert(&dhash_table, slot, e);
+}
+
+/*
+ * Remove one allocation from the debugging table.
+ * A NULL ptr is ignored without logging anything.
+ * With the F flag, an "F" line naming the caller and ptr is logged
+ * before the entry is removed and its record is released.
+ */
+static void
+dhash_purge(const char *file, int line, const char *func, void *ptr)
+{
+ struct dhash_entry *e;
+ unsigned int slot;
+
+ if (ptr == NULL)
+ return;
+
+ if (dhash_fflag)
+ fprintf(dhash_fp, "F %s:%d %s(%p)\n", file, line, func, ptr);
+
+ slot = dhash_slot(ptr);
+ e = ohash_remove(&dhash_table, slot); /* NOTE(review): presumably NULL if ptr was never registered - confirm */
+ free(e); /* releases the tracking record only, not ptr itself */
+}
+
+/*
+ * Pretty-print information about one allocation:
+ * log an "L" line with the recorded call site, function name,
+ * count, size, and address of the entry.
+ */
+static void
+dhash_print(struct dhash_entry *e)
+{
+ fprintf(dhash_fp, "L %s:%d %s(%zu, %zu) = %p\n",
+ e->file, e->line, e->func, e->num, e->size, e->ptr);
+}
+
+/*
+ * Pretty-print information about all active allocations.
+ * Every entry still in the table is counted as a memory leak;
+ * with the L flag, each of them is printed.
+ * All tracking records and the table itself are then released,
+ * the leak count is logged if the summary flag is set,
+ * and a log stream other than stderr is closed.
+ */
+void
+mandoc_dbg_finish(void)
+{
+ struct dhash_entry *e;
+ unsigned int errcount, slot;
+
+ errcount = ohash_entries(&dhash_table); /* taken before the table is deleted */
+ e = ohash_first(&dhash_table, &slot);
+ while (e != NULL) {
+ if (dhash_lflag)
+ dhash_print(e);
+ free(e); /* the tracking record only, not the leaked memory */
+ e = ohash_next(&dhash_table, &slot);
+ }
+ ohash_delete(&dhash_table);
+ if (dhash_sflag)
+ fprintf(dhash_fp, "S %u memory leaks found\n", errcount);
+ if (dhash_fp != stderr)
+ fclose(dhash_fp);
+}
diff --git a/mandoc_dbg.h b/mandoc_dbg.h
new file mode 100644
index 000000000000..33e53be3b197
--- /dev/null
+++ b/mandoc_dbg.h
@@ -0,0 +1,55 @@
+/* $Id: mandoc_dbg.h,v 1.1 2022/04/14 16:43:44 schwarze Exp $ */
+/*
+ * Copyright (c) 2021 Ingo Schwarze <schwarze@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+int mandoc_dbg_asprintf(const char *, int, char **, const char *, ...)
+ __attribute__((__format__ (__printf__, 4, 5)));
+void *mandoc_dbg_calloc(size_t, size_t, const char *, int);
+void *mandoc_dbg_malloc(size_t, const char *, int);
+void *mandoc_dbg_realloc(void *, size_t, const char *, int);
+void *mandoc_dbg_reallocarray(void *, size_t, size_t,
+ const char *, int);
+void *mandoc_dbg_recallocarray(void *, size_t, size_t, size_t,
+ const char *, int);
+char *mandoc_dbg_strdup(const char *, const char *, int);
+char *mandoc_dbg_strndup(const char *, size_t, const char *, int);
+void mandoc_dbg_free(void *, const char *, int);
+
+void mandoc_dbg_init(int argc, char *argv[]);
+void mandoc_dbg_name(const char *);
+void mandoc_dbg_finish(void);
+
+#ifndef DEBUG_NODEF /* files that define DEBUG_NODEF keep the real functions;
+ * all others get their calls redirected to the tracking wrappers,
+ * with the call site passed along as __FILE__ and __LINE__.
+ * NOTE(review): the mandoc_asprintf() macro appears to need at least
+ * one argument after fmt, because __VA_ARGS__ follows a comma. */
+#define mandoc_asprintf(dest, fmt, ...) \
+ mandoc_dbg_asprintf(__FILE__, __LINE__, (dest), (fmt), __VA_ARGS__)
+#define mandoc_calloc(num, size) \
+ mandoc_dbg_calloc((num), (size), __FILE__, __LINE__)
+#define mandoc_malloc(size) \
+ mandoc_dbg_malloc((size), __FILE__, __LINE__)
+#define mandoc_realloc(ptr, size) \
+ mandoc_dbg_realloc((ptr), (size), __FILE__, __LINE__)
+#define mandoc_reallocarray(ptr, num, size) \
+ mandoc_dbg_reallocarray((ptr), (num), (size), __FILE__, __LINE__)
+#define mandoc_recallocarray(ptr, old, num, size) \
+ mandoc_dbg_recallocarray((ptr), (old), (num), (size), \
+ __FILE__, __LINE__)
+#define mandoc_strdup(ptr) \
+ mandoc_dbg_strdup((ptr), __FILE__, __LINE__)
+#define mandoc_strndup(ptr, size) \
+ mandoc_dbg_strndup((ptr), (size), __FILE__, __LINE__)
+#define free(ptr) \
+ mandoc_dbg_free((ptr), __FILE__, __LINE__)
+#endif /* !DEBUG_NODEF */
diff --git a/mandoc_dbg_init.3 b/mandoc_dbg_init.3
new file mode 100644
index 000000000000..6f9660db58d5
--- /dev/null
+++ b/mandoc_dbg_init.3
@@ -0,0 +1,280 @@
+.\" $Id: mandoc_dbg_init.3,v 1.1 2022/04/14 16:43:44 schwarze Exp $
+.\"
+.\" Copyright (c) 2021, 2022 Ingo Schwarze <schwarze@openbsd.org>
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.Dd $Mdocdate: April 14 2022 $
+.Dt MANDOC_DBG_INIT 3
+.Os
+.Sh NAME
+.Nm mandoc_dbg_init ,
+.Nm mandoc_dbg_name ,
+.Nm mandoc_dbg_finish
+.Nd search for memory leaks in mandoc
+.Sh SYNOPSIS
+.Ft void
+.Fn mandoc_dbg_init "int argc" "char *argv[]"
+.Ft void
+.Fn mandoc_dbg_name "const char *"
+.Ft void
+.Fn mandoc_dbg_finish void
+.Sh DESCRIPTION
+If the mandoc package is built with the line
+.Ql DEBUG_MEMORY=1
+in the file
+.Pa configure.local ,
+the functions documented in
+.Xr mandoc_malloc 3
+and the function
+.Xr free 3
+are instrumented to record every memory allocation in a dedicated
+hash table and to check that every allocation is freed again.
+This compile time option is only intended for binaries that are
+used exclusively for debugging.
+It is not intended for production binaries because it significantly
+increases run time and memory usage and makes the programs more
+fragile and more error-prone.
+.Pp
+The function
+.Fn mandoc_dbg_init
+initializes the memory debugging subsystem.
+It is called from the top of the
+.Fn main
+programs, passing through the arguments that
+.Fn main
+received.
+The
+.Sx ENVIRONMENT
+section of the present manual page explains how the
+.Ev DEBUG_MEMORY
+environment variable controls the amount and destination of reporting.
+.Pp
+The function
+.Fn mandoc_dbg_name
+is called from the
+.Xr mdoc 7
+and
+.Xr man 7
+parsers whenever a
+.Ic \&Dt
+or
+.Ic \&TH
+macro is parsed, passing the complete macro line as the argument.
+.Pp
+The function
+.Fn mandoc_dbg_finish
+performs cleanup and optionally final reporting.
+It is called from the end of the
+.Fn main
+programs, just before normal termination.
+.Pp
+Getting the
+.Sy #include
+directives right for these functions is slightly tricky.
+If a file already includes
+.Qq Pa mandoc_aux.h ,
+no additional directive is needed because
+.Qq Pa mandoc_aux.h
+already includes
+.Qq Pa mandoc_dbg.h
+if
+.Ql DEBUG_MEMORY=1
+is set in
+.Pa configure.local .
+.Pp
+If a file does not need
+.Qq Pa mandoc_aux.h
+but calls a function documented in the present manual page and also calls
+.Xr free 3
+directly, it needs this code before the other
+.Xr mandoc_headers 3 :
+.Bd -literal -offset indent
+#if DEBUG_MEMORY
+#include "mandoc_dbg.h"
+#endif
+.Ed
+.Pp
+If a file calls a function documented in the present manual page
+but does not directly call
+.Xr free 3 ,
+it can use this less intrusive idiom:
+.Bd -literal -offset indent
+#if DEBUG_MEMORY
+#define DEBUG_NODEF
+#include "mandoc_dbg.h"
+#endif
+.Ed
+.Sh ENVIRONMENT
+The environment variable
+.Ev DEBUG_MEMORY
+controls the amount and destination of reporting.
+.Pp
+If it is unset, diagnostic output is directed to standard error output
+and only fatal errors are reported.
+Even though full memory accounting is always performed
+by any binary that was compiled with
+.Ql DEBUG_MEMORY=1 ,
+resulting in a significant increase in both run time and memory usage,
+memory leaks are
+.Em not
+reported when
+.Ev DEBUG_MEMORY
+is not set at run time.
+.Pp
+If
+.Ev DEBUG_MEMORY
+is set, it is interpreted as a string of flags.
+The flags are as follows:
+.Bl -tag -width 1n
+.It Cm A
+Log every allocation.
+This produces huge amounts of output and is usually not needed
+to find memory leaks.
+Its main purpose is debugging the memory debugging subsystem itself.
+.Pp
+When enabled, allocations are logged in this format:
+.Pp
+.D1 Cm A Ar file Ns .c: Ns Ar line function Ns Po Fa nmemb , size Pc\
+ No = Ar address
+.Pp
+The meaning of the fields is the same as for the
+.Cm L
+option.
+.It Cm F
+Log every
+.Xr free 3
+and every reallocation where the memory to be released or reallocated
+was allocated with one of the functions documented in
+.Xr mandoc_malloc 3 .
+Again, this produces huge amounts of output and is usually not
+needed to find memory leaks, and its main purpose is debugging the
+memory debugging subsystem itself.
+.Pp
+The logging format is:
+.Pp
+.D1 Cm F Ar file Ns .c: Ns Ar line function Ns Pq address
+.Pp
+It provides the name of the
+.Ar file
+and the number of the
+.Ar line
+in that file which called the
+.Xr free 3
+or reallocation
+.Ar function ,
+and the
+.Fa address
+that was given as an argument.
+.Pp
+If both the
+.Cm A
+and the
+.Cm F
+flags are enabled, calls to reallocation functions often log two lines,
+first an
+.Cm F
+line reporting the address passed in as an argument, then an
+.Cm A
+line reporting the address returned as the function return value.
+.It Cm L
+Log every memory leak.
+For every allocation made after
+.Fn mandoc_dbg_init
+using functions documented in
+.Xr mandoc_malloc 3
+that was not freed before
+.Fn mandoc_dbg_finish ,
+print a line in this format:
+.Pp
+.D1 Cm L Ar file Ns .c: Ns Ar line function Ns Po Fa nmemb , size Pc\
+ No = Ar address
+.Pp
+It provides the name of the
+.Ar file
+and the number of the
+.Ar line
+in that file which called the allocation
+.Ar function
+with the arguments
+.Fa nmemb
+and
+.Fa size
+documented for
+.Xr calloc 3 .
+If the
+.Ar function
+does not take an
+.Fa nmemb
+argument,
+.Fa nmemb
+is reported as 1.
+At the end of the line, the virtual
+.Ar address
+of the memory returned from the allocation function is reported.
+.It Cm N
+Log the names of manual pages processed in the following formats:
+.Bd -unfilled -offset indent
+.Cm N Pf . Ic \&Dt Ar name section Op Ar architecture
+.Cm N Pf . Ic \&TH Ar name section Op Ar additional arguments
+.Ed
+.Pp
+This is particularly useful if a program crashes, runs out of memory,
+or enters an infinite loop.
+The last
+.Cm N
+line logged often indicates the input file triggering the problem.
+.It Cm /
+Interpret the rest of
+.Ev DEBUG_MEMORY
+as an absolute path and redirect debugging output to that file,
+appending to the file if it already exists or creating it otherwise.
+.El
+.Pp
+If
+.Ev DEBUG_MEMORY
+is set, even if it is empty,
+.Fn mandoc_dbg_init
+always writes the line
+.Pp
+.D1 Cm P Ar pid Sy \&[ Ns Ar progname Ns Sy \&]\
+ Sy \&[ Ns Ar argument Ns Sy \&] Ar ...
+.Pp
+enclosing each element of
+.Fa argv
+in square brackets, such that arguments containing whitespace
+cannot be mistaken for multiple arguments, and
+.Fn mandoc_dbg_finish
+always writes the line:
+.Pp
+.D1 Cm S Ar number No memory leaks found
+.Sh EXAMPLES
+The following is a typical sequence of commands for finding memory
+leaks in the parsers, in the HTML formatter, and in the regression suite:
+.Bd -literal -offset indent
+make distclean
+echo BUILD_CATMAN=1 >> configure.local
+echo DEBUG_MEMORY=1 >> configure.local
+\&./configure
+make
+export DEBUG_MEMORY=NL/tmp/mandoc.debug.txt
+mkdir Out
+export PATH=$PATH:$(pwd)
+\&./catman -T html /usr/share/man Out
+make regress-clean
+make regress
+less /tmp/mandoc.debug.txt
+.Ed
+.Sh SEE ALSO
+.Xr mandoc_malloc 3 ,
+.Xr catman 8
diff --git a/mandoc_escape.3 b/mandoc_escape.3
index 90b672c9d29b..1ff1e68dd06c 100644
--- a/mandoc_escape.3
+++ b/mandoc_escape.3
@@ -1,4 +1,4 @@
-.\" $Id: mandoc_escape.3,v 1.4 2017/07/04 23:40:01 schwarze Exp $
+.\" $Id: mandoc_escape.3,v 1.6 2023/10/23 14:46:22 schwarze Exp $
.\"
.\" Copyright (c) 2014 Ingo Schwarze <schwarze@openbsd.org>
.\"
@@ -14,7 +14,7 @@
.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
.\"
-.Dd $Mdocdate: July 4 2017 $
+.Dd $Mdocdate: October 23 2023 $
.Dt MANDOC_ESCAPE 3
.Os
.Sh NAME
@@ -80,12 +80,12 @@ that can be used as quoting characters.
.El
.Pp
Upon function entry,
-.Fa end
+.Pf * Fa end
is expected to point to the escape sequence identifier.
The values passed in as
-.Fa start
+.Pf * Fa start
and
-.Fa sz
+.Pf * Fa sz
are ignored and overwritten.
.Pp
By design, this function cannot handle those
@@ -100,9 +100,11 @@ width measurements
and numerical expression control
.Ic \eB .
These are handled by
-.Fn roff_res ,
+.Fn roff_expand ,
a private preprocessor function called from
-.Fn roff_parseln ,
+.Fn roff_parseln
+and
+.Fn roff_getarg ,
see the file
.Pa roff.c .
.Pp
@@ -114,13 +116,22 @@ is used
recursively by itself, because some escape sequence arguments can
in turn contain other escape sequences,
.It
-for error detection internally by the
+for parsing and error detection internally by the
.Xr roff 7
parser part of the
.Xr mandoc 3
library, see the file
.Pa roff.c ,
.It
+occasionally by high-level parser and validation modules when they
+need to skip escape sequences while scanning the input, see the files
+.Pa mdoc.c ,
+.Pa man.c ,
+.Pa man_validate.c ,
+.Pa eqn.c ,
+and
+.Pa tbl_data.c ,
+.It
above all externally by the
.Xr mandoc 1
formatting modules, in particular
@@ -139,19 +150,19 @@ to purge escape sequences from text.
.El
.Sh RETURN VALUES
Upon function return, the pointer
-.Fa end
+.Pf * Fa end
is set to the character after the end of the escape sequence,
such that the calling higher-level parser can easily continue.
.Pp
For escape sequences taking an argument, the pointer
-.Fa start
+.Pf * Fa start
is set to the beginning of the argument and
-.Fa sz
+.Pf * Fa sz
is set to the length of the argument.
For escape sequences not taking an argument,
-.Fa start
+.Pf * Fa start
is set to the character after the end of the sequence and
-.Fa sz
+.Pf * Fa sz
is set to 0.
Both
.Fa start
@@ -165,6 +176,11 @@ For sequences taking an argument, the function
.Fn mandoc_escape
returns one of the following values:
.Bl -tag -width 2n
+.It Dv ESCAPE_DEVICE
+The escape sequence
+.Ic \e*(.T
+or
+.Ic \e*[.T] .
.It Dv ESCAPE_FONT
The escape sequence
.Ic \ef
@@ -183,6 +199,33 @@ More specific values are returned for the most commonly used arguments:
.It Cm P Ta Dv ESCAPE_FONTPREV
.It Cm BI Ta Dv ESCAPE_FONTBI
.El
+.It Dv ESCAPE_HLINE
+The escape sequence
+.Ic \eh
+followed by an argument delimited by an arbitrary character.
+.It Dv ESCAPE_HORIZ
+The escape sequence
+.Ic \el
+followed by an argument delimited by an arbitrary character.
+.It Dv ESCAPE_NUMBERED
+The escape sequence
+.Ic \eN
+followed by a delimited argument.
+The delimiter character is arbitrary except that digits cannot be used.
+If a digit is encountered instead of the opening delimiter, that
+digit is considered to be the argument and the end of the sequence, and
+.Dv ESCAPE_IGNORE
+is returned.
+.Pp
+Such ASCII character escape sequences can be rendered using the function
+.Fn mchars_num2char
+described in the
+.Xr mchars_alloc 3
+manual.
+.It Dv ESCAPE_OVERSTRIKE
+The escape sequence
+.Ic \eo
+followed by an argument delimited by an arbitrary character.
.It Dv ESCAPE_SPECIAL
The escape sequence
.Ic \eC
@@ -225,11 +268,11 @@ are hexadecimal digits and
is not zero:
.Ic \eC'u , \e[u .
As a special exception,
-.Fa start
+.Pf * Fa start
is set to the character after the
.Ic u ,
and the
-.Fa sz
+.Pf * Fa sz
return value does not include the
.Ic u
either.
@@ -239,26 +282,10 @@ Such Unicode character escape sequences can be rendered using the function
described in the
.Xr mchars_alloc 3
manual.
-.It Dv ESCAPE_NUMBERED
-The escape sequence
-.Ic \eN
-followed by a delimited argument.
-The delimiter character is arbitrary except that digits cannot be used.
-If a digit is encountered instead of the opening delimiter, that
-digit is considered to be the argument and the end of the sequence, and
-.Dv ESCAPE_IGNORE
-is returned.
-.Pp
-Such ASCII character escape sequences can be rendered using the function
-.Fn mchars_num2char
-described in the
-.Xr mchars_alloc 3
-manual.
-.It Dv ESCAPE_OVERSTRIKE
-The escape sequence
-.Ic \eo
-followed by an argument delimited by an arbitrary character.
.It Dv ESCAPE_IGNORE
+Many escape sequences that
+.Xr mandoc 1
+intends to ignore, in particular:
.Bl -bullet -width 2n
.It
The escape sequence
@@ -276,18 +303,15 @@ for all forms.
.It
The escape sequences
.Ic \eF ,
-.Ic \eg ,
.Ic \ek ,
.Ic \eM ,
.Ic \em ,
-.Ic \en ,
-.Ic \eV ,
+.Ic \eO ,
and
.Ic \eY
followed by an argument in standard form.
.It
The escape sequences
-.Ic \eA ,
.Ic \eb ,
.Ic \eD ,
.Ic \eR ,
@@ -298,9 +322,7 @@ followed by an argument delimited by an arbitrary character.
.It
The escape sequences
.Ic \eH ,
-.Ic \eh ,
.Ic \eL ,
-.Ic \el ,
.Ic \eS ,
.Ic \ev ,
and
@@ -312,9 +334,21 @@ is found instead of a delimiter, the sequence is considered to end
with that character, and
.Dv ESCAPE_ERROR
is returned.
+.It
+The escape sequence
+.Ic \eO
+with a single-digit argument in the range from 1 to 4 inclusive.
.El
+.It Dv ESCAPE_UNSUPP
+An escape sequence that
+.Xr mandoc 1
+can parse, but for which formatting is unsupported, in particular
+.Qq \eO0
+and
+.Qq \eO5 .
.It Dv ESCAPE_ERROR
-Escape sequences taking an argument but not matching any of the above patterns.
+Escape sequences taking an argument
+where the actual argument contains a syntax error.
In particular, that happens if the end of the logical input line
is reached before the end of the argument.
.El
@@ -323,17 +357,45 @@ For sequences that do not take an argument, the function
.Fn mandoc_escape
returns one of the following values:
.Bl -tag -width 2n
-.It Dv ESCAPE_SKIPCHAR
+.It Dv ESCAPE_BREAK
The escape sequence
-.Qq \ez .
+.Qq \ep .
+.It Dv ESCAPE_IGNORE
+Many escape sequences including
+.Qq \e% ,
+.Qq \e& ,
+.Qq \e| ,
+.Qq \ed ,
+and
+.Qq \eu .
.It Dv ESCAPE_NOSPACE
The escape sequence
.Qq \ec .
-.It Dv ESCAPE_IGNORE
+.It Dv ESCAPE_SKIPCHAR
+The escape sequence
+.Qq \ez .
+.It Dv ESCAPE_UNSUPP
The escape sequences
-.Qq \ed
+.Qq \e! ,
+.Qq \e? ,
and
-.Qq \eu .
+.Qq \er .
+.It Dv ESCAPE_UNDEF
+Many escape sequences that other
+.Xr roff 7
+implementations do not define either, for example
+.Qq \eG ,
+.Qq \eI ,
+.Qq \ei ,
+.Qq \eJ ,
+.Qq \ej ,
+.Qq \eK ,
+.Qq \eP ,
+.Qq \eT ,
+.Qq \eU ,
+.Qq \eW ,
+and
+.Qq \ey .
.El
.Sh FILES
This function is implemented in
@@ -347,21 +409,3 @@ This function has been available since mandoc 1.11.2.
.Sh AUTHORS
.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv
.An Ingo Schwarze Aq Mt schwarze@openbsd.org
-.Sh BUGS
-The function doesn't cleanly distinguish between sequences that are
-valid and supported, valid and ignored, valid and unsupported,
-syntactically invalid, or undefined.
-For sequences that are ignored or unsupported, it doesn't tell
-whether that deficiency is likely to cause major formatting problems
-and/or loss of document content.
-The function is already rather complicated and still parses some
-sequences incorrectly.
-.
-.ig
-For these sequences, the list given below specifies a starting string
-and either the length of the argument or an ending character.
-The argument starts after the starting string.
-In the former case, the sequence ends with the end of the argument.
-In the latter case, the argument ends before the ending character,
-and the sequence ends with the ending character.
-..
diff --git a/mandoc_headers.3 b/mandoc_headers.3
index 7fe6d379f864..ae01c7bf6bb6 100644
--- a/mandoc_headers.3
+++ b/mandoc_headers.3
@@ -1,6 +1,6 @@
-.\" $Id: mandoc_headers.3,v 1.34 2021/08/10 12:55:03 schwarze Exp $
+.\" $Id: mandoc_headers.3,v 1.35 2022/04/14 16:43:44 schwarze Exp $
.\"
-.\" Copyright (c) 2014-2021 Ingo Schwarze <schwarze@openbsd.org>
+.\" Copyright (c) 2014-2022 Ingo Schwarze <schwarze@openbsd.org>
.\"
.\" Permission to use, copy, modify, and distribute this software for any
.\" purpose with or without fee is hereby granted, provided that the above
@@ -14,7 +14,7 @@
.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
.\"
-.Dd $Mdocdate: August 10 2021 $
+.Dd $Mdocdate: April 14 2022 $
.Dt MANDOC_HEADERS 3
.Os
.Sh NAME
@@ -25,8 +25,8 @@ To support a cleaner coding style, the mandoc header files do not
contain any include directives and do not guard against multiple
inclusion.
The application developer has to make sure that the headers are
-included in a proper order, and that no header is included more
-than once.
+included in the order shown in this manual page,
+and that no header is included more than once.
.Pp
The headers and functions form three major groups:
.Sx Parser interface ,
@@ -83,6 +83,33 @@ for
.Pp
Provides the functions documented in
.Xr mandoc_malloc 3 .
+.Pp
+When this header is included, the same file must not include
+.Qq Pa mandoc_dbg.h
+because
+.Qq Pa mandoc_aux.h
+automatically includes
+.Qq Pa mandoc_dbg.h
+if and only if the preprocessor symbol
+.Dv DEBUG_MEMORY
+is defined.
+.It Qq Pa mandoc_dbg.h
+Debugging utility functions and
+debugging wrappers around memory allocation functions.
+.Pp
+Requires
+.In sys/types.h
+for
+.Vt size_t .
+.Pp
+Provides the functions documented in
+.Xr mandoc_dbg_init 3 .
+.Pp
+This header must not be included unless the preprocessor symbol
+.Dv DEBUG_MEMORY
+is defined.
+When this header is included, the same file must not include
+.Qq Pa mandoc_aux.h .
.It Qq Pa mandoc_ohash.h
Hashing utility functions; can be used everywhere.
.Pp
diff --git a/mandoc_html.3 b/mandoc_html.3
index a77d0e04fce4..a7c8d7960c4b 100644
--- a/mandoc_html.3
+++ b/mandoc_html.3
@@ -1,4 +1,4 @@
-.\" $Id: mandoc_html.3,v 1.23 2020/04/24 13:13:06 schwarze Exp $
+.\" $Id: mandoc_html.3,v 1.24 2022/06/24 11:15:53 schwarze Exp $
.\"
.\" Copyright (c) 2014, 2017, 2018 Ingo Schwarze <schwarze@openbsd.org>
.\"
@@ -14,7 +14,7 @@
.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
.\"
-.Dd $Mdocdate: April 24 2020 $
+.Dd $Mdocdate: June 24 2022 $
.Dt MANDOC_HTML 3
.Os
.Sh NAME
@@ -221,6 +221,10 @@ option.
Print an
.Cm id
attribute.
+.It Cm r
+Print an ARIA
+.Cm role
+attribute.
.It Cm \&?
Print an arbitrary attribute.
This format letter requires two
diff --git a/mandoc_msg.c b/mandoc_msg.c
index beec5059a28c..baa709c70c83 100644
--- a/mandoc_msg.c
+++ b/mandoc_msg.c
@@ -1,6 +1,6 @@
/* $OpenBSD: mandoc_msg.c,v 1.8 2020/01/19 17:59:01 schwarze Exp $ */
/*
- * Copyright (c) 2014-2021 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2014-2022 Ingo Schwarze <schwarze@openbsd.org>
* Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
*
* Permission to use, copy, modify, and distribute this software for any
@@ -166,13 +166,14 @@ static const char *const type_message[MANDOCERR_MAX] = {
"invalid Boolean argument",
"argument contains two font escapes",
"unknown font, skipping request",
+ "ignoring distance argument",
"odd number of characters in request",
/* related to plain text */
"blank line in fill mode, using .sp",
"tab in filled text",
"new sentence, new line",
- "invalid escape sequence",
+ "invalid escape sequence argument",
"undefined escape, printing literally",
"undefined string, using \"\"",
@@ -216,11 +217,13 @@ static const char *const type_message[MANDOCERR_MAX] = {
"escaped character not allowed in a name",
"using macro argument outside macro",
"argument number is not numeric",
+ "negative argument, using 0",
"NOT IMPLEMENTED: Bd -file",
"skipping display without arguments",
"missing list type, using -item",
"argument is not numeric, using 1",
"argument is not a character",
+ "skipping unusable escape sequence",
"missing manual name, using \"\"",
"uname(3) system call failed, using UNKNOWN",
"unknown standard specifier",
@@ -233,6 +236,12 @@ static const char *const type_message[MANDOCERR_MAX] = {
"skipping excess arguments",
"divide by zero",
+ /* related to escape sequences */
+ "incomplete escape sequence",
+ "invalid special character",
+ "unknown special character",
+ "invalid escape argument delimiter",
+
"unsupported feature",
"input too large",
"unsupported control character",
diff --git a/mandocd.8 b/mandocd.8
index 2ea18cfc9f07..d679deb1b9e4 100644
--- a/mandocd.8
+++ b/mandocd.8
@@ -1,4 +1,4 @@
-.\" $Id: mandocd.8,v 1.2 2017/03/18 19:56:01 schwarze Exp $
+.\" $Id: mandocd.8,v 1.3 2021/09/28 15:41:41 schwarze Exp $
.\"
.\" Copyright (c) 2017 Ingo Schwarze <schwarze@openbsd.org>
.\"
@@ -14,7 +14,7 @@
.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
.\"
-.Dd $Mdocdate: March 18 2017 $
+.Dd $Mdocdate: September 28 2021 $
.Dt MANDOCD 8
.Os
.Sh NAME
@@ -174,7 +174,7 @@ will simply accept and process the next input file descriptor.
.Sh HISTORY
The
.Nm
-utility appeared in version 1.14.1 or the
+utility appeared in version 1.14.1 of the
.Sy mandoc
toolkit.
.Sh AUTHORS
@@ -194,5 +194,5 @@ requests,
needs to be started with the current working directory set to the
root of the manual page tree.
Avoid starting it in directories that contain secret files in any
-subdirectories, in particular in the user starting it has read
+subdirectories, in particular if the user starting it has read
access to these secret files.
diff --git a/mandocd.c b/mandocd.c
index 60e40fed8023..ccc846bd0310 100644
--- a/mandocd.c
+++ b/mandocd.c
@@ -1,7 +1,7 @@
-/* $Id: mandocd.c,v 1.12 2020/06/14 23:40:31 schwarze Exp $ */
+/* $Id: mandocd.c,v 1.13 2022/04/14 16:43:44 schwarze Exp $ */
/*
* Copyright (c) 2017 Michael Stapelberg <stapelberg@debian.org>
- * Copyright (c) 2017, 2019 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2017, 2019, 2021 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -35,6 +35,10 @@
#include <unistd.h>
#include "mandoc.h"
+#if DEBUG_MEMORY
+#define DEBUG_NODEF 1
+#include "mandoc_dbg.h"
+#endif
#include "roff.h"
#include "mdoc.h"
#include "man.h"
@@ -129,6 +133,10 @@ main(int argc, char *argv[])
int state, opt;
enum outt outtype;
+#if DEBUG_MEMORY
+ mandoc_dbg_init(argc, argv);
+#endif
+
defos = NULL;
outtype = OUTT_ASCII;
while ((opt = getopt(argc, argv, "I:T:")) != -1) {
@@ -240,6 +248,9 @@ main(int argc, char *argv[])
}
mparse_free(parser);
mchars_free();
+#if DEBUG_MEMORY
+ mandoc_dbg_finish();
+#endif
return state == -1 ? 1 : 0;
}
diff --git a/mandocdb.c b/mandocdb.c
index a3360fe44d01..c26a38a5f233 100644
--- a/mandocdb.c
+++ b/mandocdb.c
@@ -1,6 +1,6 @@
-/* $Id: mandocdb.c,v 1.269 2021/08/19 16:55:31 schwarze Exp $ */
+/* $Id: mandocdb.c,v 1.274 2024/05/14 21:19:12 schwarze Exp $ */
/*
- * Copyright (c) 2011-2020 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2011-2021, 2024 Ingo Schwarze <schwarze@openbsd.org>
* Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2016 Ed Maste <emaste@freebsd.org>
*
@@ -173,7 +173,7 @@ static void say(const char *, const char *, ...)
__attribute__((__format__ (__printf__, 2, 3)));
static int set_basedir(const char *, int);
static int treescan(void);
-static size_t utf8(unsigned int, char [7]);
+static size_t utf8(unsigned int, char[5]);
static int nodb; /* no database changes */
static int mparse_options; /* abort the parse early */
@@ -353,7 +353,7 @@ mandocdb(int argc, char *argv[])
goto usage; \
} while (/*CONSTCOND*/0)
- mparse_options = MPARSE_VALIDATE;
+ mparse_options = MPARSE_UTF8 | MPARSE_LATIN1 | MPARSE_VALIDATE;
path_arg = NULL;
op = OP_DEFAULT;
@@ -532,6 +532,9 @@ out:
mpages_free();
ohash_delete(&mpages);
ohash_delete(&mlinks);
+#if DEBUG_MEMORY
+ mandoc_dbg_finish();
+#endif
return exitcode;
usage:
progname = getprogname();
@@ -801,7 +804,7 @@ filescan(const char *infile)
* We have to do lstat(2) before realpath(3) loses
* the information whether this is a symbolic link.
* We need to know that because for symbolic links,
- * we want to use the orginal file name, while for
+ * we want to use the original file name, while for
* regular files, we want to use the real path.
*/
if (lstat(infile, &st) == -1) {
@@ -1904,49 +1907,35 @@ putkeys(const struct mpage *mpage, char *cp, size_t sz, uint64_t v)
* Take a Unicode codepoint and produce its UTF-8 encoding.
* This isn't the best way to do this, but it works.
* The magic numbers are from the UTF-8 packaging.
- * They're not as scary as they seem: read the UTF-8 spec for details.
+ * Read the UTF-8 spec or the utf8(7) manual page for details.
*/
static size_t
-utf8(unsigned int cp, char out[7])
+utf8(unsigned int cp, char out[5])
{
size_t rc;
- rc = 0;
- if (cp <= 0x0000007F) {
+ if (cp <= 0x7f) {
rc = 1;
out[0] = (char)cp;
- } else if (cp <= 0x000007FF) {
+ } else if (cp <= 0x7ff) {
rc = 2;
out[0] = (cp >> 6 & 31) | 192;
out[1] = (cp & 63) | 128;
- } else if (cp <= 0x0000FFFF) {
+ } else if (cp >= 0xd800 && cp <= 0xdfff) {
+ rc = 0; /* reject UTF-16 surrogate */
+ } else if (cp <= 0xffff) {
rc = 3;
out[0] = (cp >> 12 & 15) | 224;
out[1] = (cp >> 6 & 63) | 128;
out[2] = (cp & 63) | 128;
- } else if (cp <= 0x001FFFFF) {
+ } else if (cp <= 0x10ffff) {
rc = 4;
out[0] = (cp >> 18 & 7) | 240;
out[1] = (cp >> 12 & 63) | 128;
out[2] = (cp >> 6 & 63) | 128;
out[3] = (cp & 63) | 128;
- } else if (cp <= 0x03FFFFFF) {
- rc = 5;
- out[0] = (cp >> 24 & 3) | 248;
- out[1] = (cp >> 18 & 63) | 128;
- out[2] = (cp >> 12 & 63) | 128;
- out[3] = (cp >> 6 & 63) | 128;
- out[4] = (cp & 63) | 128;
- } else if (cp <= 0x7FFFFFFF) {
- rc = 6;
- out[0] = (cp >> 30 & 1) | 252;
- out[1] = (cp >> 24 & 63) | 128;
- out[2] = (cp >> 18 & 63) | 128;
- out[3] = (cp >> 12 & 63) | 128;
- out[4] = (cp >> 6 & 63) | 128;
- out[5] = (cp & 63) | 128;
} else
- return 0;
+ rc = 0;
out[rc] = '\0';
return rc;
@@ -2028,7 +2017,21 @@ render_string(char **public, size_t *psz)
*/
scp++;
- if (mandoc_escape(&scp, &seq, &seqlen) != ESCAPE_SPECIAL)
+ switch (mandoc_escape(&scp, &seq, &seqlen)) {
+ case ESCAPE_UNICODE:
+ unicode = mchars_num2uc(seq + 1, seqlen - 1);
+ break;
+ case ESCAPE_NUMBERED:
+ unicode = mchars_num2char(seq, seqlen);
+ break;
+ case ESCAPE_SPECIAL:
+ unicode = mchars_spec2cp(seq, seqlen);
+ break;
+ default:
+ unicode = -1;
+ break;
+ }
+ if (unicode <= 0)
continue;
/*
@@ -2037,21 +2040,17 @@ render_string(char **public, size_t *psz)
*/
if (write_utf8) {
- unicode = mchars_spec2cp(seq, seqlen);
- if (unicode <= 0)
- continue;
addsz = utf8(unicode, utfbuf);
if (addsz == 0)
continue;
addcp = utfbuf;
} else {
- addcp = mchars_spec2str(seq, seqlen, &addsz);
+ addcp = mchars_uc2str(unicode);
if (addcp == NULL)
continue;
- if (*addcp == ASCII_NBRSP) {
+ if (*addcp == ASCII_NBRSP)
addcp = " ";
- addsz = 1;
- }
+ addsz = strlen(addcp);
}
/* Copy the rendered glyph into the stream. */
@@ -2251,11 +2250,11 @@ dbwrite(struct dba *dba)
say(tfn, "&dba_write");
goto err;
}
- if ((fd1 = open(MANDOC_DB, O_RDONLY, 0)) == -1) {
+ if ((fd1 = open(MANDOC_DB, O_RDONLY)) == -1) {
say(MANDOC_DB, "&open");
goto err;
}
- if ((fd2 = open(tfn, O_RDONLY, 0)) == -1) {
+ if ((fd2 = open(tfn, O_RDONLY)) == -1) {
say(tfn, "&open");
goto err;
}
diff --git a/manpath.c b/manpath.c
index 255d748246d6..3760e2293c3a 100644
--- a/manpath.c
+++ b/manpath.c
@@ -1,4 +1,4 @@
-/* $Id: manpath.c,v 1.43 2020/08/27 14:59:47 schwarze Exp $ */
+/* $Id: manpath.c,v 1.44 2021/11/05 18:03:08 schwarze Exp $ */
/*
* Copyright (c) 2011,2014,2015,2017-2019 Ingo Schwarze <schwarze@openbsd.org>
* Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
@@ -31,63 +31,51 @@
#include "mandoc.h"
#include "manconf.h"
-static void manconf_file(struct manconf *, const char *);
+static void manconf_file(struct manconf *, const char *, int);
static void manpath_add(struct manpaths *, const char *, char);
static void manpath_parseline(struct manpaths *, char *, char);
void
-manconf_parse(struct manconf *conf, const char *file,
- char *defp, char *auxp)
+manconf_parse(struct manconf *conf, const char *file, char *pend, char *pbeg)
{
- char *insert;
+ int use_path_from_file = 1;
/* Always prepend -m. */
- manpath_parseline(&conf->manpath, auxp, 'm');
-
- /* If -M is given, it overrides everything else. */
- if (NULL != defp) {
- manpath_parseline(&conf->manpath, defp, 'M');
- return;
- }
-
- /* MANPATH and man.conf(5) cooperate. */
- defp = getenv("MANPATH");
- if (NULL == file)
- file = MAN_CONF_FILE;
-
- /* No MANPATH; use man.conf(5) only. */
- if (NULL == defp || '\0' == defp[0]) {
- manconf_file(conf, file);
- return;
- }
-
- /* Prepend man.conf(5) to MANPATH. */
- if (':' == defp[0]) {
- manconf_file(conf, file);
- manpath_parseline(&conf->manpath, defp, '\0');
- return;
+ manpath_parseline(&conf->manpath, pbeg, 'm');
+
+ if (pend != NULL && *pend != '\0') {
+ /* If -M is given, it overrides everything else. */
+ manpath_parseline(&conf->manpath, pend, 'M');
+ use_path_from_file = 0;
+ pbeg = pend = NULL;
+ } else if ((pbeg = getenv("MANPATH")) == NULL || *pbeg == '\0') {
+ /* No MANPATH; use man.conf(5) only. */
+ pbeg = pend = NULL;
+ } else if (*pbeg == ':') {
+ /* Prepend man.conf(5) to MANPATH. */
+ pend = pbeg + 1;
+ pbeg = NULL;
+ } else if ((pend = strstr(pbeg, "::")) != NULL) {
+ /* Insert man.conf(5) into MANPATH. */
+ *pend = '\0';
+ pend += 2;
+ } else if (pbeg[strlen(pbeg) - 1] == ':') {
+ /* Append man.conf(5) to MANPATH. */
+ pend = NULL;
+ } else {
+ /* MANPATH overrides man.conf(5) completely. */
+ use_path_from_file = 0;
+ pend = NULL;
}
- /* Append man.conf(5) to MANPATH. */
- if (':' == defp[strlen(defp) - 1]) {
- manpath_parseline(&conf->manpath, defp, '\0');
- manconf_file(conf, file);
- return;
- }
+ manpath_parseline(&conf->manpath, pbeg, '\0');
- /* Insert man.conf(5) into MANPATH. */
- insert = strstr(defp, "::");
- if (NULL != insert) {
- *insert++ = '\0';
- manpath_parseline(&conf->manpath, defp, '\0');
- manconf_file(conf, file);
- manpath_parseline(&conf->manpath, insert + 1, '\0');
- return;
- }
+ if (file == NULL)
+ file = MAN_CONF_FILE;
+ manconf_file(conf, file, use_path_from_file);
- /* MANPATH overrides man.conf(5) completely. */
- manpath_parseline(&conf->manpath, defp, '\0');
+ manpath_parseline(&conf->manpath, pend, '\0');
}
void
@@ -161,7 +149,7 @@ manconf_free(struct manconf *conf)
}
static void
-manconf_file(struct manconf *conf, const char *file)
+manconf_file(struct manconf *conf, const char *file, int use_path_from_file)
{
const char *const toks[] = { "manpath", "output" };
char manpath_default[] = MANPATH_DEFAULT;
@@ -201,7 +189,8 @@ manconf_file(struct manconf *conf, const char *file)
switch (tok) {
case 0: /* manpath */
- manpath_add(&conf->manpath, cp, '\0');
+ if (use_path_from_file)
+ manpath_add(&conf->manpath, cp, '\0');
*manpath_default = '\0';
break;
case 1: /* output */
@@ -215,7 +204,7 @@ manconf_file(struct manconf *conf, const char *file)
fclose(stream);
out:
- if (*manpath_default != '\0')
+ if (use_path_from_file && *manpath_default != '\0')
manpath_parseline(&conf->manpath, manpath_default, '\0');
}
diff --git a/mansearch.c b/mansearch.c
index 59a35771970c..6732257d3660 100644
--- a/mansearch.c
+++ b/mansearch.c
@@ -1,4 +1,4 @@
-/* $Id: mansearch.c,v 1.82 2019/07/01 22:56:24 schwarze Exp $ */
+/* $Id: mansearch.c,v 1.84 2023/04/28 19:11:03 schwarze Exp $ */
/*
* Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2013-2018 Ingo Schwarze <schwarze@openbsd.org>
@@ -220,7 +220,7 @@ mansearch(const struct mansearch *search,
if (cur && search->firstmatch)
break;
}
- if (res != NULL)
+ if (res != NULL && cur > 1)
qsort(*res, cur, sizeof(struct manpage), manpage_compare);
if (chdir_status && getcwd_status && chdir(buf) == -1)
warn("%s", buf);
@@ -491,7 +491,7 @@ lstlen(const char *cp, size_t sep)
/*
* Print the NUL-terminated list of NUL-terminated strings
- * into the buffer, seperating strings with sep.
+ * into the buffer, separating strings with sep.
*/
static void
lstcat(char *buf, size_t *i, const char *cp, const char *sep)
diff --git a/mdoc.7 b/mdoc.7
index 7ac6ff11c00d..0eb697f9f0a7 100644
--- a/mdoc.7
+++ b/mdoc.7
@@ -1,4 +1,4 @@
-.\" $Id: mdoc.7,v 1.287 2021/07/29 17:32:01 schwarze Exp $
+.\" $Id: mdoc.7,v 1.294 2024/09/22 10:34:58 schwarze Exp $
.\"
.\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
.\" Copyright (c) 2010, 2011, 2013-2020 Ingo Schwarze <schwarze@openbsd.org>
@@ -15,7 +15,7 @@
.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
.\"
-.Dd $Mdocdate: July 29 2021 $
+.Dd $Mdocdate: September 22 2024 $
.Dt MDOC 7
.Os
.Sh NAME
@@ -439,7 +439,7 @@ in the alphabetical
.Bl -column "Brq, Bro, Brc" description
.It Ic \&Dd Ta document date: Cm $\&Mdocdate$ | Ar month day , year
.It Ic \&Dt Ta document title: Ar TITLE section Op Ar arch
-.It Ic \&Os Ta operating system version: Op Ar system Op Ar version
+.It Ic \&Os Ta operating system footer: Op Ar footer text
.It Ic \&Nm Ta document name (one argument)
.It Ic \&Nd Ta document description (one line)
.El
@@ -1015,7 +1015,7 @@ A numbered list.
No item heads can be specified.
Formatted like
.Fl bullet ,
-except that cardinal numbers are used in place of bullets,
+except that ordinal numbers are used in place of bullets,
starting at 1.
.It Fl hang
Like
@@ -1614,7 +1614,7 @@ Examples:
.Dl ".Nm aucat Fl o Fl"
.Dl ".Nm kill Fl Ar signal_number"
.Pp
-For GNU-sytle long options, escaping the additional hyphen-minus is not
+For GNU-style long options, escaping the additional hyphen-minus is not
strictly required, but may be safer with future versions of GNU troff; see
.Xr mandoc_char 7
for details.
@@ -2074,31 +2074,28 @@ Examples:
See also
.Ic \&Oo .
.Tg Os
-.It Ic \&Os Op Ar system Op Ar version
-Operating system version for display in the page footer.
-This is the mandatory third macro of
-any
+.It Ic \&Os Op Ar footer text
+The mandatory third macro of every
.Nm
file.
+Usually, do not specify any arguments,
+in particular not the operating system name and/or version.
.Pp
-The optional
-.Ar system
-parameter specifies the relevant operating system or environment.
-It is suggested to leave it unspecified, in which case
+If no argument is given,
.Xr mandoc 1
-uses its
+prints its
.Fl Ios
-argument or, if that isn't specified either,
+argument in the page footer, or
.Fa sysname
and
.Fa release
as returned by
-.Xr uname 3 .
+.Xr uname 3
+by default.
.Pp
-Examples:
-.Dl \&.Os
-.Dl \&.Os KTH/CSC/TCS
-.Dl \&.Os BSD 4.3
+Manual pages that are part of a portable software project can override
+the default by giving the project name and version number as arguments,
+but leaving it blank is never a bad choice.
.Pp
See also
.Ic \&Dd
@@ -2408,7 +2405,7 @@ The second major version of the C language standard.
.br
The third major version of the C language standard.
.El
-.It POSIX.1 before the Single UNIX Specification
+.It POSIX.1 before XPG4.2
.Pp
.Bl -tag -width "-p1003.1g-2000" -compact
.It \-p1003.1-88
@@ -2449,7 +2446,7 @@ Technical Corrigendum.
.br
Includes POSIX.1-1990, 1b, 1c, and 1i.
.El
-.It X/Open Portability Guide version 4 and related standards
+.It X/Open Portability Guide before XPG4.2
.Pp
.Bl -tag -width "-p1003.1g-2000" -compact
.It \-xpg3
@@ -2476,7 +2473,7 @@ Updates to POSIX.2.
.br
Based on POSIX.1 and POSIX.2, published in 1992.
.El
-.It Single UNIX Specification version 1 and related standards
+.It X/Open Portability Guide Issue 4 Version 2 and related standards
.Pp
.Bl -tag -width "-p1003.1g-2000" -compact
.It \-susv1
@@ -2486,10 +2483,7 @@ Based on POSIX.1 and POSIX.2, published in 1992.
.br
This standard was published in 1994.
It was used as the basis for UNIX 95 certification.
-The following three refer to parts of it.
-.Pp
-.It \-xsh4.2
-.St -xsh4.2
+The following two refer to parts of it.
.Pp
.It \-xcurses4.2
.St -xcurses4.2
@@ -2504,13 +2498,14 @@ Networking APIs, including sockets.
.br
Published in 1995.
.El
-.It Single UNIX Specification version 2 and related standards
+.It X/Open Portability Guide Issue 5 and related standards
.Pp
.Bl -tag -width "-p1003.1g-2000" -compact
.It \-susv2
.St -susv2
+.br
This standard was published in 1997
-and is also called X/Open Portability Guide version 5.
+and is also called X/Open Portability Guide Issue 5.
It was used as the basis for UNIX 98 certification.
The following refer to parts of it.
.Pp
@@ -2528,7 +2523,7 @@ The following refer to parts of it.
.It \-xns5.2
.St -xns5.2
.El
-.It Single UNIX Specification version 3
+.It POSIX Issue 6
.Pp
.Bl -tag -width "-p1003.1-2001" -compact
.It \-p1003.1-2001
@@ -2537,7 +2532,7 @@ The following refer to parts of it.
.St -susv3
.br
This standard is based on C99, SUSv2, POSIX.1-1996, 1d, and 1j.
-It is also called X/Open Portability Guide version 6.
+It is also called X/Open Portability Guide Issue 6.
It is used as the basis for UNIX 03 certification.
.Pp
.It \-p1003.1-2004
@@ -2545,7 +2540,7 @@ It is used as the basis for UNIX 03 certification.
.br
The second and last Technical Corrigendum.
.El
-.It Single UNIX Specification version 4
+.It POSIX Issues 7 and 8
.Pp
.Bl -tag -width "-p1003.1g-2000" -compact
.It \-p1003.1-2008
@@ -2553,8 +2548,18 @@ The second and last Technical Corrigendum.
.It \-susv4
.St -susv4
.br
-This standard is also called
-X/Open Portability Guide version 7.
+This standard is based on C99.
+It is also called the
+Open Group Standard Base Specifications, Issue 7.
+.El
+.Pp
+.Bl -tag -width "-p1003.1g-2000" -compact
+.It \-p1003.1-2024
+.St -p1003.1-2024
+.br
+This standard is based on C17.
+It is also called the
+Open Group Standard Base Specifications, Issue 8.
.El
.It Other standards
.Pp
@@ -3012,7 +3017,7 @@ then the macro accepts an arbitrary number of arguments.
.It Ic \&Fx Ta Yes Ta Yes Ta n
.It Ic \&Hf Ta \&No Ta \&No Ta n
.It Ic \&Ic Ta Yes Ta Yes Ta >0
-.It Ic \&In Ta \&No Ta \&No Ta 1
+.It Ic \&In Ta Yes Ta Yes Ta 1
.It Ic \&Lb Ta \&No Ta \&No Ta 1
.It Ic \&Li Ta Yes Ta Yes Ta >0
.It Ic \&Lk Ta Yes Ta Yes Ta >0
diff --git a/mdoc_html.c b/mdoc_html.c
index c0a0a6a56978..74d753a76bfb 100644
--- a/mdoc_html.c
+++ b/mdoc_html.c
@@ -1,7 +1,8 @@
-/* $Id: mdoc_html.c,v 1.342 2021/03/30 19:26:20 schwarze Exp $ */
+/* $Id: mdoc_html.c,v 1.350 2022/07/06 16:05:40 schwarze Exp $ */
/*
- * Copyright (c) 2014-2021 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2014-2022 Ingo Schwarze <schwarze@openbsd.org>
* Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
+ * Copyright (c) 2022 Anna Vyalkova <cyber@sysrq.in>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -292,16 +293,16 @@ html_mdoc(void *arg, const struct roff_meta *mdoc)
if ((h->oflags & HTML_FRAGMENT) == 0) {
print_gen_decls(h);
print_otag(h, TAG_HTML, "");
- if (n != NULL && n->type == ROFFT_COMMENT)
- print_gen_comment(h, n);
t = print_otag(h, TAG_HEAD, "");
print_mdoc_head(mdoc, h);
print_tagq(h, t);
+ if (n != NULL && n->type == ROFFT_COMMENT)
+ print_gen_comment(h, n);
print_otag(h, TAG_BODY, "");
}
mdoc_root_pre(mdoc, h);
- t = print_otag(h, TAG_DIV, "c", "manual-text");
+ t = print_otag(h, TAG_MAIN, "c", "manual-text");
print_mdoc_nodelist(mdoc, n, h);
print_tagq(h, t);
mdoc_root_post(mdoc, h);
@@ -452,16 +453,19 @@ print_mdoc_node(MDOC_ARGS)
static void
mdoc_root_post(const struct roff_meta *meta, struct html *h)
{
- struct tag *t, *tt;
+ struct tag *t;
- t = print_otag(h, TAG_TABLE, "c", "foot");
- tt = print_otag(h, TAG_TR, "");
+ t = print_otag(h, TAG_DIV, "cr?", "foot", "doc-pagefooter",
+ "aria-label", "Manual footer line");
- print_otag(h, TAG_TD, "c", "foot-date");
+ print_otag(h, TAG_SPAN, "c", "foot-left");
+ print_stagq(h, t);
+
+ print_otag(h, TAG_SPAN, "c", "foot-date");
print_text(h, meta->date);
- print_stagq(h, tt);
+ print_stagq(h, t);
- print_otag(h, TAG_TD, "c", "foot-os");
+ print_otag(h, TAG_SPAN, "c", "foot-os");
print_text(h, meta->os);
print_tagq(h, t);
}
@@ -469,7 +473,7 @@ mdoc_root_post(const struct roff_meta *meta, struct html *h)
static int
mdoc_root_pre(const struct roff_meta *meta, struct html *h)
{
- struct tag *t, *tt;
+ struct tag *t;
char *volume, *title;
if (NULL == meta->arch)
@@ -484,18 +488,18 @@ mdoc_root_pre(const struct roff_meta *meta, struct html *h)
mandoc_asprintf(&title, "%s(%s)",
meta->title, meta->msec);
- t = print_otag(h, TAG_TABLE, "c", "head");
- tt = print_otag(h, TAG_TR, "");
+ t = print_otag(h, TAG_DIV, "cr?", "head", "doc-pageheader",
+ "aria-label", "Manual header line");
- print_otag(h, TAG_TD, "c", "head-ltitle");
+ print_otag(h, TAG_SPAN, "c", "head-ltitle");
print_text(h, title);
- print_stagq(h, tt);
+ print_stagq(h, t);
- print_otag(h, TAG_TD, "c", "head-vol");
+ print_otag(h, TAG_SPAN, "c", "head-vol");
print_text(h, volume);
- print_stagq(h, tt);
+ print_stagq(h, t);
- print_otag(h, TAG_TD, "c", "head-rtitle");
+ print_otag(h, TAG_SPAN, "c", "head-rtitle");
print_text(h, title);
print_tagq(h, t);
@@ -515,7 +519,7 @@ static int
mdoc_sh_pre(MDOC_ARGS)
{
struct roff_node *sn, *subn;
- struct tag *t, *tsec, *tsub;
+ struct tag *t, *tnav, *tsec, *tsub;
char *id;
int sc;
@@ -536,7 +540,8 @@ mdoc_sh_pre(MDOC_ARGS)
break;
if (sc < 2)
break;
- t = print_otag(h, TAG_H1, "c", "Sh");
+ tnav = print_otag(h, TAG_NAV, "r", "doc-toc");
+ t = print_otag(h, TAG_H2, "c", "Sh");
print_text(h, "TABLE OF CONTENTS");
print_tagq(h, t);
t = print_otag(h, TAG_UL, "c", "Bl-compact");
@@ -567,11 +572,11 @@ mdoc_sh_pre(MDOC_ARGS)
}
print_tagq(h, tsec);
}
- print_tagq(h, t);
+ print_tagq(h, tnav);
print_otag(h, TAG_SECTION, "c", "Sh");
break;
case ROFFT_HEAD:
- print_otag_id(h, TAG_H1, "Sh", n);
+ print_otag_id(h, TAG_H2, "Sh", n);
break;
case ROFFT_BODY:
if (n->sec == SEC_AUTHORS)
@@ -592,7 +597,7 @@ mdoc_ss_pre(MDOC_ARGS)
print_otag(h, TAG_SECTION, "c", "Ss");
break;
case ROFFT_HEAD:
- print_otag_id(h, TAG_H2, "Ss", n);
+ print_otag_id(h, TAG_H3, "Ss", n);
break;
case ROFFT_BODY:
break;
@@ -632,7 +637,7 @@ mdoc_nd_pre(MDOC_ARGS)
abort();
}
print_text(h, "\\(em");
- print_otag(h, TAG_SPAN, "c", "Nd");
+ print_otag(h, TAG_SPAN, "cr", "Nd", "doc-subtitle");
return 1;
}
@@ -664,26 +669,34 @@ mdoc_nm_pre(MDOC_ARGS)
static int
mdoc_xr_pre(MDOC_ARGS)
{
- if (NULL == n->child)
+ char *name, *section, *label;
+
+ if (n->child == NULL)
return 0;
+ name = n->child->string;
+ if (n->child->next != NULL) {
+ section = n->child->next->string;
+ mandoc_asprintf(&label, "%s, section %s", name, section);
+ } else
+ section = label = NULL;
+
if (h->base_man1)
- print_otag(h, TAG_A, "chM", "Xr",
- n->child->string, n->child->next == NULL ?
- NULL : n->child->next->string);
+ print_otag(h, TAG_A, "chM?", "Xr",
+ name, section, "aria-label", label);
else
- print_otag(h, TAG_A, "c", "Xr");
+ print_otag(h, TAG_A, "c?", "Xr", "aria-label", label);
- n = n->child;
- print_text(h, n->string);
+ free(label);
+ print_text(h, name);
- if (NULL == (n = n->next))
+ if (section == NULL)
return 0;
h->flags |= HTML_NOSPACE;
print_text(h, "(");
h->flags |= HTML_NOSPACE;
- print_text(h, n->string);
+ print_text(h, section);
h->flags |= HTML_NOSPACE;
print_text(h, ")");
return 0;
diff --git a/mdoc_macro.c b/mdoc_macro.c
index dd3885c702b7..889b80a64a68 100644
--- a/mdoc_macro.c
+++ b/mdoc_macro.c
@@ -1,7 +1,7 @@
-/* $Id: mdoc_macro.c,v 1.234 2020/01/19 18:02:00 schwarze Exp $ */
+/* $Id: mdoc_macro.c,v 1.235 2022/04/14 16:43:44 schwarze Exp $ */
/*
+ * Copyright (c) 2010, 2012-2021 Ingo Schwarze <schwarze@openbsd.org>
* Copyright (c) 2008-2012 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2010, 2012-2020 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -26,6 +26,9 @@
#include <string.h>
#include <time.h>
+#if DEBUG_MEMORY
+#include "mandoc_dbg.h"
+#endif
#include "mandoc.h"
#include "roff.h"
#include "mdoc.h"
@@ -1511,6 +1514,11 @@ in_line_eoln(MACRO_PROT_ARGS)
rew_last(mdoc, n->parent);
}
+#if DEBUG_MEMORY
+ if (tok == MDOC_Dt)
+ mandoc_dbg_name(buf);
+#endif
+
if (buf[*pos] == '\0' &&
(tok == MDOC_Fd || *roff_name[tok] == '%')) {
mandoc_msg(MANDOCERR_MACRO_EMPTY,
diff --git a/mdoc_man.c b/mdoc_man.c
index 0964cc6160a1..d4fd88304fb0 100644
--- a/mdoc_man.c
+++ b/mdoc_man.c
@@ -1,4 +1,4 @@
-/* $Id: mdoc_man.c,v 1.137 2021/07/04 15:38:26 schwarze Exp $ */
+/* $Id: mdoc_man.c,v 1.138 2023/04/28 19:11:04 schwarze Exp $ */
/*
* Copyright (c) 2011-2021 Ingo Schwarze <schwarze@openbsd.org>
*
@@ -1518,7 +1518,7 @@ mid_it(void)
Bl_stack[Bl_stack_len - 1]);
print_word(buf);
- /* Remeber to close out this .RS block later. */
+ /* Remember to close out this .RS block later. */
Bl_stack_post[Bl_stack_len - 1] = 1;
}
diff --git a/mdoc_markdown.c b/mdoc_markdown.c
index 63d8e1705580..ecad77e308e6 100644
--- a/mdoc_markdown.c
+++ b/mdoc_markdown.c
@@ -1,4 +1,4 @@
-/* $Id: mdoc_markdown.c,v 1.37 2021/08/10 12:55:03 schwarze Exp $ */
+/* $Id: mdoc_markdown.c,v 1.38 2024/08/13 12:44:00 schwarze Exp $ */
/*
* Copyright (c) 2017, 2018, 2020 Ingo Schwarze <schwarze@openbsd.org>
*
@@ -750,7 +750,7 @@ md_pre_raw(struct roff_node *n)
if ((prefix = md_act(n->tok)->prefix) != NULL) {
md_rawword(prefix);
outflags &= ~MD_spc;
- if (*prefix == '`')
+ if (strchr(prefix, '`') != NULL)
code_blocks++;
}
return 1;
@@ -764,7 +764,7 @@ md_post_raw(struct roff_node *n)
if ((suffix = md_act(n->tok)->suffix) != NULL) {
outflags &= ~(MD_spc | MD_nl);
md_rawword(suffix);
- if (*suffix == '`')
+ if (strchr(suffix, '`') != NULL)
code_blocks--;
}
}
diff --git a/mdoc_state.c b/mdoc_state.c
index d696ff27e06c..e89d6290e381 100644
--- a/mdoc_state.c
+++ b/mdoc_state.c
@@ -1,6 +1,6 @@
-/* $Id: mdoc_state.c,v 1.17 2020/06/22 19:20:40 schwarze Exp $ */
+/* $Id: mdoc_state.c,v 1.19 2022/08/19 12:59:26 schwarze Exp $ */
/*
- * Copyright (c) 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2014,2015,2017,2018,2022 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -23,6 +23,9 @@
#include <stdlib.h>
#include <string.h>
+#if DEBUG_MEMORY
+#include "mandoc_dbg.h"
+#endif
#include "mandoc.h"
#include "roff.h"
#include "mdoc.h"
@@ -34,6 +37,7 @@
typedef void (*state_handler)(STATE_ARGS);
+static void setsec(struct roff_node *, enum roff_sec);
static void state_bl(STATE_ARGS);
static void state_sh(STATE_ARGS);
static void state_sm(STATE_ARGS);
@@ -205,35 +209,36 @@ state_bl(STATE_ARGS)
}
static void
-state_sh(STATE_ARGS)
+setsec(struct roff_node *n, enum roff_sec sec)
{
struct roff_node *nch;
- char *secname;
- if (n->type != ROFFT_HEAD)
- return;
+ n->sec = sec;
+ for (nch = n->child; nch != NULL; nch = nch->next)
+ setsec(nch, sec);
+}
- if ( ! (n->flags & NODE_VALID)) {
- secname = NULL;
- deroff(&secname, n);
+/*
+ * Set the section attribute for the BLOCK, HEAD, and HEAD children.
+ * For other nodes, including the .Sh BODY, this is done when allocating
+ * the node data structures, but for .Sh BLOCK and HEAD, the section is
+ * still unknown at that time.
+ */
+static void
+state_sh(STATE_ARGS)
+{
+ enum roff_sec sec;
- /*
- * Set the section attribute for the BLOCK, HEAD,
- * and HEAD children; the latter can only be TEXT
- * nodes, so no recursion is needed. For other
- * nodes, including the .Sh BODY, this is done
- * when allocating the node data structures, but
- * for .Sh BLOCK and HEAD, the section is still
- * unknown at that time.
- */
+ if (n->type != ROFFT_HEAD)
+ return;
- n->sec = n->parent->sec = secname == NULL ?
- SEC_CUSTOM : mdoc_a2sec(secname);
- for (nch = n->child; nch != NULL; nch = nch->next)
- nch->sec = n->sec;
- free(secname);
+ if ((n->flags & NODE_VALID) == 0) {
+ sec = n->child != NULL && n->child->type == ROFFT_TEXT &&
+ n->child->next == NULL ? mdoc_a2sec(n->child->string) :
+ SEC_CUSTOM;
+ n->parent->sec = sec;
+ setsec(n, sec);
}
-
if ((mdoc->lastsec = n->sec) == SEC_SYNOPSIS) {
roff_setreg(mdoc->roff, "nS", 1, '=');
mdoc->flags |= MDOC_SYNOPSIS;
diff --git a/mdoc_term.c b/mdoc_term.c
index 42392c7c0a59..931bc384a002 100644
--- a/mdoc_term.c
+++ b/mdoc_term.c
@@ -1,6 +1,6 @@
-/* $Id: mdoc_term.c,v 1.380 2020/04/06 10:16:17 schwarze Exp $ */
+/* $Id: mdoc_term.c,v 1.383 2023/11/13 19:13:01 schwarze Exp $ */
/*
- * Copyright (c) 2010, 2012-2020 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2010, 2012-2020, 2022 Ingo Schwarze <schwarze@openbsd.org>
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2013 Franco Fichtner <franco@lastsummer.de>
*
@@ -250,7 +250,6 @@ terminal_mdoc(void *arg, const struct roff_meta *mdoc)
{
struct roff_node *n, *nn;
struct termp *p;
- size_t save_defindent;
p = (struct termp *)arg;
p->tcol->rmargin = p->maxrmargin = p->defrmargin;
@@ -275,9 +274,6 @@ terminal_mdoc(void *arg, const struct roff_meta *mdoc)
print_mdoc_nodelist(p, NULL, mdoc, n);
term_newln(p);
} else {
- save_defindent = p->defindent;
- if (p->defindent == 0)
- p->defindent = 5;
term_begin(p, print_mdoc_head, print_mdoc_foot, mdoc);
while (n != NULL &&
(n->type == ROFFT_COMMENT ||
@@ -289,7 +285,6 @@ terminal_mdoc(void *arg, const struct roff_meta *mdoc)
print_mdoc_nodelist(p, NULL, mdoc, n);
}
term_end(p);
- p->defindent = save_defindent;
}
}
@@ -320,8 +315,11 @@ print_mdoc_node(DECL_ARGS)
(p->flags & TERMP_NONEWLINE) == 0)
term_newln(p);
p->flags |= TERMP_BRNEVER;
- } else
+ } else {
+ if (n->flags & NODE_LINE)
+ term_tab_ref(p);
p->flags &= ~TERMP_BRNEVER;
+ }
if (n->type == ROFFT_COMMENT || n->flags & NODE_NOPRT)
return;
@@ -566,8 +564,8 @@ a2width(const struct termp *p, const char *v)
end = a2roffsu(v, &su, SCALE_MAX);
if (end == NULL || *end != '\0') {
- SCALE_HS_INIT(&su, term_strlen(p, v));
- su.scale /= term_strlen(p, "0");
+ su.unit = SCALE_EN;
+ su.scale = term_strlen(p, v) / term_strlen(p, "0");
}
return term_hen(p, &su);
}
@@ -703,9 +701,9 @@ termp_it_pre(DECL_ARGS)
for (i = 0, nn = n->prev;
nn->prev && i < (int)ncols;
nn = nn->prev, i++) {
- SCALE_HS_INIT(&su,
- term_strlen(p, bl->norm->Bl.cols[i]));
- su.scale /= term_strlen(p, "0");
+ su.unit = SCALE_EN;
+ su.scale = term_strlen(p, bl->norm->Bl.cols[i]) /
+ term_strlen(p, "0");
offset += term_hen(p, &su) + dcol;
}
@@ -722,8 +720,9 @@ termp_it_pre(DECL_ARGS)
* Use the declared column widths, extended as explained
* in the preceding paragraph.
*/
- SCALE_HS_INIT(&su, term_strlen(p, bl->norm->Bl.cols[i]));
- su.scale /= term_strlen(p, "0");
+ su.unit = SCALE_EN;
+ su.scale = term_strlen(p, bl->norm->Bl.cols[i]) /
+ term_strlen(p, "0");
width = term_hen(p, &su) + dcol;
break;
default:
diff --git a/mdoc_validate.c b/mdoc_validate.c
index e1cd3ae1edcb..c4c8aecfa71e 100644
--- a/mdoc_validate.c
+++ b/mdoc_validate.c
@@ -1,6 +1,6 @@
-/* $Id: mdoc_validate.c,v 1.389 2021/07/18 11:41:23 schwarze Exp $ */
+/* $Id: mdoc_validate.c,v 1.391 2022/06/08 16:31:46 schwarze Exp $ */
/*
- * Copyright (c) 2010-2020 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2010-2021 Ingo Schwarze <schwarze@openbsd.org>
* Copyright (c) 2008-2012 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2010 Joerg Sonnenberger <joerg@netbsd.org>
*
@@ -1113,7 +1113,8 @@ post_tg(POST_ARGS)
/* Find the next node. */
n = mdoc->last;
for (nn = n; nn != NULL; nn = nn->parent) {
- if (nn->next != NULL) {
+ if (nn->type != ROFFT_HEAD && nn->type != ROFFT_BODY &&
+ nn->type != ROFFT_TAIL && nn->next != NULL) {
nn = nn->next;
break;
}
@@ -2872,7 +2873,6 @@ post_os(POST_ARGS)
{
#ifndef OSNAME
struct utsname utsname;
- static char *defbuf;
#endif
struct roff_node *n;
@@ -2909,15 +2909,15 @@ post_os(POST_ARGS)
#ifdef OSNAME
mdoc->meta.os = mandoc_strdup(OSNAME);
#else /*!OSNAME */
- if (defbuf == NULL) {
+ if (mdoc->os_r == NULL) {
if (uname(&utsname) == -1) {
mandoc_msg(MANDOCERR_OS_UNAME, n->line, n->pos, "Os");
- defbuf = mandoc_strdup("UNKNOWN");
+ mdoc->os_r = mandoc_strdup("UNKNOWN");
} else
- mandoc_asprintf(&defbuf, "%s %s",
+ mandoc_asprintf(&mdoc->os_r, "%s %s",
utsname.sysname, utsname.release);
}
- mdoc->meta.os = mandoc_strdup(defbuf);
+ mdoc->meta.os = mandoc_strdup(mdoc->os_r);
#endif /*!OSNAME*/
out:
diff --git a/out.c b/out.c
index 12333e38ffde..bb29f78c9701 100644
--- a/out.c
+++ b/out.c
@@ -1,4 +1,4 @@
-/* $Id: out.c,v 1.82 2021/09/07 17:07:58 schwarze Exp $ */
+/* $Id: out.c,v 1.85 2021/10/17 21:05:54 schwarze Exp $ */
/*
* Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2011, 2014, 2015, 2017, 2018, 2019, 2021
@@ -123,6 +123,7 @@ tblcalc(struct rofftbl *tbl, const struct tbl_span *sp_first,
const struct tbl_dat *dp;
struct roffcol *col;
struct tbl_colgroup *first_group, **gp, *g;
+ size_t *colwidth;
size_t ewidth, min1, min2, wanted, width, xwidth;
int done, icol, maxcol, necol, nxcol, quirkcol;
@@ -148,7 +149,6 @@ tblcalc(struct rofftbl *tbl, const struct tbl_span *sp_first,
* to data cells in the data section.
*/
- gp = &first_group;
for (dp = sp->first; dp != NULL; dp = dp->next) {
icol = dp->layout->col;
while (maxcol < icol + dp->hspans)
@@ -189,16 +189,16 @@ tblcalc(struct rofftbl *tbl, const struct tbl_span *sp_first,
continue;
/*
- * Build an ordered, singly linked list
+ * Build a singly linked list
* of all groups of columns joined by spans,
* recording the minimum width for each group.
*/
- while (*gp != NULL && ((*gp)->startcol < icol ||
- (*gp)->endcol < icol + dp->hspans))
+ gp = &first_group;
+ while (*gp != NULL && ((*gp)->startcol != icol ||
+ (*gp)->endcol != icol + dp->hspans))
gp = &(*gp)->next;
- if (*gp == NULL || (*gp)->startcol > icol ||
- (*gp)->endcol > icol + dp->hspans) {
+ if (*gp == NULL) {
g = mandoc_malloc(sizeof(*g));
g->next = *gp;
g->wanted = width;
@@ -247,25 +247,37 @@ tblcalc(struct rofftbl *tbl, const struct tbl_span *sp_first,
done = 1;
break;
} else
- (*gp)->wanted -= width;
+ g->wanted -= width;
}
if (done) {
*gp = g->next;
free(g);
} else
- gp = &(*gp)->next;
+ gp = &g->next;
}
+ colwidth = mandoc_reallocarray(NULL, maxcol + 1, sizeof(*colwidth));
while (first_group != NULL) {
/*
+ * Rebuild the array of the widths of all columns
+ * participating in spans that require expansion.
+ */
+
+ for (icol = 0; icol <= maxcol; icol++)
+ colwidth[icol] = SIZE_MAX;
+ for (g = first_group; g != NULL; g = g->next)
+ for (icol = g->startcol; icol <= g->endcol; icol++)
+ colwidth[icol] = tbl->cols[icol].width;
+
+ /*
* Find the smallest and second smallest column width
* among the columns which may need expansion.
*/
min1 = min2 = SIZE_MAX;
for (icol = 0; icol <= maxcol; icol++) {
- width = tbl->cols[icol].width;
+ width = colwidth[icol];
if (min1 > width) {
min2 = min1;
min1 = width;
@@ -283,7 +295,7 @@ tblcalc(struct rofftbl *tbl, const struct tbl_span *sp_first,
for (g = first_group; g != NULL; g = g->next) {
necol = 0;
for (icol = g->startcol; icol <= g->endcol; icol++)
- if (tbl->cols[icol].width == min1)
+ if (colwidth[icol] == min1)
necol++;
if (necol == 0)
continue;
@@ -300,7 +312,7 @@ tblcalc(struct rofftbl *tbl, const struct tbl_span *sp_first,
while ((g = *gp) != NULL) {
done = 0;
for (icol = g->startcol; icol <= g->endcol; icol++) {
- if (tbl->cols[icol].width != min1)
+ if (colwidth[icol] != min1)
continue;
if (g->wanted <= wanted - min1) {
tbl->cols[icol].width += g->wanted;
@@ -314,9 +326,10 @@ tblcalc(struct rofftbl *tbl, const struct tbl_span *sp_first,
*gp = g->next;
free(g);
} else
- gp = &(*gp)->next;
+ gp = &g->next;
}
}
+ free(colwidth);
/*
* Align numbers with text.
diff --git a/out.h b/out.h
index e621cbb7b92e..f746e4486958 100644
--- a/out.h
+++ b/out.h
@@ -1,4 +1,4 @@
-/* $Id: out.h,v 1.34 2020/04/03 11:35:01 schwarze Exp $ */
+/* $Id: out.h,v 1.35 2022/09/11 09:13:48 schwarze Exp $ */
/*
* Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2014, 2017, 2018 Ingo Schwarze <schwarze@openbsd.org>
@@ -57,11 +57,6 @@ struct rofftbl {
void *arg; /* passed to sulen, slen, and len */
};
-#define SCALE_HS_INIT(p, v) \
- do { (p)->unit = SCALE_EN; \
- (p)->scale = (v); } \
- while (/* CONSTCOND */ 0)
-
struct tbl_span;
diff --git a/read.c b/read.c
index 5b33edbe9cef..0e8d32df5a5e 100644
--- a/read.c
+++ b/read.c
@@ -1,4 +1,4 @@
-/* $Id: read.c,v 1.220 2021/06/27 17:57:54 schwarze Exp $ */
+/* $Id: read.c,v 1.221 2022/05/19 14:48:56 schwarze Exp $ */
/*
* Copyright (c) 2010-2020 Ingo Schwarze <schwarze@openbsd.org>
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
@@ -261,7 +261,12 @@ mparse_buf_r(struct mparse *curp, struct buf blk, size_t i, int start)
lastln = thisln;
}
- /* XXX Ugly hack to mark the end of the input. */
+ /*
+ * XXX Ugly hack to mark the end of the input,
+ * such that the function roff_parse_comment()
+ * doesn't attempt to append another line if the
+ * last input line ends with an escape character.
+ */
if (i == blk.sz || blk.buf[i] == '\0') {
if (pos + 2 > ln.sz)
diff --git a/roff.7 b/roff.7
index 6c2e3583f69b..27f83853e75b 100644
--- a/roff.7
+++ b/roff.7
@@ -1,7 +1,7 @@
-.\" $Id: roff.7,v 1.116 2021/09/18 12:23:06 schwarze Exp $
+.\" $Id: roff.7,v 1.121 2023/10/23 20:25:02 schwarze Exp $
.\"
+.\" Copyright (c) 2010-2019, 2022-2023 Ingo Schwarze <schwarze@openbsd.org>
.\" Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
-.\" Copyright (c) 2010-2019 Ingo Schwarze <schwarze@openbsd.org>
.\"
.\" Permission to use, copy, modify, and distribute this software for any
.\" purpose with or without fee is hereby granted, provided that the above
@@ -15,7 +15,7 @@
.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
.\"
-.Dd $Mdocdate: September 18 2021 $
+.Dd $Mdocdate: October 23 2023 $
.Dt ROFF 7
.Os
.Sh NAME
@@ -2021,8 +2021,23 @@ End conditional input; see
Paddable non-breaking space character.
.It Ic \e0
Digit width space character.
-.It Ic \eA\(aq Ns Ar string Ns Ic \(aq
-Anchor definition; ignored by
+.It Ic \eA\(aq Ns Ar name Ns Ic \(aq
+Interpolate
+.Sq 1
+if
+.Ar name
+is a syntactically valid identifier that can be used
+as a name for a macro or user-defined string, or
+.Sq 0
+otherwise.
+This is a thoroughly non-portable groff extension.
+Heirloom troff uses the same escape sequence with the same syntax
+for a completely different purpose,
+defining a hyperlink target position, also called an
+.Dq anchor ,
+with the given
+.Ar name .
+The Heirloom semantics is not supported by
.Xr mandoc 1 .
.It Ic \ea
Leader character; ignored by
@@ -2085,7 +2100,8 @@ defaults to
.Ic \efP .
.It Ic \eg[ Ns Ar name Ns Ic \&]
Interpolate the format of a number register; ignored by
-.Xr mandoc 1 .
+.Xr mandoc 1 ,
+which interpolates an empty string instead.
For short names, there are variants
.Ic \eg Ns Ar c
and
@@ -2166,8 +2182,8 @@ Break the output line at the end of the current word.
Set number register; ignored by
.Xr mandoc 1 .
.It Ic \er
-Move up by one line; ignored by
-.Xr mandoc 1 .
+Reverse line feed: move up by one output line.
+Currently unsupported.
.It Ic \eS\(aq Ns Ar number Ns Ic \(aq
Slant output; ignored by
.Xr mandoc 1 .
@@ -2188,12 +2204,16 @@ Horizontal tab; ignored by
Move up by half a line; ignored by
.Xr mandoc 1 .
.It Ic \eV[ Ns Ar name Ns Ic \&]
-Interpolate an environment variable; ignored by
-.Xr mandoc 1 .
+Interpolate an environment variable.
For short names, there are variants
.Ic \eV Ns Ar c
and
.Ic \eV( Ns Ar cc .
+This escape sequence is intentionally unsupported;
+.Xr mandoc 1
+prints the string
+.Dq Pf $ Brq Ar name
+instead of inspecting the environment.
.It Ic \ev\(aq Ns Ar number Ns Ic \(aq
Vertical motion; ignored by
.Xr mandoc 1 .
@@ -2204,7 +2224,8 @@ The
.Xr mandoc 1
implementation assumes that after expansion of user-defined strings, the
.Ar string
-only contains normal characters, no escape sequences, and that each
+only contains normal characters, characters expressed as escape sequences,
+and zero-width escape sequences, and that each
character has a width of 24 basic units.
.It Ic \eX\(aq Ns Ar string Ns Ic \(aq
Output
diff --git a/roff.c b/roff.c
index de75a260f5a7..bdb02101c053 100644
--- a/roff.c
+++ b/roff.c
@@ -1,6 +1,6 @@
-/* $Id: roff.c,v 1.378 2021/08/10 12:55:04 schwarze Exp $ */
+/* $Id: roff.c,v 1.400 2023/10/24 20:53:12 schwarze Exp $ */
/*
- * Copyright (c) 2010-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2010-2015, 2017-2023 Ingo Schwarze <schwarze@openbsd.org>
* Copyright (c) 2008-2012, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
*
* Permission to use, copy, modify, and distribute this software for any
@@ -40,14 +40,6 @@
#include "tbl_parse.h"
#include "eqn_parse.h"
-/*
- * ASCII_ESC is used to signal from roff_getarg() to roff_expand()
- * that an escape sequence resulted from copy-in processing and
- * needs to be checked or interpolated. As it is used nowhere
- * else, it is defined here rather than in a header file.
- */
-#define ASCII_ESC 27
-
/* Maximum number of string expansions per line, to break infinite loops. */
#define EXPAND_LIMIT 1000
@@ -207,6 +199,8 @@ static int roff_evalpar(struct roff *, int,
static int roff_evalstrcond(const char *, int *);
static int roff_expand(struct roff *, struct buf *,
int, int, char);
+static void roff_expand_patch(struct buf *, int,
+ const char *, int);
static void roff_free1(struct roff *);
static void roff_freereg(struct roffreg *);
static void roff_freestr(struct roffkv *);
@@ -227,15 +221,19 @@ static int roff_line_ignore(ROFF_ARGS);
static void roff_man_alloc1(struct roff_man *);
static void roff_man_free1(struct roff_man *);
static int roff_manyarg(ROFF_ARGS);
+static int roff_mc(ROFF_ARGS);
static int roff_noarg(ROFF_ARGS);
static int roff_nop(ROFF_ARGS);
static int roff_nr(ROFF_ARGS);
static int roff_onearg(ROFF_ARGS);
static enum roff_tok roff_parse(struct roff *, char *, int *,
int, int);
+static int roff_parse_comment(struct roff *, struct buf *,
+ int, int, char);
static int roff_parsetext(struct roff *, struct buf *,
int, int *);
static int roff_renamed(ROFF_ARGS);
+static int roff_req_or_macro(ROFF_ARGS);
static int roff_return(ROFF_ARGS);
static int roff_rm(ROFF_ARGS);
static int roff_rn(ROFF_ARGS);
@@ -369,7 +367,8 @@ const char *__roff_name[MAN_MAX + 1] = {
"PD", "AT", "in",
"SY", "YS", "OP",
"EX", "EE", "UR",
- "UE", "MT", "ME", NULL
+ "UE", "MT", "ME", "MR",
+ NULL
};
const char *const *roff_name = __roff_name;
@@ -379,7 +378,7 @@ static struct roffmac roffs[TOKEN_NONE] = {
{ roff_noarg, NULL, NULL, 0 }, /* fi */
{ roff_onearg, NULL, NULL, 0 }, /* ft */
{ roff_onearg, NULL, NULL, 0 }, /* ll */
- { roff_onearg, NULL, NULL, 0 }, /* mc */
+ { roff_mc, NULL, NULL, 0 }, /* mc */
{ roff_noarg, NULL, NULL, 0 }, /* nf */
{ roff_onearg, NULL, NULL, 0 }, /* po */
{ roff_onearg, NULL, NULL, 0 }, /* rj */
@@ -861,6 +860,7 @@ void
roff_man_free(struct roff_man *man)
{
roff_man_free1(man);
+ free(man->os_r);
free(man);
}
@@ -1229,419 +1229,391 @@ deroff(char **dest, const struct roff_node *n)
/* --- main functions of the roff parser ---------------------------------- */
/*
- * In the current line, expand escape sequences that produce parsable
- * input text. Also check the syntax of the remaining escape sequences,
- * which typically produce output glyphs or change formatter state.
+ * Save comments preceding the title macro, for example in order to
+ * preserve Copyright and license headers in HTML output,
+ * provide diagnostics about RCS ids and trailing whitespace in comments,
+ * then discard comments including preceding whitespace.
+ * This function also handles input line continuation.
*/
static int
-roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char newesc)
+roff_parse_comment(struct roff *r, struct buf *buf, int ln, int pos, char ec)
{
- struct mctx *ctx; /* current macro call context */
- char ubuf[24]; /* buffer to print the number */
struct roff_node *n; /* used for header comments */
const char *start; /* start of the string to process */
+ const char *cp; /* for RCS id parsing */
char *stesc; /* start of an escape sequence ('\\') */
- const char *esct; /* type of esccape sequence */
char *ep; /* end of comment string */
- const char *stnam; /* start of the name, after "[(*" */
- const char *cp; /* end of the name, e.g. before ']' */
- const char *res; /* the string to be substituted */
- char *nbuf; /* new buffer to copy buf->buf to */
- size_t maxl; /* expected length of the escape name */
- size_t naml; /* actual length of the escape name */
- size_t asz; /* length of the replacement */
- size_t rsz; /* length of the rest of the string */
- int inaml; /* length returned from mandoc_escape() */
- int expand_count; /* to avoid infinite loops */
- int npos; /* position in numeric expression */
- int arg_complete; /* argument not interrupted by eol */
- int quote_args; /* true for \\$@, false for \\$* */
- int done; /* no more input available */
- int deftype; /* type of definition to paste */
int rcsid; /* kind of RCS id seen */
- enum mandocerr err; /* for escape sequence problems */
- char sign; /* increment number register */
- char term; /* character terminating the escape */
- /* Search forward for comments. */
-
- done = 0;
- start = buf->buf + pos;
- for (stesc = buf->buf + pos; *stesc != '\0'; stesc++) {
- if (stesc[0] != newesc || stesc[1] == '\0')
- continue;
- stesc++;
- if (*stesc != '"' && *stesc != '#')
- continue;
-
- /* Comment found, look for RCS id. */
-
- rcsid = 0;
- if ((cp = strstr(stesc, "$" "OpenBSD")) != NULL) {
- rcsid = 1 << MANDOC_OS_OPENBSD;
- cp += 8;
- } else if ((cp = strstr(stesc, "$" "NetBSD")) != NULL) {
- rcsid = 1 << MANDOC_OS_NETBSD;
- cp += 7;
- }
- if (cp != NULL &&
- isalnum((unsigned char)*cp) == 0 &&
- strchr(cp, '$') != NULL) {
- if (r->man->meta.rcsids & rcsid)
- mandoc_msg(MANDOCERR_RCS_REP, ln,
- (int)(stesc - buf->buf) + 1,
- "%s", stesc + 1);
- r->man->meta.rcsids |= rcsid;
+ for (start = stesc = buf->buf + pos;; stesc++) {
+ /*
+ * XXX Ugly hack: Remove the newline character that
+ * mparse_buf_r() appended to mark the end of input
+ * if it is not preceded by an escape character.
+ */
+ if (stesc[0] == '\n') {
+ assert(stesc[1] == '\0');
+ stesc[0] = '\0';
}
- /* Handle trailing whitespace. */
+ /* The line ends without continuation or comment. */
+ if (stesc[0] == '\0')
+ return ROFF_CONT;
- ep = strchr(stesc--, '\0') - 1;
- if (*ep == '\n') {
- done = 1;
- ep--;
- }
- if (*ep == ' ' || *ep == '\t')
- mandoc_msg(MANDOCERR_SPACE_EOL,
- ln, (int)(ep - buf->buf), NULL);
+ /* Unescaped byte: skip it. */
+ if (stesc[0] != ec)
+ continue;
/*
- * Save comments preceding the title macro
- * in the syntax tree.
+ * XXX Ugly hack: Do not attempt to append another line
+ * if the function mparse_buf_r() appended a newline
+ * character to indicate the end of input.
*/
-
- if (newesc != ASCII_ESC && r->options & MPARSE_COMMENT) {
- while (*ep == ' ' || *ep == '\t')
- ep--;
- ep[1] = '\0';
- n = roff_node_alloc(r->man,
- ln, stesc + 1 - buf->buf,
- ROFFT_COMMENT, TOKEN_NONE);
- n->string = mandoc_strdup(stesc + 2);
- roff_node_append(r->man, n);
- n->flags |= NODE_VALID | NODE_ENDED;
- r->man->next = ROFF_NEXT_SIBLING;
+ if (stesc[1] == '\n') {
+ assert(stesc[2] == '\0');
+ stesc[0] = '\0';
+ return ROFF_CONT;
}
- /* Line continuation with comment. */
-
- if (stesc[1] == '#') {
- *stesc = '\0';
+ /*
+ * An escape character at the end of an input line
+ * requests line continuation.
+ */
+ if (stesc[1] == '\0') {
+ stesc[0] = '\0';
return ROFF_IGN | ROFF_APPEND;
}
- /* Discard normal comments. */
+ /* Found a comment: process it. */
+ if (stesc[1] == '"' || stesc[1] == '#')
+ break;
- while (stesc > start && stesc[-1] == ' ' &&
- (stesc == start + 1 || stesc[-2] != '\\'))
- stesc--;
- *stesc = '\0';
- break;
+ /* Escaped escape character: skip them both. */
+ if (stesc[1] == ec)
+ stesc++;
}
- if (stesc == start)
- return ROFF_CONT;
- stesc--;
- /* Notice the end of the input. */
+ /* Look for an RCS id in the comment. */
- if (*stesc == '\n') {
- *stesc-- = '\0';
- done = 1;
+ rcsid = 0;
+ if ((cp = strstr(stesc + 2, "$" "OpenBSD")) != NULL) {
+ rcsid = 1 << MANDOC_OS_OPENBSD;
+ cp += 8;
+ } else if ((cp = strstr(stesc + 2, "$" "NetBSD")) != NULL) {
+ rcsid = 1 << MANDOC_OS_NETBSD;
+ cp += 7;
+ }
+ if (cp != NULL && isalnum((unsigned char)*cp) == 0 &&
+ strchr(cp, '$') != NULL) {
+ if (r->man->meta.rcsids & rcsid)
+ mandoc_msg(MANDOCERR_RCS_REP, ln,
+ (int)(stesc - buf->buf) + 2, "%s", stesc + 1);
+ r->man->meta.rcsids |= rcsid;
}
- expand_count = 0;
- while (stesc >= start) {
- if (*stesc != newesc) {
+ /* Warn about trailing whitespace at the end of the comment. */
- /*
- * If we have a non-standard escape character,
- * escape literal backslashes because all
- * processing in subsequent functions uses
- * the standard escaping rules.
- */
+ ep = strchr(stesc + 2, '\0') - 1;
+ if (*ep == '\n')
+ *ep-- = '\0';
+ if (*ep == ' ' || *ep == '\t')
+ mandoc_msg(MANDOCERR_SPACE_EOL,
+ ln, (int)(ep - buf->buf), NULL);
- if (newesc != ASCII_ESC && *stesc == '\\') {
- *stesc = '\0';
- buf->sz = mandoc_asprintf(&nbuf, "%s\\e%s",
- buf->buf, stesc + 1) + 1;
- start = nbuf + pos;
- stesc = nbuf + (stesc - buf->buf);
- free(buf->buf);
- buf->buf = nbuf;
- }
+ /* Save comments preceding the title macro in the syntax tree. */
- /* Search backwards for the next escape. */
+ if (r->options & MPARSE_COMMENT) {
+ while (*ep == ' ' || *ep == '\t')
+ ep--;
+ ep[1] = '\0';
+ n = roff_node_alloc(r->man, ln, stesc + 1 - buf->buf,
+ ROFFT_COMMENT, TOKEN_NONE);
+ n->string = mandoc_strdup(stesc + 2);
+ roff_node_append(r->man, n);
+ n->flags |= NODE_VALID | NODE_ENDED;
+ r->man->next = ROFF_NEXT_SIBLING;
+ }
- stesc--;
- continue;
- }
+ /* The comment requests line continuation. */
- /* If it is escaped, skip it. */
+ if (stesc[1] == '#') {
+ *stesc = '\0';
+ return ROFF_IGN | ROFF_APPEND;
+ }
- for (cp = stesc - 1; cp >= start; cp--)
- if (*cp != r->escape)
- break;
+ /* Discard the comment including preceding whitespace. */
- if ((stesc - cp) % 2 == 0) {
- while (stesc > cp)
- *stesc-- = '\\';
- continue;
- } else if (stesc[1] != '\0') {
- *stesc = '\\';
- } else {
- *stesc-- = '\0';
- if (done)
- continue;
- else
- return ROFF_IGN | ROFF_APPEND;
- }
+ while (stesc > start && stesc[-1] == ' ' &&
+ (stesc == start + 1 || stesc[-2] != '\\'))
+ stesc--;
+ *stesc = '\0';
+ return ROFF_CONT;
+}
- /* Decide whether to expand or to check only. */
+/*
+ * In the current line, expand escape sequences that produce parsable
+ * input text. Also check the syntax of the remaining escape sequences,
+ * which typically produce output glyphs or change formatter state.
+ */
+static int
+roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char ec)
+{
+ char ubuf[24]; /* buffer to print a number */
+ struct mctx *ctx; /* current macro call context */
+ const char *res; /* the string to be pasted */
+ const char *src; /* source for copying */
+ char *dst; /* destination for copying */
+ enum mandoc_esc subtype; /* return value from roff_escape */
+ int iesc; /* index of leading escape char */
+ int inam; /* index of the escape name */
+ int iarg; /* index beginning the argument */
+ int iendarg; /* index right after the argument */
+ int iend; /* index right after the sequence */
+ int isrc, idst; /* to reduce \\ and \. in names */
+ int deftype; /* type of definition to paste */
+ int argi; /* macro argument index */
+ int quote_args; /* true for \\$@, false for \\$* */
+ int asz; /* length of the replacement */
+ int rsz; /* length of the rest of the string */
+ int npos; /* position in numeric expression */
+ int expand_count; /* to avoid infinite loops */
- term = '\0';
- cp = stesc + 1;
- if (*cp == 'E')
- cp++;
- esct = cp;
- switch (*esct) {
- case '*':
- case '$':
- res = NULL;
- break;
- case 'B':
- case 'w':
- term = cp[1];
- /* FALLTHROUGH */
- case 'n':
- sign = cp[1];
- if (sign == '+' || sign == '-')
- cp++;
- res = ubuf;
- break;
- default:
- err = MANDOCERR_OK;
- switch(mandoc_escape(&cp, &stnam, &inaml)) {
- case ESCAPE_SPECIAL:
- if (mchars_spec2cp(stnam, inaml) >= 0)
- break;
- /* FALLTHROUGH */
- case ESCAPE_ERROR:
- err = MANDOCERR_ESC_BAD;
- break;
- case ESCAPE_UNDEF:
- err = MANDOCERR_ESC_UNDEF;
- break;
- case ESCAPE_UNSUPP:
- err = MANDOCERR_ESC_UNSUPP;
- break;
- default:
- break;
+ expand_count = 0;
+ while (buf->buf[pos] != '\0') {
+
+ /*
+ * Skip plain ASCII characters.
+ * If we have a non-standard escape character,
+ * escape literal backslashes because all processing in
+ * subsequent functions uses the standard escaping rules.
+ */
+
+ if (buf->buf[pos] != ec) {
+ if (buf->buf[pos] == '\\') {
+ roff_expand_patch(buf, pos, "\\e", pos + 1);
+ pos++;
}
- if (err != MANDOCERR_OK)
- mandoc_msg(err, ln, (int)(stesc - buf->buf),
- "%.*s", (int)(cp - stesc), stesc);
- stesc--;
+ pos++;
continue;
}
- if (EXPAND_LIMIT < ++expand_count) {
- mandoc_msg(MANDOCERR_ROFFLOOP,
- ln, (int)(stesc - buf->buf), NULL);
- return ROFF_IGN;
- }
-
/*
- * The third character decides the length
- * of the name of the string or register.
- * Save a pointer to the name.
+ * Parse escape sequences,
+ * issue diagnostic messages when appropriate,
+ * and skip sequences that do not need expansion.
+ * If we have a non-standard escape character, translate
+ * it to backslashes and translate backslashes to \e.
*/
- if (term == '\0') {
- switch (*++cp) {
- case '\0':
- maxl = 0;
- break;
- case '(':
- cp++;
- maxl = 2;
- break;
- case '[':
- cp++;
- term = ']';
- maxl = 0;
- break;
- default:
- maxl = 1;
- break;
+ if (roff_escape(buf->buf, ln, pos, &iesc, &inam,
+ &iarg, &iendarg, &iend) != ESCAPE_EXPAND) {
+ while (pos < iend) {
+ if (buf->buf[pos] == ec) {
+ buf->buf[pos] = '\\';
+ if (pos + 1 < iend)
+ pos++;
+ } else if (buf->buf[pos] == '\\') {
+ roff_expand_patch(buf,
+ pos, "\\e", pos + 1);
+ pos++;
+ iend++;
+ }
+ pos++;
}
- } else {
- cp += 2;
- maxl = 0;
+ continue;
}
- stnam = cp;
-
- /* Advance to the end of the name. */
- naml = 0;
- arg_complete = 1;
- while (maxl == 0 || naml < maxl) {
- if (*cp == '\0') {
- mandoc_msg(MANDOCERR_ESC_BAD, ln,
- (int)(stesc - buf->buf), "%s", stesc);
- arg_complete = 0;
- break;
- }
- if (maxl == 0 && *cp == term) {
- cp++;
- break;
- }
- if (*cp++ != '\\' || *esct != 'w') {
- naml++;
- continue;
- }
- switch (mandoc_escape(&cp, NULL, NULL)) {
- case ESCAPE_SPECIAL:
- case ESCAPE_UNICODE:
- case ESCAPE_NUMBERED:
- case ESCAPE_UNDEF:
- case ESCAPE_OVERSTRIKE:
- naml++;
- break;
- default:
- break;
+ /* Reduce \\ and \. in names. */
+
+ if (buf->buf[inam] == '*' || buf->buf[inam] == 'n') {
+ isrc = idst = iarg;
+ while (isrc < iendarg) {
+ if (isrc + 1 < iendarg &&
+ buf->buf[isrc] == '\\' &&
+ (buf->buf[isrc + 1] == '\\' ||
+ buf->buf[isrc + 1] == '.'))
+ isrc++;
+ buf->buf[idst++] = buf->buf[isrc++];
}
+ iendarg -= isrc - idst;
}
- /*
- * Retrieve the replacement string; if it is
- * undefined, resume searching for escapes.
- */
+ /* Handle expansion. */
- switch (*esct) {
+ res = NULL;
+ switch (buf->buf[inam]) {
case '*':
- if (arg_complete) {
- deftype = ROFFDEF_USER | ROFFDEF_PRE;
- res = roff_getstrn(r, stnam, naml, &deftype);
-
- /*
- * If not overriden, let \*(.T
- * through to the formatters.
- */
-
- if (res == NULL && naml == 2 &&
- stnam[0] == '.' && stnam[1] == 'T') {
- roff_setstrn(&r->strtab,
- ".T", 2, NULL, 0, 0);
- stesc--;
- continue;
- }
+ if (iendarg == iarg)
+ break;
+ deftype = ROFFDEF_USER | ROFFDEF_PRE;
+ if ((res = roff_getstrn(r, buf->buf + iarg,
+ iendarg - iarg, &deftype)) != NULL)
+ break;
+
+ /*
+ * If not overridden,
+ * let \*(.T through to the formatters.
+ */
+
+ if (iendarg - iarg == 2 &&
+ buf->buf[iarg] == '.' &&
+ buf->buf[iarg + 1] == 'T') {
+ roff_setstrn(&r->strtab, ".T", 2, NULL, 0, 0);
+ pos = iend;
+ continue;
}
+
+ mandoc_msg(MANDOCERR_STR_UNDEF, ln, iesc,
+ "%.*s", iendarg - iarg, buf->buf + iarg);
break;
+
case '$':
if (r->mstackpos < 0) {
- mandoc_msg(MANDOCERR_ARG_UNDEF, ln,
- (int)(stesc - buf->buf), "%.3s", stesc);
+ mandoc_msg(MANDOCERR_ARG_UNDEF, ln, iesc,
+ "%.*s", iend - iesc, buf->buf + iesc);
break;
}
ctx = r->mstack + r->mstackpos;
- npos = esct[1] - '1';
- if (npos >= 0 && npos <= 8) {
- res = npos < ctx->argc ?
- ctx->argv[npos] : "";
+ argi = buf->buf[iarg] - '1';
+ if (argi >= 0 && argi <= 8) {
+ if (argi < ctx->argc)
+ res = ctx->argv[argi];
break;
}
- if (esct[1] == '*')
+ if (buf->buf[iarg] == '*')
quote_args = 0;
- else if (esct[1] == '@')
+ else if (buf->buf[iarg] == '@')
quote_args = 1;
else {
- mandoc_msg(MANDOCERR_ARG_NONUM, ln,
- (int)(stesc - buf->buf), "%.3s", stesc);
+ mandoc_msg(MANDOCERR_ARG_NONUM, ln, iesc,
+ "%.*s", iend - iesc, buf->buf + iesc);
break;
}
asz = 0;
- for (npos = 0; npos < ctx->argc; npos++) {
- if (npos)
+ for (argi = 0; argi < ctx->argc; argi++) {
+ if (argi)
asz++; /* blank */
if (quote_args)
asz += 2; /* quotes */
- asz += strlen(ctx->argv[npos]);
+ asz += strlen(ctx->argv[argi]);
}
- if (asz != 3) {
- rsz = buf->sz - (stesc - buf->buf) - 3;
- if (asz < 3)
- memmove(stesc + asz, stesc + 3, rsz);
- buf->sz += asz - 3;
- nbuf = mandoc_realloc(buf->buf, buf->sz);
- start = nbuf + pos;
- stesc = nbuf + (stesc - buf->buf);
- buf->buf = nbuf;
- if (asz > 3)
- memmove(stesc + asz, stesc + 3, rsz);
+ if (asz != iend - iesc) {
+ rsz = buf->sz - iend;
+ if (asz < iend - iesc)
+ memmove(buf->buf + iesc + asz,
+ buf->buf + iend, rsz);
+ buf->sz = iesc + asz + rsz;
+ buf->buf = mandoc_realloc(buf->buf, buf->sz);
+ if (asz > iend - iesc)
+ memmove(buf->buf + iesc + asz,
+ buf->buf + iend, rsz);
}
- for (npos = 0; npos < ctx->argc; npos++) {
- if (npos)
- *stesc++ = ' ';
+ dst = buf->buf + iesc;
+ for (argi = 0; argi < ctx->argc; argi++) {
+ if (argi)
+ *dst++ = ' ';
if (quote_args)
- *stesc++ = '"';
- cp = ctx->argv[npos];
- while (*cp != '\0')
- *stesc++ = *cp++;
+ *dst++ = '"';
+ src = ctx->argv[argi];
+ while (*src != '\0')
+ *dst++ = *src++;
if (quote_args)
- *stesc++ = '"';
+ *dst++ = '"';
}
continue;
+ case 'A':
+ ubuf[0] = iendarg > iarg ? '1' : '0';
+ ubuf[1] = '\0';
+ res = ubuf;
+ break;
case 'B':
npos = 0;
- ubuf[0] = arg_complete &&
- roff_evalnum(r, ln, stnam, &npos,
- NULL, ROFFNUM_SCALE) &&
- stnam + npos + 1 == cp ? '1' : '0';
+ ubuf[0] = iendarg > iarg && iend > iendarg &&
+ roff_evalnum(r, ln, buf->buf + iarg, &npos,
+ NULL, ROFFNUM_SCALE) &&
+ npos == iendarg - iarg ? '1' : '0';
ubuf[1] = '\0';
+ res = ubuf;
+ break;
+ case 'V':
+ mandoc_msg(MANDOCERR_UNSUPP, ln, iesc,
+ "%.*s", iend - iesc, buf->buf + iesc);
+ roff_expand_patch(buf, iendarg, "}", iend);
+ roff_expand_patch(buf, iesc, "${", iarg);
+ continue;
+ case 'g':
break;
case 'n':
- if (arg_complete)
+ if (iendarg > iarg)
(void)snprintf(ubuf, sizeof(ubuf), "%d",
- roff_getregn(r, stnam, naml, sign));
+ roff_getregn(r, buf->buf + iarg,
+ iendarg - iarg, buf->buf[inam + 1]));
else
ubuf[0] = '\0';
+ res = ubuf;
break;
case 'w':
- /* use even incomplete args */
- (void)snprintf(ubuf, sizeof(ubuf), "%d",
- 24 * (int)naml);
+ rsz = 0;
+ subtype = ESCAPE_UNDEF;
+ while (iarg < iendarg) {
+ asz = subtype == ESCAPE_SKIPCHAR ? 0 : 1;
+ if (buf->buf[iarg] != '\\') {
+ rsz += asz;
+ iarg++;
+ continue;
+ }
+ switch ((subtype = roff_escape(buf->buf, 0,
+ iarg, NULL, NULL, NULL, NULL, &iarg))) {
+ case ESCAPE_SPECIAL:
+ case ESCAPE_NUMBERED:
+ case ESCAPE_UNICODE:
+ case ESCAPE_OVERSTRIKE:
+ case ESCAPE_UNDEF:
+ break;
+ case ESCAPE_DEVICE:
+ asz *= 8;
+ break;
+ case ESCAPE_EXPAND:
+ abort();
+ default:
+ continue;
+ }
+ rsz += asz;
+ }
+ (void)snprintf(ubuf, sizeof(ubuf), "%d", rsz * 24);
+ res = ubuf;
+ break;
+ default:
break;
}
-
- if (res == NULL) {
- if (*esct == '*')
- mandoc_msg(MANDOCERR_STR_UNDEF,
- ln, (int)(stesc - buf->buf),
- "%.*s", (int)naml, stnam);
+ if (res == NULL)
res = "";
- } else if (buf->sz + strlen(res) > SHRT_MAX) {
- mandoc_msg(MANDOCERR_ROFFLOOP,
- ln, (int)(stesc - buf->buf), NULL);
+ if (++expand_count > EXPAND_LIMIT ||
+ buf->sz + strlen(res) > SHRT_MAX) {
+ mandoc_msg(MANDOCERR_ROFFLOOP, ln, iesc, NULL);
return ROFF_IGN;
}
-
- /* Replace the escape sequence by the string. */
-
- *stesc = '\0';
- buf->sz = mandoc_asprintf(&nbuf, "%s%s%s",
- buf->buf, res, cp) + 1;
-
- /* Prepare for the next replacement. */
-
- start = nbuf + pos;
- stesc = nbuf + (stesc - buf->buf) + strlen(res);
- free(buf->buf);
- buf->buf = nbuf;
+ roff_expand_patch(buf, iesc, res, iend);
}
return ROFF_CONT;
}
/*
+ * Replace the substring from the start position (inclusive)
+ * to end position (exclusive) with the repl(acement) string.
+ * Both positions are byte offsets into buf->buf.
+ * The patched text is assembled in a newly allocated string from
+ * the first start bytes of buf->buf, then repl, then the remainder
+ * of buf->buf beginning at offset end.  The old buffer is freed and
+ * buf->buf and buf->sz are updated, so any pointer into the old
+ * buf->buf is invalid once this function returns.
+ * NOTE(review): the "+ 1" presumably accounts for the terminating
+ * NUL byte on top of the length returned by mandoc_asprintf() --
+ * confirm against its definition.
+ */
+static void
+roff_expand_patch(struct buf *buf, int start, const char *repl, int end)
+{
+ char *nbuf; /* receives the newly allocated, patched string */
+
+ buf->sz = mandoc_asprintf(&nbuf, "%.*s%s%s", start, buf->buf,
+ repl, buf->buf + end) + 1;
+ free(buf->buf); /* the unpatched text is no longer needed */
+ buf->buf = nbuf;
+}
+
+/*
* Parse a quoted or unquoted roff-style request or macro argument.
* Return a pointer to the parsed argument, which is either the original
* pointer or advanced by one byte in case the argument is quoted.
@@ -1688,8 +1660,8 @@ roff_getarg(struct roff *r, char **cpp, int ln, int *pos)
cp++;
break;
case '\\':
+ cp[-pairs] = '\\';
newesc = 1;
- cp[-pairs] = ASCII_ESC;
pairs++;
cp++;
break;
@@ -1745,7 +1717,7 @@ roff_getarg(struct roff *r, char **cpp, int ln, int *pos)
buf.buf = start;
buf.sz = strlen(start) + 1;
buf.next = NULL;
- if (roff_expand(r, &buf, ln, 0, ASCII_ESC) & ROFF_IGN) {
+ if (roff_expand(r, &buf, ln, 0, '\\') == ROFF_IGN) {
free(buf.buf);
buf.buf = mandoc_strdup("");
}
@@ -1853,7 +1825,12 @@ roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs, size_t len)
assert(e == ROFF_CONT);
}
- /* Expand some escape sequences. */
+ /* Handle comments and escape sequences. */
+
+ e = roff_parse_comment(r, buf, ln, pos, r->escape);
+ if ((e & ROFF_MASK) == ROFF_IGN)
+ return e;
+ assert(e == ROFF_CONT);
e = roff_expand(r, buf, ln, pos, r->escape);
if ((e & ROFF_MASK) == ROFF_IGN)
@@ -1903,7 +1880,6 @@ roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs, size_t len)
/*
* If a scope is open, go to the child handler for that macro,
* as it may want to preprocess before doing anything with it.
- * Don't do so if an equation is open.
*/
if (r->last) {
@@ -1911,19 +1887,27 @@ roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs, size_t len)
return (*roffs[t].sub)(r, t, buf, ln, ppos, pos, offs);
}
- /* No scope is open. This is a new request or macro. */
-
r->options &= ~MPARSE_COMMENT;
spos = pos;
t = roff_parse(r, buf->buf, &pos, ln, ppos);
+ return roff_req_or_macro(r, t, buf, ln, spos, pos, offs);
+}
- /* Tables ignore most macros. */
+/*
+ * Handle a new request or macro.
+ * May be called outside any scope or from inside a conditional scope.
+ */
+static int
+roff_req_or_macro(ROFF_ARGS) {
- if (r->tbl != NULL && (t == TOKEN_NONE || t == ROFF_TS ||
- t == ROFF_br || t == ROFF_ce || t == ROFF_rj || t == ROFF_sp)) {
+ /* For now, tables ignore most macros and some requests. */
+
+ if (r->tbl != NULL && (tok == TOKEN_NONE || tok == ROFF_TS ||
+ tok == ROFF_br || tok == ROFF_ce || tok == ROFF_rj ||
+ tok == ROFF_sp)) {
mandoc_msg(MANDOCERR_TBLMACRO,
- ln, pos, "%s", buf->buf + spos);
- if (t != TOKEN_NONE)
+ ln, ppos, "%s", buf->buf + ppos);
+ if (tok != TOKEN_NONE)
return ROFF_IGN;
while (buf->buf[pos] != '\0' && buf->buf[pos] != ' ')
pos++;
@@ -1936,9 +1920,9 @@ roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs, size_t len)
/* For now, let high level macros abort .ce mode. */
- if (ctl && roffce_node != NULL &&
- (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
- t == ROFF_TH || t == ROFF_TS)) {
+ if (roffce_node != NULL &&
+ (tok == TOKEN_NONE || tok == ROFF_Dd || tok == ROFF_EQ ||
+ tok == ROFF_TH || tok == ROFF_TS)) {
r->man->last = roffce_node;
r->man->next = ROFF_NEXT_SIBLING;
roffce_lines = 0;
@@ -1950,12 +1934,12 @@ roff_parseln(struct roff *r, int ln, struct buf *buf, int *offs, size_t len)
* Let the standard macro set parsers handle it.
*/
- if (t == TOKEN_NONE)
+ if (tok == TOKEN_NONE)
return ROFF_CONT;
- /* Execute a roff request or a user defined macro. */
+ /* Execute a roff request or a user-defined macro. */
- return (*roffs[t].proc)(r, t, buf, ln, spos, pos, offs);
+ return (*roffs[tok].proc)(r, tok, buf, ln, ppos, pos, offs);
}
/*
@@ -1998,8 +1982,10 @@ roff_endparse(struct roff *r)
}
/*
- * Parse a roff node's type from the input buffer. This must be in the
- * form of ".foo xxx" in the usual way.
+ * Parse the request or macro name at buf[*pos].
+ * Return ROFF_RENAMED, ROFF_USERDEF, or a ROFF_* token value.
+ * For empty, undefined, mdoc(7), and man(7) macros, return TOKEN_NONE.
+ * As a side effect, set r->current_string to the definition or to NULL.
*/
static enum roff_tok
roff_parse(struct roff *r, char *buf, int *pos, int ln, int ppos)
@@ -2274,12 +2260,8 @@ roff_block_sub(ROFF_ARGS)
int i, j;
/*
- * First check whether a custom macro exists at this level. If
- * it does, then check against it. This is some of groff's
- * stranger behaviours. If we encountered a custom end-scope
- * tag and that tag also happens to be a "real" macro, then we
- * need to try interpreting it again as a real macro. If it's
- * not, then return ignore. Else continue.
+ * If a custom end marker is a user-defined or predefined macro
+ * or a request, interpret it.
*/
if (r->last->end) {
@@ -2305,20 +2287,17 @@ roff_block_sub(ROFF_ARGS)
}
}
- /*
- * If we have no custom end-query or lookup failed, then try
- * pulling it out of the hashtable.
- */
+ /* Handle the standard end marker. */
t = roff_parse(r, buf->buf, &pos, ln, ppos);
+ if (t == ROFF_cblock)
+ return roff_cblock(r, t, buf, ln, ppos, pos, offs);
- if (t != ROFF_cblock) {
- if (tok != ROFF_ig)
- roff_setstr(r, r->last->name, buf->buf + ppos, 2);
- return ROFF_IGN;
- }
+ /* Not an end marker, so append the line to the block. */
- return (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
+ if (tok != ROFF_ig)
+ roff_setstr(r, r->last->name, buf->buf + ppos, 2);
+ return ROFF_IGN;
}
static int
@@ -2398,27 +2377,18 @@ static int
roff_cond_sub(ROFF_ARGS)
{
struct roffnode *bl;
- int irc, rr;
+ int irc, rr, spos;
enum roff_tok t;
rr = 0; /* If arguments follow "\}", skip them. */
irc = roff_cond_checkend(r, tok, buf, ln, ppos, pos, &rr);
+ spos = pos;
t = roff_parse(r, buf->buf, &pos, ln, ppos);
- /* For now, let high level macros abort .ce mode. */
-
- if (roffce_node != NULL &&
- (t == TOKEN_NONE || t == ROFF_Dd || t == ROFF_EQ ||
- t == ROFF_TH || t == ROFF_TS)) {
- r->man->last = roffce_node;
- r->man->next = ROFF_NEXT_SIBLING;
- roffce_lines = 0;
- roffce_node = NULL;
- }
-
/*
- * Fully handle known macros when they are structurally
- * required or when the conditional evaluated to true.
+ * Handle requests and macros if the conditional evaluated
+ * to true or if they are structurally required.
+ * The .break request is always handled specially.
*/
if (t == ROFF_break) {
@@ -2431,11 +2401,11 @@ roff_cond_sub(ROFF_ARGS)
break;
}
}
- } else if (t != TOKEN_NONE &&
- (rr || roffs[t].flags & ROFFMAC_STRUCT))
- irc |= (*roffs[t].proc)(r, t, buf, ln, ppos, pos, offs);
- else
- irc |= rr ? ROFF_CONT : ROFF_IGN;
+ } else if (rr || (t < TOKEN_NONE && roffs[t].flags & ROFFMAC_STRUCT)) {
+ irc |= roff_req_or_macro(r, t, buf, ln, spos, pos, offs);
+ if (irc & ROFF_WHILE)
+ irc &= ~(ROFF_LOOPCONT | ROFF_LOOPEXIT);
+ }
return irc;
}
@@ -2532,7 +2502,7 @@ roff_getnum(const char *v, int *pos, int *res, int flags)
* Evaluate a string comparison condition.
* The first character is the delimiter.
* Succeed if the string up to its second occurrence
- * matches the string up to its third occurence.
+ * matches the string up to its third occurrence.
* Advance the cursor after the third occurrence
* or lacking that, to the end of the line.
*/
@@ -3729,6 +3699,54 @@ roff_eo(ROFF_ARGS)
}
static int
+roff_mc(ROFF_ARGS)
+{
+ struct roff_node *n; /* the element node allocated below */
+ char *cp; /* scanning position in the argument text */
+
+ /*
+ * Handler for the mc request.
+ * Parse the first argument: a single byte or, if that byte is
+ * a backslash, one complete escape sequence.  mandoc_escape()
+ * expects a pointer one byte past the backslash and advances
+ * it past the sequence, so afterwards cp points to the byte
+ * following the first argument.
+ */
+
+ cp = buf->buf + pos;
+ if (*cp != '\0')
+ cp++;
+ if (buf->buf[pos] == '\\') {
+ switch (mandoc_escape((const char **)&cp, NULL, NULL)) {
+ case ESCAPE_SPECIAL:
+ case ESCAPE_UNICODE:
+ case ESCAPE_NUMBERED:
+ break; /* accepted: the escape stays in place as the argument */
+ default:
+ *cp = '\0'; /* cut the text after the escape for the message */
+ mandoc_msg(MANDOCERR_MC_ESC, ln, pos,
+ "mc %s", buf->buf + pos);
+ buf->buf[pos] = '\0'; /* discard the rejected argument */
+ break;
+ }
+ }
+
+ /*
+ * Ignore additional arguments.
+ * Blanks following the first argument are overwritten with NUL
+ * bytes, which also terminates the first argument; any text
+ * remaining after them is reported and cut off.
+ * After a rejected escape sequence, *cp is already NUL,
+ * so nothing further is reported here.
+ */
+
+ while (*cp == ' ')
+ *cp++ = '\0';
+ if (*cp != '\0') {
+ mandoc_msg(MANDOCERR_MC_DIST, ln, (int)(cp - buf->buf),
+ "mc ... %s", cp);
+ *cp = '\0';
+ }
+
+ /*
+ * Create the .mc node, allocating a word node for the argument
+ * if one remains.
+ * NOTE(review): this relies on roff_elem_alloc() leaving the new
+ * element in r->man->last -- confirm.  Afterwards, last is reset
+ * to that element and next is set to ROFF_NEXT_SIBLING.
+ */
+
+ roff_elem_alloc(r->man, ln, ppos, tok);
+ n = r->man->last;
+ if (buf->buf[pos] != '\0')
+ roff_word_alloc(r->man, ln, pos, buf->buf + pos);
+ n->flags |= NODE_LINE | NODE_VALID | NODE_ENDED;
+ r->man->last = n;
+ r->man->next = ROFF_NEXT_SIBLING;
+ return ROFF_IGN;
+}
+
+static int
roff_nop(ROFF_ARGS)
{
while (buf->buf[pos] == ' ')
@@ -3742,7 +3760,6 @@ roff_tr(ROFF_ARGS)
{
const char *p, *first, *second;
size_t fsz, ssz;
- enum mandoc_esc esc;
p = buf->buf + pos;
@@ -3756,23 +3773,15 @@ roff_tr(ROFF_ARGS)
first = p++;
if (*first == '\\') {
- esc = mandoc_escape(&p, NULL, NULL);
- if (esc == ESCAPE_ERROR) {
- mandoc_msg(MANDOCERR_ESC_BAD, ln,
- (int)(p - buf->buf), "%s", first);
+ if (mandoc_escape(&p, NULL, NULL) == ESCAPE_ERROR)
return ROFF_IGN;
- }
fsz = (size_t)(p - first);
}
second = p++;
if (*second == '\\') {
- esc = mandoc_escape(&p, NULL, NULL);
- if (esc == ESCAPE_ERROR) {
- mandoc_msg(MANDOCERR_ESC_BAD, ln,
- (int)(p - buf->buf), "%s", second);
+ if (mandoc_escape(&p, NULL, NULL) == ESCAPE_ERROR)
return ROFF_IGN;
- }
ssz = (size_t)(p - second);
} else if (*second == '\0') {
mandoc_msg(MANDOCERR_TR_ODD, ln,
@@ -3869,8 +3878,9 @@ static int
roff_shift(ROFF_ARGS)
{
struct mctx *ctx;
- int levels, i;
+ int argpos, levels, i;
+ argpos = pos;
levels = 1;
if (buf->buf[pos] != '\0' &&
roff_evalnum(r, ln, buf->buf, &pos, &levels, 0) == 0) {
@@ -3885,9 +3895,13 @@ roff_shift(ROFF_ARGS)
ctx = r->mstack + r->mstackpos;
if (levels > ctx->argc) {
mandoc_msg(MANDOCERR_SHIFT,
- ln, pos, "%d, but max is %d", levels, ctx->argc);
+ ln, argpos, "%d, but max is %d", levels, ctx->argc);
levels = ctx->argc;
}
+ if (levels < 0) {
+ mandoc_msg(MANDOCERR_ARG_NEG, ln, argpos, "shift %d", levels);
+ levels = 0;
+ }
if (levels == 0)
return ROFF_IGN;
for (i = 0; i < levels; i++)
@@ -3949,9 +3963,7 @@ roff_userdef(ROFF_ARGS)
r->mstacksz += 8;
}
ctx = r->mstack + r->mstackpos;
- ctx->argsz = 0;
ctx->argc = 0;
- ctx->argv = NULL;
/*
* Collect pointers to macro argument strings,
@@ -4017,7 +4029,7 @@ static size_t
roff_getname(struct roff *r, char **cpp, int ln, int pos)
{
char *name, *cp;
- size_t namesz;
+ int namesz, inam, iend;
name = *cpp;
if (*name == '\0')
@@ -4025,24 +4037,46 @@ roff_getname(struct roff *r, char **cpp, int ln, int pos)
/* Advance cp to the byte after the end of the name. */
- for (cp = name; 1; cp++) {
- namesz = cp - name;
+ cp = name;
+ namesz = 0;
+ for (;;) {
if (*cp == '\0')
break;
if (*cp == ' ' || *cp == '\t') {
cp++;
break;
}
- if (*cp != '\\')
+ if (*cp != '\\') {
+ if (name + namesz < cp) {
+ name[namesz] = *cp;
+ *cp = ' ';
+ }
+ namesz++;
+ cp++;
continue;
+ }
if (cp[1] == '{' || cp[1] == '}')
break;
- if (*++cp == '\\')
- continue;
- mandoc_msg(MANDOCERR_NAMESC, ln, pos,
- "%.*s", (int)(cp - name + 1), name);
- mandoc_escape((const char **)&cp, NULL, NULL);
- break;
+ if (roff_escape(cp, 0, 0, NULL, &inam,
+ NULL, NULL, &iend) != ESCAPE_UNDEF) {
+ mandoc_msg(MANDOCERR_NAMESC, ln, pos,
+ "%.*s%.*s", namesz, name, iend, cp);
+ cp += iend;
+ break;
+ }
+
+ /*
+ * In an identifier, \\, \., \G and so on
+ * are reduced to \, ., G and so on,
+ * vaguely similar to copy mode.
+ */
+
+ name[namesz++] = cp[inam];
+ while (iend--) {
+ if (cp >= name + namesz)
+ *cp = ' ';
+ cp++;
+ }
}
/* Read past spaces. */
@@ -4360,7 +4394,7 @@ roff_getformat(const struct roff *r)
* return zero and don't change the current position.
* If the control character has been set with `.cc', then let that grain
* precedence.
- * This is slighly contrary to groff, where using the non-breaking
+ * This is slightly contrary to groff, where using the non-breaking
* control character when `cc' has been invoked will cause the
* non-breaking macro contents to be printed verbatim.
*/
diff --git a/roff.h b/roff.h
index 2933eb9c0bf8..48dba82474b9 100644
--- a/roff.h
+++ b/roff.h
@@ -1,6 +1,6 @@
-/* $Id: roff.h,v 1.74 2020/04/08 11:56:03 schwarze Exp $ */
+/* $Id: roff.h,v 1.76 2023/10/24 20:53:12 schwarze Exp $ */
/*
- * Copyright (c) 2013-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2013-2015,2017-2020,2022 Ingo Schwarze <schwarze@openbsd.org>
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
*
* Permission to use, copy, modify, and distribute this software for any
@@ -71,7 +71,7 @@ enum roff_type {
};
enum roff_tok {
- ROFF_br = 0,
+ ROFF_br = 0, /* Beginning of roff(7) requests. */
ROFF_ce,
ROFF_fi,
ROFF_ft,
@@ -83,8 +83,8 @@ enum roff_tok {
ROFF_sp,
ROFF_ta,
ROFF_ti,
- ROFF_MAX,
- ROFF_ab,
+ ROFF_MAX, /* End of requests that generate nodes. */
+ ROFF_ab, /* Requests only used during preprocessing. */
ROFF_ad,
ROFF_af,
ROFF_aln,
@@ -313,11 +313,11 @@ enum roff_tok {
ROFF_writec,
ROFF_writem,
ROFF_xflag,
- ROFF_cblock,
- ROFF_RENAMED,
- ROFF_USERDEF,
- TOKEN_NONE,
- MDOC_Dd,
+ ROFF_cblock, /* Block end marker "..". */
+ ROFF_RENAMED, /* New name of a renamed request or macro. */
+ ROFF_USERDEF, /* User-defined macro. */
+ TOKEN_NONE, /* Undefined macro or text/tbl/eqn/comment node. */
+ MDOC_Dd, /* Beginning of mdoc(7) macros. */
MDOC_Dt,
MDOC_Os,
MDOC_Sh,
@@ -438,8 +438,8 @@ enum roff_tok {
MDOC__U,
MDOC_Ta,
MDOC_Tg,
- MDOC_MAX,
- MAN_TH,
+ MDOC_MAX, /* End of mdoc(7) macros. */
+ MAN_TH, /* Beginning of man(7) macros. */
MAN_SH,
MAN_SS,
MAN_TP,
@@ -476,7 +476,8 @@ enum roff_tok {
MAN_UE,
MAN_MT,
MAN_ME,
- MAN_MAX
+ MAN_MR,
+ MAN_MAX /* End of man(7) macros. */
};
/*
diff --git a/roff_escape.c b/roff_escape.c
new file mode 100644
index 000000000000..011543df76c7
--- /dev/null
+++ b/roff_escape.c
@@ -0,0 +1,546 @@
+/* $Id: roff_escape.c,v 1.15 2024/05/16 21:23:00 schwarze Exp $ */
+/*
+ * Copyright (c) 2011, 2012, 2013, 2014, 2015, 2017, 2018, 2020, 2022
+ * Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * Parser for roff(7) escape sequences.
+ * To be used by all mandoc(1) parsers and formatters.
+ */
+#include <assert.h>
+#include <ctype.h>
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "mandoc.h"
+#include "roff.h"
+#include "roff_int.h"
+
+/*
+ * Traditional escape sequence interpreter for general use
+ * including in high-level formatters. This function does not issue
+ * diagnostics and is not usable for expansion in the roff(7) parser.
+ * It is documented in the mandoc_escape(3) manual page.
+ */
+enum mandoc_esc
+mandoc_escape(const char **rendarg, const char **rarg, int *rargl)
+{
+ int iarg, iendarg, iend;
+ enum mandoc_esc rval;
+
+ rval = roff_escape(--*rendarg, 0, 0,
+ NULL, NULL, &iarg, &iendarg, &iend);
+ assert(rval != ESCAPE_EXPAND);
+ if (rarg != NULL)
+ *rarg = *rendarg + iarg;
+ if (rargl != NULL)
+ *rargl = iendarg - iarg;
+ *rendarg += iend;
+ return rval;
+}
+
+/*
+ * Full-featured escape sequence parser.
+ * If it encounters a nested escape sequence that requires expansion
+ * by the parser and re-parsing, the positions of that inner escape
+ * sequence are returned in *resc ... *rend.
+ * Otherwise, *resc is set to aesc and the positions of the escape
+ * sequence starting at aesc are returned.
+ * Diagnostic messages are generated if and only if ln != 0,
+ * that is, if and only if called by roff_expand().
+ */
+enum mandoc_esc
+roff_escape(const char *buf, const int ln, const int aesc,
+ int *resc, int *rnam, int *rarg, int *rendarg, int *rend)
+{
+ int iesc; /* index of leading escape char */
+ int inam; /* index of escape name */
+ int iarg; /* index beginning the argument */
+ int iendarg; /* index right after the argument */
+ int iend; /* index right after the sequence */
+ int sesc, snam, sarg, sendarg, send; /* for sub-escape */
+ int escterm; /* whether term is escaped */
+ int maxl; /* expected length of the argument */
+ int argl; /* actual length of the argument */
+ int c, i; /* for \[char...] parsing */
+ int valid_A; /* for \A parsing */
+ enum mandoc_esc rval; /* return value */
+ enum mandoc_esc stype; /* for sub-escape */
+ enum mandocerr err; /* diagnostic code */
+ char term; /* byte terminating the argument */
+
+ /*
+ * Treat "\E" just like "\";
+ * it only makes a difference in copy mode.
+ */
+
+ iesc = inam = aesc;
+ do {
+ inam++;
+ } while (buf[inam] == 'E');
+
+ /*
+ * Sort the following cases first by syntax category,
+ * then by escape sequence type, and finally by ASCII code.
+ */
+
+ iarg = iendarg = iend = inam + 1;
+ maxl = INT_MAX;
+ term = '\0';
+ err = MANDOCERR_OK;
+ switch (buf[inam]) {
+
+ /* Escape sequences taking no arguments at all. */
+
+ case '!':
+ case '?':
+ case 'r':
+ rval = ESCAPE_UNSUPP;
+ goto out;
+
+ case '%':
+ case '&':
+ case ')':
+ case ',':
+ case '/':
+ case '^':
+ case 'a':
+ case 'd':
+ case 't':
+ case 'u':
+ case '{':
+ case '|':
+ case '}':
+ rval = ESCAPE_IGNORE;
+ goto out;
+
+ case '\0':
+ iendarg = --iend;
+ /* FALLTHROUGH */
+ case '.':
+ case '\\':
+ default:
+ iarg--;
+ rval = ESCAPE_UNDEF;
+ goto out;
+
+ case ' ':
+ case '\'':
+ case '-':
+ case '0':
+ case ':':
+ case '_':
+ case '`':
+ case 'e':
+ case '~':
+ iarg--;
+ argl = 1;
+ rval = ESCAPE_SPECIAL;
+ goto out;
+ case 'p':
+ rval = ESCAPE_BREAK;
+ goto out;
+ case 'c':
+ rval = ESCAPE_NOSPACE;
+ goto out;
+ case 'z':
+ rval = ESCAPE_SKIPCHAR;
+ goto out;
+
+ /* Standard argument format. */
+
+ case '$':
+ case '*':
+ case 'V':
+ case 'g':
+ case 'n':
+ rval = ESCAPE_EXPAND;
+ break;
+ case 'F':
+ case 'M':
+ case 'O':
+ case 'Y':
+ case 'k':
+ case 'm':
+ rval = ESCAPE_IGNORE;
+ break;
+ case '(':
+ case '[':
+ rval = ESCAPE_SPECIAL;
+ iendarg = iend = --iarg;
+ break;
+ case 'f':
+ rval = ESCAPE_FONT;
+ break;
+
+ /* Quoted arguments */
+
+ case 'A':
+ case 'B':
+ case 'w':
+ rval = ESCAPE_EXPAND;
+ term = '\b';
+ break;
+ case 'D':
+ case 'H':
+ case 'L':
+ case 'R':
+ case 'S':
+ case 'X':
+ case 'Z':
+ case 'b':
+ case 'v':
+ case 'x':
+ rval = ESCAPE_IGNORE;
+ term = '\b';
+ break;
+ case 'C':
+ rval = ESCAPE_SPECIAL;
+ term = '\b';
+ break;
+ case 'N':
+ rval = ESCAPE_NUMBERED;
+ term = '\b';
+ break;
+ case 'h':
+ rval = ESCAPE_HORIZ;
+ term = '\b';
+ break;
+ case 'l':
+ rval = ESCAPE_HLINE;
+ term = '\b';
+ break;
+ case 'o':
+ rval = ESCAPE_OVERSTRIKE;
+ term = '\b';
+ break;
+
+ /* Sizes support both forms, with additional peculiarities. */
+
+ case 's':
+ rval = ESCAPE_IGNORE;
+ if (buf[iarg] == '+' || buf[iarg] == '-'||
+ buf[iarg] == ASCII_HYPH)
+ iarg++;
+ switch (buf[iarg]) {
+ case '(':
+ maxl = 2;
+ iarg++;
+ break;
+ case '[':
+ term = ']';
+ iarg++;
+ break;
+ case '\'':
+ term = '\'';
+ iarg++;
+ break;
+ case '1':
+ case '2':
+ case '3':
+ if (buf[iarg - 1] == 's' &&
+ isdigit((unsigned char)buf[iarg + 1])) {
+ maxl = 2;
+ break;
+ }
+ /* FALLTHROUGH */
+ default:
+ maxl = 1;
+ break;
+ }
+ iendarg = iend = iarg;
+ }
+
+ /* Decide how to end the argument. */
+
+ escterm = 0;
+ stype = ESCAPE_EXPAND;
+ if ((term == '\b' || (term == '\0' && maxl == INT_MAX)) &&
+ buf[iarg] == buf[iesc]) {
+ stype = roff_escape(buf, ln, iendarg,
+ &sesc, &snam, &sarg, &sendarg, &send);
+ if (stype == ESCAPE_EXPAND)
+ goto out_sub;
+ }
+
+ if (term == '\b') {
+ if (stype == ESCAPE_UNDEF)
+ iarg++;
+ if (stype != ESCAPE_EXPAND && stype != ESCAPE_UNDEF) {
+ if (strchr("BHLRSNhlvx", buf[inam]) != NULL &&
+ strchr(" ,.0DLOXYZ^abdhlortuvx|~",
+ buf[snam]) != NULL) {
+ err = MANDOCERR_ESC_DELIM;
+ iend = send;
+ iarg = iendarg = sesc;
+ goto out;
+ }
+ escterm = 1;
+ iarg = send;
+ term = buf[snam];
+ } else if (strchr("BDHLRSvxNhl", buf[inam]) != NULL &&
+ strchr(" %&()*+-./0123456789:<=>", buf[iarg]) != NULL) {
+ err = MANDOCERR_ESC_DELIM;
+ if (rval != ESCAPE_EXPAND)
+ rval = ESCAPE_ERROR;
+ if (buf[inam] != 'D') {
+ iendarg = iend = iarg + 1;
+ goto out;
+ }
+ }
+ if (term == '\b')
+ term = buf[iarg++];
+ } else if (term == '\0' && maxl == INT_MAX) {
+ if (buf[inam] == 'n' && (buf[iarg] == '+' || buf[iarg] == '-'))
+ iarg++;
+ switch (buf[iarg]) {
+ case '(':
+ maxl = 2;
+ iarg++;
+ break;
+ case '[':
+ if (buf[++iarg] == ' ') {
+ iendarg = iend = iarg + 1;
+ err = MANDOCERR_ESC_ARG;
+ rval = ESCAPE_ERROR;
+ goto out;
+ }
+ term = ']';
+ break;
+ default:
+ maxl = 1;
+ break;
+ }
+ }
+
+ /* Advance to the end of the argument. */
+
+ valid_A = 1;
+ iendarg = iarg;
+ while (maxl > 0) {
+ if (buf[iendarg] == '\0') {
+ err = MANDOCERR_ESC_INCOMPLETE;
+ if (rval != ESCAPE_EXPAND &&
+ rval != ESCAPE_OVERSTRIKE)
+ rval = ESCAPE_ERROR;
+ /* Usually, ignore an incomplete argument. */
+ if (strchr("Aow", buf[inam]) == NULL)
+ iendarg = iarg;
+ break;
+ }
+ if (escterm == 0 && buf[iendarg] == term) {
+ iend = iendarg + 1;
+ break;
+ }
+ if (buf[iendarg] == buf[iesc]) {
+ stype = roff_escape(buf, ln, iendarg,
+ &sesc, &snam, &sarg, &sendarg, &send);
+ if (stype == ESCAPE_EXPAND)
+ goto out_sub;
+ iend = send;
+ if (escterm == 1 &&
+ (buf[snam] == term || buf[inam] == 'N'))
+ break;
+ if (stype != ESCAPE_UNDEF)
+ valid_A = 0;
+ iendarg = send;
+ } else if (buf[inam] == 'N' &&
+ isdigit((unsigned char)buf[iendarg]) == 0) {
+ iend = iendarg + 1;
+ break;
+ } else {
+ if (buf[iendarg] == ' ' || buf[iendarg] == '\t')
+ valid_A = 0;
+ if (maxl != INT_MAX)
+ maxl--;
+ iend = ++iendarg;
+ }
+ }
+
+ /* Post-process depending on the content of the argument. */
+
+ argl = iendarg - iarg;
+ switch (buf[inam]) {
+ case '*':
+ if (resc == NULL && argl == 2 &&
+ buf[iarg] == '.' && buf[iarg + 1] == 'T')
+ rval = ESCAPE_DEVICE;
+ break;
+ case 'A':
+ if (valid_A == 0)
+ iendarg = iarg;
+ break;
+ case 'O':
+ switch (buf[iarg]) {
+ case '0':
+ rval = ESCAPE_UNSUPP;
+ break;
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ if (argl == 1)
+ rval = ESCAPE_IGNORE;
+ else {
+ err = MANDOCERR_ESC_ARG;
+ rval = ESCAPE_ERROR;
+ }
+ break;
+ case '5':
+ if (buf[iarg - 1] == '[')
+ rval = ESCAPE_UNSUPP;
+ else {
+ err = MANDOCERR_ESC_ARG;
+ rval = ESCAPE_ERROR;
+ }
+ break;
+ default:
+ err = MANDOCERR_ESC_ARG;
+ rval = ESCAPE_ERROR;
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+
+ switch (rval) {
+ case ESCAPE_FONT:
+ rval = mandoc_font(buf + iarg, argl);
+ if (rval == ESCAPE_ERROR)
+ err = MANDOCERR_ESC_ARG;
+ break;
+
+ case ESCAPE_SPECIAL:
+ if (argl == 0) {
+ err = MANDOCERR_ESC_BADCHAR;
+ rval = ESCAPE_ERROR;
+ break;
+ }
+
+ /*
+ * The file chars.c only provides one common list of
+ * character names, but \[-] == \- is the only one of
+ * the characters with one-byte names that allows
+ * enclosing the name in brackets.
+ */
+
+ if (term != '\0' && argl == 1 && buf[iarg] != '-') {
+ err = MANDOCERR_ESC_BADCHAR;
+ rval = ESCAPE_ERROR;
+ break;
+ }
+
+ /* Treat \[char...] as an alias for \N'...'. */
+
+ if (buf[iarg] == 'c') {
+ if (argl < 6 || argl > 7 ||
+ strncmp(buf + iarg, "char", 4) != 0 ||
+ (int)strspn(buf + iarg + 4, "0123456789")
+ + 4 < argl)
+ break;
+ c = 0;
+ for (i = iarg; i < iendarg; i++)
+ c = 10 * c + (buf[i] - '0');
+ if (c < 0x21 || (c > 0x7e && c < 0xa0) || c > 0xff) {
+ err = MANDOCERR_ESC_BADCHAR;
+ break;
+ }
+ iarg += 4;
+ rval = ESCAPE_NUMBERED;
+ break;
+ }
+
+ /*
+ * Unicode escapes are defined in groff as \[u0000]
+ * to \[u10FFFF], where the contained value must be
+ * a valid Unicode codepoint.
+ */
+
+ if (buf[iarg] != 'u' || argl < 5 || argl > 7)
+ break;
+ if (argl == 7 && /* beyond the Unicode range */
+ (buf[iarg + 1] != '1' || buf[iarg + 2] != '0')) {
+ err = MANDOCERR_ESC_BADCHAR;
+ break;
+ }
+ if (argl == 6 && buf[iarg + 1] == '0') {
+ err = MANDOCERR_ESC_BADCHAR;
+ break;
+ }
+ if (argl == 5 && /* UTF-16 surrogate */
+ toupper((unsigned char)buf[iarg + 1]) == 'D' &&
+ strchr("89ABCDEFabcdef", buf[iarg + 2]) != NULL) {
+ err = MANDOCERR_ESC_BADCHAR;
+ break;
+ }
+ if ((int)strspn(buf + iarg + 1, "0123456789ABCDEFabcdef")
+ + 1 == argl)
+ rval = ESCAPE_UNICODE;
+ break;
+ default:
+ break;
+ }
+ goto out;
+
+out_sub:
+ iesc = sesc;
+ inam = snam;
+ iarg = sarg;
+ iendarg = sendarg;
+ iend = send;
+ rval = ESCAPE_EXPAND;
+
+out:
+ if (resc != NULL)
+ *resc = iesc;
+ if (rnam != NULL)
+ *rnam = inam;
+ if (rarg != NULL)
+ *rarg = iarg;
+ if (rendarg != NULL)
+ *rendarg = iendarg;
+ if (rend != NULL)
+ *rend = iend;
+ if (ln == 0)
+ return rval;
+
+ /*
+ * Diagnostic messages are only issued when called
+ * from the parser, not when called from the formatters.
+ */
+
+ switch (rval) {
+ case ESCAPE_UNSUPP:
+ err = MANDOCERR_ESC_UNSUPP;
+ break;
+ case ESCAPE_UNDEF:
+ if (buf[inam] != '\\' && buf[inam] != '.')
+ err = MANDOCERR_ESC_UNDEF;
+ break;
+ case ESCAPE_SPECIAL:
+ if (mchars_spec2cp(buf + iarg, argl) >= 0)
+ err = MANDOCERR_OK;
+ else if (err == MANDOCERR_OK)
+ err = MANDOCERR_ESC_UNKCHAR;
+ break;
+ default:
+ break;
+ }
+ if (err != MANDOCERR_OK)
+ mandoc_msg(err, ln, iesc, "%.*s", iend - iesc, buf + iesc);
+ return rval;
+}
diff --git a/roff_int.h b/roff_int.h
index e0700a742910..a26afa985bba 100644
--- a/roff_int.h
+++ b/roff_int.h
@@ -1,6 +1,6 @@
/* $OpenBSD: roff_int.h,v 1.16 2019/01/05 00:36:46 schwarze Exp $ */
/*
- * Copyright (c) 2013-2015, 2017-2020 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2013-2015, 2017-2022 Ingo Schwarze <schwarze@openbsd.org>
* Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
*
* Permission to use, copy, modify, and distribute this software for any
@@ -35,6 +35,7 @@ struct roff_man {
struct ohash *mdocmac; /* Mdoc macro lookup table. */
struct ohash *manmac; /* Man macro lookup table. */
const char *os_s; /* Default operating system. */
+ char *os_r; /* Operating system name at run time. */
struct roff_node *last; /* The last node parsed. */
struct roff_node *last_es; /* The most recent Es node. */
int quick; /* Abort parse early. */
@@ -81,6 +82,8 @@ struct ohash *roffhash_alloc(enum roff_tok, enum roff_tok);
enum roff_tok roffhash_find(struct ohash *, const char *, size_t);
void roffhash_free(struct ohash *);
+enum mandoc_esc roff_escape(const char *, const int, const int,
+ int *, int *, int *, int *, int *);
void roff_state_reset(struct roff_man *);
void roff_validate(struct roff_man *);
diff --git a/roff_term.c b/roff_term.c
index 115d850fb261..f696898ebd5a 100644
--- a/roff_term.c
+++ b/roff_term.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: roff_term.c,v 1.20 2020/09/03 17:37:06 schwarze Exp $ */
+/* $Id: roff_term.c,v 1.25 2023/04/28 19:11:04 schwarze Exp $ */
/*
* Copyright (c) 2010,2014,2015,2017-2020 Ingo Schwarze <schwarze@openbsd.org>
*
@@ -175,7 +175,7 @@ roff_term_pre_po(ROFF_TERM_ARGS)
} else
ponew = polast;
- /* Remeber both the previous and the newly requested offset. */
+ /* Remember both the previous and the newly requested offset. */
polast = po;
po = ponew;
diff --git a/st.c b/st.c
index c4d86e33feb0..4cff3e7caca5 100644
--- a/st.c
+++ b/st.c
@@ -1,4 +1,4 @@
-/* $Id: st.c,v 1.16 2018/12/14 01:18:26 schwarze Exp $ */
+/* $Id: st.c,v 1.18 2024/06/16 18:49:04 schwarze Exp $ */
/*
* Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
*
@@ -37,6 +37,7 @@ LINE("-p1003.1-96", "ISO/IEC 9945-1:1996 (\\(lqPOSIX.1\\(rq)")
LINE("-p1003.1-2001", "IEEE Std 1003.1-2001 (\\(lqPOSIX.1\\(rq)")
LINE("-p1003.1-2004", "IEEE Std 1003.1-2004 (\\(lqPOSIX.1\\(rq)")
LINE("-p1003.1-2008", "IEEE Std 1003.1-2008 (\\(lqPOSIX.1\\(rq)")
+LINE("-p1003.1-2024", "IEEE Std 1003.1-2024 (\\(lqPOSIX.1\\(rq)")
LINE("-p1003.1", "IEEE Std 1003.1 (\\(lqPOSIX.1\\(rq)")
LINE("-p1003.1b", "IEEE Std 1003.1b (\\(lqPOSIX.1b\\(rq)")
LINE("-p1003.1b-93", "IEEE Std 1003.1b-1993 (\\(lqPOSIX.1b\\(rq)")
@@ -67,7 +68,6 @@ LINE("-xpg4", "X/Open Portability Guide Issue\\~4 (\\(lqXPG4\\(rq)")
LINE("-xpg4.2", "X/Open Portability Guide Issue\\~4, Version\\~2 (\\(lqXPG4.2\\(rq)")
LINE("-xbd5", "X/Open Base Definitions Issue\\~5 (\\(lqXBD5\\(rq)")
LINE("-xcu5", "X/Open Commands and Utilities Issue\\~5 (\\(lqXCU5\\(rq)")
-LINE("-xsh4.2", "X/Open System Interfaces and Headers Issue\\~4, Version\\~2 (\\(lqXSH4.2\\(rq)")
LINE("-xsh5", "X/Open System Interfaces and Headers Issue\\~5 (\\(lqXSH5\\(rq)")
LINE("-xns5", "X/Open Networking Services Issue\\~5 (\\(lqXNS5\\(rq)")
LINE("-xns5.2", "X/Open Networking Services Issue\\~5.2 (\\(lqXNS5.2\\(rq)")
diff --git a/tag.c b/tag.c
index fcaad99f2e2e..7dcd5758705f 100644
--- a/tag.c
+++ b/tag.c
@@ -1,6 +1,7 @@
-/* $Id: tag.c,v 1.36 2020/04/19 16:36:16 schwarze Exp $ */
+/* $Id: tag.c,v 1.38 2023/11/24 05:02:18 schwarze Exp $ */
/*
- * Copyright (c) 2015,2016,2018,2019,2020 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2015, 2016, 2018, 2019, 2020, 2022, 2023
+ * Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -25,11 +26,13 @@
#include <limits.h>
#include <stddef.h>
#include <stdint.h>
+#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "mandoc_aux.h"
#include "mandoc_ohash.h"
+#include "mandoc.h"
#include "roff.h"
#include "mdoc.h"
#include "roff_int.h"
@@ -80,19 +83,34 @@ tag_free(void)
/*
* Set a node where a term is defined,
- * unless it is already defined at a lower priority.
+ * unless the term is already defined at a lower priority.
*/
void
tag_put(const char *s, int prio, struct roff_node *n)
{
struct tag_entry *entry;
struct roff_node *nold;
- const char *se;
+ const char *se, *src;
+ char *cpy;
size_t len;
unsigned int slot;
+ int changed;
assert(prio <= TAG_FALLBACK);
+ /*
+ * If the node is already tagged, the existing tag is
+ * explicit and we are now about to add an implicit tag.
+ * Don't do that; just skip implicit tagging if the author
+ * specified an explicit tag.
+ */
+
+ if (n->flags & NODE_ID)
+ return;
+
+ /* Determine the implicit tag. */
+
+ changed = 1;
if (s == NULL) {
if (n->child == NULL || n->child->type != ROFFT_TEXT)
return;
@@ -109,27 +127,53 @@ tag_put(const char *s, int prio, struct roff_node *n)
s += 2;
break;
default:
- break;
+ return;
}
break;
default:
+ changed = 0;
break;
}
}
/*
+ * Translate \- and ASCII_HYPH to plain '-'.
* Skip whitespace and escapes and whatever follows,
* and if there is any, downgrade the priority.
*/
- len = strcspn(s, " \t\\");
+ cpy = mandoc_malloc(strlen(s) + 1);
+ for (src = s, len = 0; *src != '\0'; src++, len++) {
+ switch (*src) {
+ case '\t':
+ case ' ':
+ changed = 1;
+ break;
+ case ASCII_HYPH:
+ cpy[len] = '-';
+ changed = 1;
+ continue;
+ case '\\':
+ if (src[1] != '-')
+ break;
+ src++;
+ changed = 1;
+ /* FALLTHROUGH */
+ default:
+ cpy[len] = *src;
+ continue;
+ }
+ break;
+ }
if (len == 0)
- return;
+ goto out;
+ cpy[len] = '\0';
- se = s + len;
- if (*se != '\0' && prio < TAG_WEAK)
+ if (*src != '\0' && prio < TAG_WEAK)
prio = TAG_WEAK;
+ s = cpy;
+ se = cpy + len;
slot = ohash_qlookupi(&tag_data, s, &se);
entry = ohash_find(&tag_data, slot);
@@ -137,8 +181,7 @@ tag_put(const char *s, int prio, struct roff_node *n)
if (entry == NULL) {
entry = mandoc_malloc(sizeof(*entry) + len + 1);
- memcpy(entry->s, s, len);
- entry->s[len] = '\0';
+ memcpy(entry->s, s, len + 1);
entry->nodes = NULL;
entry->maxnodes = entry->nnodes = 0;
ohash_insert(&tag_data, slot, entry);
@@ -150,7 +193,7 @@ tag_put(const char *s, int prio, struct roff_node *n)
*/
else if (entry->prio < prio)
- return;
+ goto out;
/*
* If the existing entry is worse, clear it.
@@ -167,7 +210,7 @@ tag_put(const char *s, int prio, struct roff_node *n)
}
if (prio == TAG_FALLBACK) {
entry->prio = TAG_DELETE;
- return;
+ goto out;
}
}
@@ -181,10 +224,13 @@ tag_put(const char *s, int prio, struct roff_node *n)
entry->nodes[entry->nnodes++] = n;
entry->prio = prio;
n->flags |= NODE_ID;
- if (n->child == NULL || n->child->string != s || *se != '\0') {
+ if (changed) {
assert(n->tag == NULL);
n->tag = mandoc_strndup(s, len);
}
+
+ out:
+ free(cpy);
}
int
diff --git a/tbl.7 b/tbl.7
index 4ecc354c4ea3..2b2faea5a190 100644
--- a/tbl.7
+++ b/tbl.7
@@ -1,4 +1,4 @@
-.\" $Id: tbl.7,v 1.37 2021/09/18 12:34:27 schwarze Exp $
+.\" $Id: tbl.7,v 1.39 2022/08/28 13:52:59 schwarze Exp $
.\"
.\" Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
.\" Copyright (c) 2014,2015,2017,2018,2019 Ingo Schwarze <schwarze@openbsd.org>
@@ -15,7 +15,7 @@
.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
.\"
-.Dd $Mdocdate: September 18 2021 $
+.Dd $Mdocdate: August 28 2022 $
.Dt TBL 7
.Os
.Sh NAME
@@ -333,7 +333,7 @@ Ruby:1.8.7.374
TeX Live:2015
.TE
.Ed
-.sp 2v
+.Pp
Spans and skipping width calculations:
.Bd -literal -offset indent
\&.TS
@@ -357,7 +357,7 @@ l:center:
:right
.TE
.Ed
-.sp 2v
+.Pp
Text blocks, specifying spacings and specifying and equalizing
column widths, putting lines into individual cells, and overriding
.Cm allbox :
@@ -367,7 +367,7 @@ allbox tab(:);
le le||7 lw10.
The fourth line:_:line 1
of this column:=:line 2
-determines:\_:line 3
+determines:\e_:line 3
the column width.:T{
This text is too wide to fit into a column of width 17.
T}:line 4
@@ -391,7 +391,7 @@ No break here.
T}::line 5
.TE
.Ed
-.sp 2v
+.Pp
These examples were constructed to demonstrate many
.Nm
features in a compact way.
diff --git a/tbl_html.c b/tbl_html.c
index 65c8ae8f887c..57d90c4c2d67 100644
--- a/tbl_html.c
+++ b/tbl_html.c
@@ -1,7 +1,8 @@
-/* $Id: tbl_html.c,v 1.38 2021/09/09 16:52:52 schwarze Exp $ */
+/* $Id: tbl_html.c,v 1.41 2022/04/23 14:02:17 schwarze Exp $ */
/*
+ * Copyright (c) 2014, 2015, 2017, 2018, 2021, 2022
+ * Ingo Schwarze <schwarze@openbsd.org>
* Copyright (c) 2011 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2014,2015,2017,2018,2021 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -24,6 +25,9 @@
#include <stdlib.h>
#include <string.h>
+#if DEBUG_MEMORY
+#include "mandoc_dbg.h"
+#endif
#include "mandoc.h"
#include "roff.h"
#include "tbl.h"
@@ -134,7 +138,7 @@ print_tbl(struct html *h, const struct tbl_span *sp)
*/
if (sp->pos != TBL_SPAN_DATA)
- return;
+ goto out;
/* Inhibit printing of spaces: we do padding ourselves. */
@@ -247,7 +251,9 @@ print_tbl(struct html *h, const struct tbl_span *sp)
if (dp->layout->pos == TBL_CELL_HORIZ ||
dp->layout->pos == TBL_CELL_DHORIZ ||
dp->pos == TBL_DATA_HORIZ ||
- dp->pos == TBL_DATA_DHORIZ)
+ dp->pos == TBL_DATA_NHORIZ ||
+ dp->pos == TBL_DATA_DHORIZ ||
+ dp->pos == TBL_DATA_NDHORIZ)
print_otag(h, TAG_HR, "");
else if (dp->string != NULL) {
save_font = h->metac;
@@ -284,6 +290,7 @@ print_tbl(struct html *h, const struct tbl_span *sp)
h->flags &= ~HTML_NONOSPACE;
+out:
if (sp->next == NULL) {
assert(h->tbl.cols);
free(h->tbl.cols);
diff --git a/tbl_term.c b/tbl_term.c
index eac125586c4d..e92349514d9f 100644
--- a/tbl_term.c
+++ b/tbl_term.c
@@ -1,7 +1,7 @@
-/* $Id: tbl_term.c,v 1.75 2021/08/10 12:55:04 schwarze Exp $ */
+/* $Id: tbl_term.c,v 1.79 2022/08/28 10:58:31 schwarze Exp $ */
/*
+ * Copyright (c) 2011-2022 Ingo Schwarze <schwarze@openbsd.org>
* Copyright (c) 2009, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2011-2021 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -25,6 +25,9 @@
#include <stdlib.h>
#include <string.h>
+#if DEBUG_MEMORY
+#include "mandoc_dbg.h"
+#endif
#include "mandoc.h"
#include "tbl.h"
#include "out.h"
@@ -290,6 +293,7 @@ term_tbl(struct termp *tp, const struct tbl_span *sp)
}
tp->tcol++;
tp->col = 0;
+ tp->flags &= ~(TERMP_BACKAFTER | TERMP_BACKBEFORE);
tbl_data(tp, sp->opts, cp, dp, tp->tbl.cols + ic);
if (dp != NULL &&
(ic || sp->layout->first->pos != TBL_CELL_SPAN)) {
@@ -545,15 +549,11 @@ term_tbl(struct termp *tp, const struct tbl_span *sp)
tp->flags &= ~TERMP_MULTICOL;
tp->tcol->rmargin = tp->maxrmargin;
if (sp->next == NULL) {
- if (sp->opts->opts & (TBL_OPT_DBOX | TBL_OPT_BOX)) {
+ if (sp->opts->opts & (TBL_OPT_DBOX | TBL_OPT_BOX))
tbl_hrule(tp, sp, sp, NULL, TBL_OPT_BOX);
- tp->skipvsp = 1;
- }
if (tp->enc == TERMENC_ASCII &&
- sp->opts->opts & TBL_OPT_DBOX) {
+ sp->opts->opts & TBL_OPT_DBOX)
tbl_hrule(tp, sp, sp, NULL, TBL_OPT_DBOX);
- tp->skipvsp = 2;
- }
assert(tp->tbl.cols);
free(tp->tbl.cols);
tp->tbl.cols = NULL;
@@ -820,8 +820,11 @@ tbl_literal(struct termp *tp, const struct tbl_dat *dp,
width = col->width;
ic = dp->layout->col;
hspans = dp->hspans;
- while (hspans--)
- width += tp->tbl.cols[++ic].width + 3;
+ while (hspans--) {
+ width += tp->tbl.cols[ic].spacing;
+ ic++;
+ width += tp->tbl.cols[ic].width;
+ }
padr = width > len ? width - len : 0;
padl = 0;
diff --git a/term.c b/term.c
index fb0351d91f1d..58d9d9bf9240 100644
--- a/term.c
+++ b/term.c
@@ -1,7 +1,7 @@
-/* $Id: term.c,v 1.283 2021/08/10 12:55:04 schwarze Exp $ */
+/* $Id: term.c,v 1.291 2023/04/28 19:11:04 schwarze Exp $ */
/*
+ * Copyright (c) 2010-2022 Ingo Schwarze <schwarze@openbsd.org>
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2010-2020 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -58,6 +58,7 @@ term_setcol(struct termp *p, size_t maxtcol)
void
term_free(struct termp *p)
{
+ term_tab_free();
for (p->tcol = p->tcols; p->tcol < p->tcols + p->maxtcol; p->tcol++)
free(p->tcol->buf);
free(p->tcols);
@@ -156,6 +157,11 @@ term_flushln(struct termp *p)
/* Finally, print the field content. */
term_field(p, vbl, nbr);
+ if (vbr < vtarget)
+ p->tcol->taboff += vbr;
+ else
+ p->tcol->taboff += vtarget;
+ p->tcol->taboff += (*p->width)(p, ' ');
/*
* If there is no text left in the field, exit the loop.
@@ -175,7 +181,9 @@ term_flushln(struct termp *p)
vbr += (*p->width)(p, ' ');
continue;
case '\n':
+ case ASCII_NBRZW:
case ASCII_BREAK:
+ case ASCII_TABREF:
continue;
default:
break;
@@ -186,7 +194,7 @@ term_flushln(struct termp *p)
break;
/*
- * At the location of an automtic line break, input
+ * At the location of an automatic line break, input
* space characters are consumed by the line break.
*/
@@ -206,7 +214,6 @@ term_flushln(struct termp *p)
return;
endline(p);
- p->viscol = 0;
/*
* Normally, start the next line at the same indentation
@@ -257,9 +264,11 @@ term_fill(struct termp *p, size_t *nbr, size_t *vbr, size_t vtarget)
size_t vn; /* Visual position of the next character. */
int breakline; /* Break at the end of this word. */
int graph; /* Last character was non-blank. */
+ int taboff; /* Temporary offset for literal tabs. */
*nbr = *vbr = vis = 0;
breakline = graph = 0;
+ taboff = p->tcol->taboff;
for (ic = p->tcol->col; ic < p->tcol->lastcol; ic++) {
switch (p->tcol->buf[ic]) {
case '\b': /* Escape \o (overstrike) or backspace markup. */
@@ -267,22 +276,11 @@ term_fill(struct termp *p, size_t *nbr, size_t *vbr, size_t vtarget)
vis -= (*p->width)(p, p->tcol->buf[ic - 1]);
continue;
- case '\t': /* Normal ASCII whitespace. */
case ' ':
case ASCII_BREAK: /* Escape \: (breakpoint). */
- switch (p->tcol->buf[ic]) {
- case '\t':
- vn = term_tab_next(vis);
- break;
- case ' ':
- vn = vis + (*p->width)(p, ' ');
- break;
- case ASCII_BREAK:
- vn = vis;
- break;
- default:
- abort();
- }
+ vn = vis;
+ if (p->tcol->buf[ic] == ' ')
+ vn += (*p->width)(p, ' ');
/* Can break at the end of a word. */
if (breakline || vn > vtarget)
break;
@@ -316,12 +314,30 @@ term_fill(struct termp *p, size_t *nbr, size_t *vbr, size_t vtarget)
*vbr = vis;
continue;
- case ASCII_NBRSP: /* Non-breakable space. */
- p->tcol->buf[ic] = ' ';
- /* FALLTHROUGH */
- default: /* Printable character. */
+ case ASCII_TABREF:
+ taboff = -vis - (*p->width)(p, ' ');
+ continue;
+
+ default:
+ switch (p->tcol->buf[ic]) {
+ case '\t':
+ if (taboff < 0 && (size_t)-taboff > vis)
+ vis = 0;
+ else
+ vis += taboff;
+ vis = term_tab_next(vis);
+ vis -= taboff;
+ break;
+ case ASCII_NBRZW: /* Non-breakable zero-width. */
+ break;
+ case ASCII_NBRSP: /* Non-breakable space. */
+ p->tcol->buf[ic] = ' ';
+ /* FALLTHROUGH */
+ default: /* Printable character. */
+ vis += (*p->width)(p, p->tcol->buf[ic]);
+ break;
+ }
graph = 1;
- vis += (*p->width)(p, p->tcol->buf[ic]);
if (vis > vtarget && *nbr > 0)
return;
continue;
@@ -351,10 +367,12 @@ term_field(struct termp *p, size_t vbl, size_t nbr)
{
size_t ic; /* Character position in the input buffer. */
size_t vis; /* Visual position of the current character. */
+ size_t vt; /* Visual position including tab offset. */
size_t dv; /* Visual width of the current character. */
- size_t vn; /* Visual position of the next character. */
+ int taboff; /* Temporary offset for literal tabs. */
vis = 0;
+ taboff = p->tcol->taboff;
for (ic = p->tcol->col; ic < nbr; ic++) {
/*
@@ -365,15 +383,22 @@ term_field(struct termp *p, size_t vbl, size_t nbr)
switch (p->tcol->buf[ic]) {
case '\n':
case ASCII_BREAK:
+ case ASCII_NBRZW:
continue;
- case '\t':
- vn = term_tab_next(vis);
- vbl += vn - vis;
- vis = vn;
+ case ASCII_TABREF:
+ taboff = -vis - (*p->width)(p, ' ');
continue;
+ case '\t':
case ' ':
case ASCII_NBRSP:
- dv = (*p->width)(p, ' ');
+ if (p->tcol->buf[ic] == '\t') {
+ if (taboff < 0 && (size_t)-taboff > vis)
+ vt = 0;
+ else
+ vt = vis + taboff;
+ dv = term_tab_next(vt) - vt;
+ } else
+ dv = (*p->width)(p, ' ');
vbl += dv;
vis += dv;
continue;
@@ -435,10 +460,10 @@ endline(struct termp *p)
void
term_newln(struct termp *p)
{
-
p->flags |= TERMP_NOSPACE;
if (p->tcol->lastcol || p->viscol)
term_flushln(p);
+ p->tcol->taboff = 0;
}
/*
@@ -571,18 +596,23 @@ term_word(struct termp *p, const char *word)
break;
case ESCAPE_NUMBERED:
uc = mchars_num2char(seq, sz);
- if (uc < 0)
- continue;
- break;
+ if (uc >= 0)
+ break;
+ bufferc(p, ASCII_NBRZW);
+ continue;
case ESCAPE_SPECIAL:
if (p->enc == TERMENC_ASCII) {
cp = mchars_spec2str(seq, sz, &ssz);
if (cp != NULL)
encode(p, cp, ssz);
+ else
+ bufferc(p, ASCII_NBRZW);
} else {
uc = mchars_spec2cp(seq, sz);
if (uc > 0)
encode1(p, uc);
+ else
+ bufferc(p, ASCII_NBRZW);
}
continue;
case ESCAPE_UNDEF:
@@ -627,6 +657,10 @@ term_word(struct termp *p, const char *word)
encode(p, "utf8", 4);
continue;
case ESCAPE_HORIZ:
+ if (p->flags & TERMP_BACKAFTER) {
+ p->flags &= ~TERMP_BACKAFTER;
+ continue;
+ }
if (*seq == '|') {
seq++;
uc = -p->col;
@@ -635,12 +669,24 @@ term_word(struct termp *p, const char *word)
if (a2roffsu(seq, &su, SCALE_EM) == NULL)
continue;
uc += term_hen(p, &su);
- if (uc > 0)
- while (uc-- > 0)
- bufferc(p, ASCII_NBRSP);
- else if (p->col > (size_t)(-uc))
+ if (uc >= 0) {
+ while (uc > 0) {
+ uc -= term_len(p, 1);
+ if (p->flags & TERMP_BACKBEFORE)
+ p->flags &= ~TERMP_BACKBEFORE;
+ else
+ bufferc(p, ASCII_NBRSP);
+ }
+ continue;
+ }
+ if (p->flags & TERMP_BACKBEFORE) {
+ p->flags &= ~TERMP_BACKBEFORE;
+ assert(p->col > 0);
+ p->col--;
+ }
+ if (p->col >= (size_t)(-uc)) {
p->col += uc;
- else {
+ } else {
uc += p->col;
p->col = 0;
if (p->tcol->offset > (size_t)(-uc)) {
@@ -728,6 +774,9 @@ term_word(struct termp *p, const char *word)
if (p->col > p->tcol->lastcol)
p->col = p->tcol->lastcol;
continue;
+ case ESCAPE_IGNORE:
+ bufferc(p, ASCII_NBRZW);
+ continue;
default:
continue;
}
@@ -775,6 +824,14 @@ bufferc(struct termp *p, char c)
p->tcol->lastcol = p->col;
}
+void
+term_tab_ref(struct termp *p)
+{
+ if (p->tcol->lastcol && p->tcol->lastcol <= p->col &&
+ (p->flags & TERMP_NOBUF) == 0)
+ bufferc(p, ASCII_TABREF);
+}
+
/*
* See encode().
* Do this for a single (probably unicode) value.
@@ -919,8 +976,8 @@ term_strlen(const struct termp *p, const char *cp)
int ssz, skip, uc;
const char *seq, *rhs;
enum mandoc_esc esc;
- static const char rej[] = { '\\', ASCII_NBRSP, ASCII_HYPH,
- ASCII_BREAK, '\0' };
+ static const char rej[] = { '\\', ASCII_NBRSP, ASCII_NBRZW,
+ ASCII_BREAK, ASCII_HYPH, ASCII_TABREF, '\0' };
/*
* Account for escaped sequences within string length
diff --git a/term.h b/term.h
index f0a033a46f31..3b3a79527eeb 100644
--- a/term.h
+++ b/term.h
@@ -1,7 +1,7 @@
-/* $Id: term.h,v 1.131 2019/01/04 03:21:02 schwarze Exp $ */
+/* $Id: term.h,v 1.134 2022/08/16 17:45:55 schwarze Exp $ */
/*
+ * Copyright (c) 2011-2015,2017,2019,2022 Ingo Schwarze <schwarze@openbsd.org>
* Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2011-2015, 2017, 2019 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -56,6 +56,7 @@ struct termp_col {
size_t col; /* Byte in buf to be written. */
size_t rmargin; /* Current right margin. */
size_t offset; /* Current left margin. */
+ size_t taboff; /* Offset for literal tabs. */
};
struct termp {
@@ -149,7 +150,9 @@ size_t term_len(const struct termp *, size_t);
void term_tab_set(const struct termp *, const char *);
void term_tab_iset(size_t);
+void term_tab_ref(struct termp *);
size_t term_tab_next(size_t);
+void term_tab_free(void);
void term_fontpush(struct termp *, enum termfont);
void term_fontpop(struct termp *);
diff --git a/term_ascii.c b/term_ascii.c
index bf7e9b639e04..3942dc757953 100644
--- a/term_ascii.c
+++ b/term_ascii.c
@@ -1,4 +1,4 @@
-/* $Id: term_ascii.c,v 1.66 2020/09/09 13:45:05 schwarze Exp $ */
+/* $Id: term_ascii.c,v 1.69 2023/11/13 19:13:01 schwarze Exp $ */
/*
* Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
* Copyright (c) 2014,2015,2017,2018,2020 Ingo Schwarze <schwarze@openbsd.org>
@@ -72,6 +72,7 @@ ascii_init(enum termenc enc, const struct manoutput *outopts)
p->maxtcol = 1;
p->line = 1;
+ p->defindent = 5;
p->defrmargin = p->lastrmargin = 78;
p->fontq = mandoc_reallocarray(NULL,
(p->fontsz = 8), sizeof(*p->fontq));
@@ -122,10 +123,8 @@ ascii_init(enum termenc enc, const struct manoutput *outopts)
}
#endif
- if (outopts->mdoc) {
+ if (outopts->mdoc)
p->mdocstyle = 1;
- p->defindent = 5;
- }
if (outopts->indent)
p->defindent = outopts->indent;
if (outopts->width)
@@ -196,7 +195,7 @@ terminal_sepline(void *arg)
static size_t
ascii_width(const struct termp *p, int c)
{
- return c != ASCII_BREAK;
+ return c != ASCII_BREAK && c != ASCII_NBRZW && c != ASCII_TABREF;
}
void
diff --git a/term_tab.c b/term_tab.c
index 84b4c00c6e65..a2d1074159b9 100644
--- a/term_tab.c
+++ b/term_tab.c
@@ -1,6 +1,6 @@
-/* $Id: term_tab.c,v 1.6 2020/06/22 19:20:40 schwarze Exp $ */
+/* $Id: term_tab.c,v 1.7 2021/10/04 18:56:31 schwarze Exp $ */
/*
- * Copyright (c) 2017 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2017, 2021 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -19,6 +19,8 @@
#include <sys/types.h>
#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
#include "mandoc_aux.h"
#include "out.h"
@@ -33,6 +35,7 @@ struct tablist {
static struct {
struct tablist a; /* All tab positions for lookup. */
struct tablist p; /* Periodic tab positions to add. */
+ struct tablist *r; /* Tablist currently being recorded. */
size_t d; /* Default tab width in units of n. */
} tabs;
@@ -40,8 +43,6 @@ static struct {
void
term_tab_set(const struct termp *p, const char *arg)
{
- static int recording_period;
-
struct roffsu su;
struct tablist *tl;
size_t pos;
@@ -51,7 +52,7 @@ term_tab_set(const struct termp *p, const char *arg)
if (arg == NULL) {
tabs.a.n = tabs.p.n = 0;
- recording_period = 0;
+ tabs.r = &tabs.a;
if (tabs.d == 0) {
a2roffsu(".8i", &su, SCALE_IN);
tabs.d = term_hen(p, &su);
@@ -59,7 +60,7 @@ term_tab_set(const struct termp *p, const char *arg)
return;
}
if (arg[0] == 'T' && arg[1] == '\0') {
- recording_period = 1;
+ tabs.r = &tabs.p;
return;
}
@@ -75,7 +76,7 @@ term_tab_set(const struct termp *p, const char *arg)
/* Select the list, and extend it if it is full. */
- tl = recording_period ? &tabs.p : &tabs.a;
+ tl = tabs.r;
if (tl->n >= tl->s) {
tl->s += 8;
tl->t = mandoc_reallocarray(tl->t, tl->s, sizeof(*tl->t));
@@ -128,3 +129,12 @@ term_tab_next(size_t prev)
return tabs.a.t[i];
}
}
+
+void
+term_tab_free(void)
+{
+ free(tabs.a.t);
+ free(tabs.p.t);
+ memset(&tabs, 0, sizeof(tabs));
+ tabs.r = &tabs.a;
+}
diff --git a/tree.c b/tree.c
index 12e841a50b78..7c1c662d234c 100644
--- a/tree.c
+++ b/tree.c
@@ -1,7 +1,7 @@
-/* $Id: tree.c,v 1.91 2021/09/07 10:59:18 schwarze Exp $ */
+/* $Id: tree.c,v 1.92 2022/01/12 04:54:05 schwarze Exp $ */
/*
* Copyright (c) 2008, 2009, 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
- * Copyright (c) 2013-2015, 2017-2021 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2013-2015, 2017-2022 Ingo Schwarze <schwarze@openbsd.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -509,12 +509,16 @@ print_span(const struct tbl_span *sp, int indent)
putchar('x');
}
switch (dp->pos) {
- case TBL_DATA_HORIZ:
case TBL_DATA_NHORIZ:
- putchar('-');
+ putchar('\\');
+ /* FALLTHROUGH */
+ case TBL_DATA_HORIZ:
+ putchar('_');
break;
- case TBL_DATA_DHORIZ:
case TBL_DATA_NDHORIZ:
+ putchar('\\');
+ /* FALLTHROUGH */
+ case TBL_DATA_DHORIZ:
putchar('=');
break;
default: