aboutsummaryrefslogtreecommitdiff
path: root/usr.bin
diff options
context:
space:
mode:
author: Tim J. Robbins <tjr@FreeBSD.org> 2004-07-11 01:11:12 +0000
committer: Tim J. Robbins <tjr@FreeBSD.org> 2004-07-11 01:11:12 +0000
commit: 40ccfb3137698ded12c4e40e953f337e6bb14cfa (patch)
tree: 6d4f26416ae7246c1e3dab0c7a4b36833ce1aef6 /usr.bin
parent: 7602de354fee3a5f1d2c69dfef205930b271a087 (diff)
download: src-40ccfb3137698ded12c4e40e953f337e6bb14cfa.tar.gz
src-40ccfb3137698ded12c4e40e953f337e6bb14cfa.zip
Add POSIX-style support for multibyte characters to od(1): the 'c'
conversion interprets input bytes as multibyte sequences and displays printable characters in the area corresponding to their first byte. The remaining bytes are shown as "**".
Notes
Notes: svn path=/head/; revision=131954
Diffstat (limited to 'usr.bin')
-rw-r--r-- usr.bin/hexdump/conv.c 67
-rw-r--r-- usr.bin/hexdump/display.c 32
-rw-r--r-- usr.bin/hexdump/hexdump.h 7
-rw-r--r-- usr.bin/hexdump/od.1 19
4 files changed, 114 insertions, 11 deletions
diff --git a/usr.bin/hexdump/conv.c b/usr.bin/hexdump/conv.c
index 581b66631626..5d8ade9ed0fc 100644
--- a/usr.bin/hexdump/conv.c
+++ b/usr.bin/hexdump/conv.c
@@ -39,15 +39,30 @@ __FBSDID("$FreeBSD$");
#include <sys/types.h>
+#include <assert.h>
#include <stdio.h>
#include <ctype.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <wchar.h>
+#include <wctype.h>
#include "hexdump.h"
void
-conv_c(PR *pr, u_char *p)
+conv_c(PR *pr, u_char *p, size_t bufsize)
{
char buf[10];
char const *str;
+ wchar_t wc;
+ size_t clen, oclen;
+ int converr, pad, width;
+ char peekbuf[MB_LEN_MAX];
+
+ if (pr->mbleft > 0) {
+ str = "**";
+ pr->mbleft--;
+ goto strpr;
+ }
switch(*p) {
case '\0':
@@ -78,9 +93,53 @@ conv_c(PR *pr, u_char *p)
default:
break;
}
- if (isprint(*p)) {
- *pr->cchar = 'c';
- (void)printf(pr->fmt, *p);
+ /*
+ * Multibyte characters are disabled for hexdump(1) for backwards
+ * compatibility and consistency (none of its other output formats
+ * recognize them correctly).
+ */
+ converr = 0;
+ if (odmode && MB_CUR_MAX > 1) {
+ oclen = 0;
+retry:
+ clen = mbrtowc(&wc, p, bufsize, &pr->mbstate);
+ if (clen == 0)
+ clen = 1;
+ else if (clen == (size_t)-1 || (clen == (size_t)-2 &&
+ buf == peekbuf)) {
+ memset(&pr->mbstate, 0, sizeof(pr->mbstate));
+ wc = *p;
+ clen = 1;
+ converr = 1;
+ } else if (clen == (size_t)-2) {
+ /*
+ * Incomplete character; peek ahead and see if we
+ * can complete it.
+ */
+ oclen = bufsize;
+ bufsize = peek(p = peekbuf, MB_CUR_MAX);
+ goto retry;
+ }
+ clen += oclen;
+ } else {
+ wc = *p;
+ clen = 1;
+ }
+ if (!converr && iswprint(wc)) {
+ if (!odmode) {
+ *pr->cchar = 'c';
+ (void)printf(pr->fmt, (int)wc);
+ } else {
+ *pr->cchar = 'C';
+ assert(strcmp(pr->fmt, "%3C") == 0);
+ width = wcwidth(wc);
+ assert(width > 0);
+ pad = 3 - width;
+ if (pad < 0)
+ pad = 0;
+ (void)printf("%*s%C", pad, "", wc);
+ pr->mbleft = clen - 1;
+ }
} else {
(void)sprintf(buf, "%03o", (int)*p);
str = buf;
diff --git a/usr.bin/hexdump/display.c b/usr.bin/hexdump/display.c
index 2ab34a49e7ba..c9ba892bbb24 100644
--- a/usr.bin/hexdump/display.c
+++ b/usr.bin/hexdump/display.c
@@ -132,7 +132,8 @@ print(PR *pr, u_char *bp)
(void)printf(pr->fmt, "");
break;
case F_C:
- conv_c(pr, bp);
+ conv_c(pr, bp, eaddress ? eaddress - address :
+ blocksize - address % blocksize);
break;
case F_CHAR:
(void)printf(pr->fmt, *bp);
@@ -261,6 +262,10 @@ get(void)
errx(1, "cannot skip past end of input");
if (need == blocksize)
return((u_char *)NULL);
+ /*
+ * XXX bcmp() is not quite right in the presence
+ * of multibyte characters.
+ */
if (vflag != ALL &&
valid_save &&
bcmp(curp, savp, nread) == 0) {
@@ -284,6 +289,10 @@ get(void)
if (length != -1)
length -= n;
if (!(need -= n)) {
+ /*
+ * XXX bcmp() is not quite right in the presence
+ * of multibyte characters.
+ */
if (vflag == ALL || vflag == FIRST ||
valid_save == 0 ||
bcmp(curp, savp, blocksize) != 0) {
@@ -303,6 +312,27 @@ get(void)
}
}
+size_t /* returns the number of bytes actually peeked (may be fewer than nbytes) */
+peek(u_char *buf, size_t nbytes) /* copy up to nbytes upcoming stdin bytes into buf, then push them back so they stay unread */
+{
+ size_t n, nread;
+ int c;
+
+ if (length != -1 && nbytes > length) /* length is declared outside this hunk; presumably the remaining byte limit, -1 meaning none -- TODO confirm */
+ nbytes = length; /* never look further ahead than that limit */
+ nread = 0;
+ while (nread < nbytes && (c = getchar()) != EOF) { /* stops early when getchar() returns EOF */
+ *buf++ = c;
+ nread++;
+ }
+ n = nread;
+ while (n-- > 0) { /* walk buf backwards: last byte read is pushed back first, so stdin yields them in original order */
+ c = *--buf;
+ ungetc(c, stdin); /* NOTE(review): ISO C guarantees only one byte of pushback, and the return value is not checked here */
+ }
+ return (nread);
+}
+
int
next(char **argv)
{
diff --git a/usr.bin/hexdump/hexdump.h b/usr.bin/hexdump/hexdump.h
index cf3b79c11d14..9a1198f45cf5 100644
--- a/usr.bin/hexdump/hexdump.h
+++ b/usr.bin/hexdump/hexdump.h
@@ -34,6 +34,8 @@
* $FreeBSD$
*/
+#include <wchar.h>
+
typedef struct _pr {
struct _pr *nextpr; /* next print unit */
#define F_ADDRESS 0x001 /* print offset */
@@ -52,6 +54,8 @@ typedef struct _pr {
char *cchar; /* conversion character */
char *fmt; /* printf format */
char *nospace; /* no whitespace version */
+ int mbleft; /* bytes left of multibyte char. */
+ mbstate_t mbstate; /* conversion state */
} PR;
typedef struct _fu {
@@ -88,7 +92,7 @@ void badconv(char *);
void badfmt(const char *);
void badsfmt(void);
void bpad(PR *);
-void conv_c(PR *, u_char *);
+void conv_c(PR *, u_char *, size_t);
void conv_u(PR *, u_char *);
void display(void);
void doskip(const char *, int);
@@ -98,6 +102,7 @@ void newsyntax(int, char ***);
int next(char **);
void nomem(void);
void oldsyntax(int, char ***);
+size_t peek(u_char *, size_t);
void rewrite(FS *);
int size(FS *);
void usage(void);
diff --git a/usr.bin/hexdump/od.1 b/usr.bin/hexdump/od.1
index 225bdecc254b..fb9eb7762d8e 100644
--- a/usr.bin/hexdump/od.1
+++ b/usr.bin/hexdump/od.1
@@ -32,7 +32,7 @@
.\" @(#)od.1 8.1 (Berkeley) 6/6/93
.\" $FreeBSD$
.\"
-.Dd July 3, 2004
+.Dd July 11, 2004
.Os
.Dt OD 1
.Sh NAME
@@ -179,6 +179,10 @@ characters, which are represented as C escapes:
.It vertical tab
\ev
.El
+.Pp
+Multi-byte characters are displayed in the area corresponding to the first
+byte of the character. The remaining bytes are shown as
+.Ql ** .
.It Xo
.Sm off
.Op Cm d | o | u | x
@@ -231,6 +235,15 @@ contain one line for each format.
If no output format is specified,
.Fl t Ar oS
is assumed.
+.Sh ENVIRONMENT
+The
+.Ev LANG , LC_ALL
+and
+.Ev LC_CTYPE
+environment variables affect the execution of
+.Nm
+as described in
+.Xr environ 7 .
.Sh DIAGNOSTICS
.Ex -std
.Sh COMPATIBILITY
@@ -252,7 +265,3 @@ An
.Nm
command appeared in
.At v1 .
-.Sh BUGS
-The
-.Nm
-utility does not recognize multibyte characters.