diff options
Diffstat (limited to 'usr.bin')
-rw-r--r-- | usr.bin/hexdump/conv.c | 67 | ||||
-rw-r--r-- | usr.bin/hexdump/display.c | 32 | ||||
-rw-r--r-- | usr.bin/hexdump/hexdump.h | 7 | ||||
-rw-r--r-- | usr.bin/hexdump/od.1 | 19 |
4 files changed, 114 insertions, 11 deletions
diff --git a/usr.bin/hexdump/conv.c b/usr.bin/hexdump/conv.c index 581b66631626..5d8ade9ed0fc 100644 --- a/usr.bin/hexdump/conv.c +++ b/usr.bin/hexdump/conv.c @@ -39,15 +39,30 @@ __FBSDID("$FreeBSD$"); #include <sys/types.h> +#include <assert.h> #include <stdio.h> #include <ctype.h> +#include <limits.h> +#include <stdlib.h> +#include <wchar.h> +#include <wctype.h> #include "hexdump.h" void -conv_c(PR *pr, u_char *p) +conv_c(PR *pr, u_char *p, size_t bufsize) { char buf[10]; char const *str; + wchar_t wc; + size_t clen, oclen; + int converr, pad, width; + char peekbuf[MB_LEN_MAX]; + + if (pr->mbleft > 0) { + str = "**"; + pr->mbleft--; + goto strpr; + } switch(*p) { case '\0': @@ -78,9 +93,53 @@ conv_c(PR *pr, u_char *p) default: break; } - if (isprint(*p)) { - *pr->cchar = 'c'; - (void)printf(pr->fmt, *p); + /* + * Multibyte characters are disabled for hexdump(1) for backwards + * compatibility and consistency (none of its other output formats + * recognize them correctly). + */ + converr = 0; + if (odmode && MB_CUR_MAX > 1) { + oclen = 0; +retry: + clen = mbrtowc(&wc, p, bufsize, &pr->mbstate); + if (clen == 0) + clen = 1; + else if (clen == (size_t)-1 || (clen == (size_t)-2 && + buf == peekbuf)) { + memset(&pr->mbstate, 0, sizeof(pr->mbstate)); + wc = *p; + clen = 1; + converr = 1; + } else if (clen == (size_t)-2) { + /* + * Incomplete character; peek ahead and see if we + * can complete it. + */ + oclen = bufsize; + bufsize = peek(p = peekbuf, MB_CUR_MAX); + goto retry; + } + clen += oclen; + } else { + wc = *p; + clen = 1; + } + if (!converr && iswprint(wc)) { + if (!odmode) { + *pr->cchar = 'c'; + (void)printf(pr->fmt, (int)wc); + } else { + *pr->cchar = 'C'; + assert(strcmp(pr->fmt, "%3C") == 0); + width = wcwidth(wc); + assert(width > 0); + pad = 3 - width; + if (pad < 0) + pad = 0; + (void)printf("%*s%C", pad, "", wc); + pr->mbleft = clen - 1; + } } else { (void)sprintf(buf, "%03o", (int)*p); str = buf; diff --git a/usr.bin/hexdump/display.c b/usr.bin/hexdump/display.c index 2ab34a49e7ba..c9ba892bbb24 100644 --- a/usr.bin/hexdump/display.c +++ b/usr.bin/hexdump/display.c @@ -132,7 +132,8 @@ print(PR *pr, u_char *bp) (void)printf(pr->fmt, ""); break; case F_C: - conv_c(pr, bp); + conv_c(pr, bp, eaddress ? eaddress - address : + blocksize - address % blocksize); break; case F_CHAR: (void)printf(pr->fmt, *bp); @@ -261,6 +262,10 @@ get(void) errx(1, "cannot skip past end of input"); if (need == blocksize) return((u_char *)NULL); + /* + * XXX bcmp() is not quite right in the presence + * of multibyte characters. + */ if (vflag != ALL && valid_save && bcmp(curp, savp, nread) == 0) { @@ -284,6 +289,10 @@ get(void) if (length != -1) length -= n; if (!(need -= n)) { + /* + * XXX bcmp() is not quite right in the presence + * of multibyte characters. + */ if (vflag == ALL || vflag == FIRST || valid_save == 0 || bcmp(curp, savp, blocksize) != 0) { @@ -303,6 +312,27 @@ get(void) } } +size_t +peek(u_char *buf, size_t nbytes) +{ + size_t n, nread; + int c; + + if (length != -1 && nbytes > length) + nbytes = length; + nread = 0; + while (nread < nbytes && (c = getchar()) != EOF) { + *buf++ = c; + nread++; + } + n = nread; + while (n-- > 0) { + c = *--buf; + ungetc(c, stdin); + } + return (nread); +} + int next(char **argv) { diff --git a/usr.bin/hexdump/hexdump.h b/usr.bin/hexdump/hexdump.h index cf3b79c11d14..9a1198f45cf5 100644 --- a/usr.bin/hexdump/hexdump.h +++ b/usr.bin/hexdump/hexdump.h @@ -34,6 +34,8 @@ * $FreeBSD$ */ +#include <wchar.h> + typedef struct _pr { struct _pr *nextpr; /* next print unit */ #define F_ADDRESS 0x001 /* print offset */ @@ -52,6 +54,8 @@ typedef struct _pr { char *cchar; /* conversion character */ char *fmt; /* printf format */ char *nospace; /* no whitespace version */ + int mbleft; /* bytes left of multibyte char. */ + mbstate_t mbstate; /* conversion state */ } PR; typedef struct _fu { @@ -88,7 +92,7 @@ void badconv(char *); void badfmt(const char *); void badsfmt(void); void bpad(PR *); -void conv_c(PR *, u_char *); +void conv_c(PR *, u_char *, size_t); void conv_u(PR *, u_char *); void display(void); void doskip(const char *, int); @@ -98,6 +102,7 @@ void newsyntax(int, char ***); int next(char **); void nomem(void); void oldsyntax(int, char ***); +size_t peek(u_char *, size_t); void rewrite(FS *); int size(FS *); void usage(void); diff --git a/usr.bin/hexdump/od.1 b/usr.bin/hexdump/od.1 index 225bdecc254b..fb9eb7762d8e 100644 --- a/usr.bin/hexdump/od.1 +++ b/usr.bin/hexdump/od.1 @@ -32,7 +32,7 @@ .\" @(#)od.1 8.1 (Berkeley) 6/6/93 .\" $FreeBSD$ .\" -.Dd July 3, 2004 +.Dd July 11, 2004 .Os .Dt OD 1 .Sh NAME @@ -179,6 +179,10 @@ characters, which are represented as C escapes: .It vertical tab \ev .El +.Pp +Multi-byte characters are displayed in the area corresponding to the first +byte of the character. The remaining bytes are shown as +.Ql ** . .It Xo .Sm off .Op Cm d | o | u | x @@ -231,6 +235,15 @@ contain one line for each format. If no output format is specified, .Fl t Ar oS is assumed. +.Sh ENVIRONMENT +The +.Ev LANG , LC_ALL +and +.Ev LC_CTYPE +environment variables affect the execution of +.Nm +as described in +.Xr environ 7 . .Sh DIAGNOSTICS .Ex -std .Sh COMPATIBILITY @@ -252,7 +265,3 @@ An .Nm command appeared in .At v1 . -.Sh BUGS -The -.Nm -utility does not recognize multibyte characters. |