diff options
Diffstat (limited to 'contrib/nvi/common/conv.c')
-rw-r--r-- | contrib/nvi/common/conv.c | 446 |
1 files changed, 446 insertions, 0 deletions
diff --git a/contrib/nvi/common/conv.c b/contrib/nvi/common/conv.c new file mode 100644 index 000000000000..7803cec9922e --- /dev/null +++ b/contrib/nvi/common/conv.c @@ -0,0 +1,446 @@ +/*- + * Copyright (c) 1993, 1994 + * The Regents of the University of California. All rights reserved. + * Copyright (c) 1993, 1994, 1995, 1996 + * Keith Bostic. All rights reserved. + * Copyright (c) 2011, 2012 + * Zhihao Yuan. All rights reserved. + * + * See the LICENSE file for redistribution information. + */ + +#include "config.h" + +#ifndef lint +static const char sccsid[] = "$Id: conv.c,v 2.39 2013/07/01 23:28:13 zy Exp $"; +#endif /* not lint */ + +#include <sys/types.h> +#include <sys/queue.h> +#include <sys/time.h> + +#include <bitstring.h> +#include <errno.h> +#include <limits.h> +#include <langinfo.h> +#include <locale.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <strings.h> +#include <unistd.h> + +#include "common.h" + +/* + * codeset -- + * Get the locale encoding. + * + * PUBLIC: char * codeset __P((void)); + */ +char * +codeset(void) { + static char *cs; + + if (cs == NULL) + cs = nl_langinfo(CODESET); + return cs; +} + +#ifdef USE_WIDECHAR +static int +raw2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, + size_t *tolen, CHAR_T **dst) +{ + int i; + CHAR_T **tostr = &cw->bp1.wc; + size_t *blen = &cw->blen1; + + BINC_RETW(NULL, *tostr, *blen, len); + + *tolen = len; + for (i = 0; i < len; ++i) + (*tostr)[i] = (u_char) str[i]; + + *dst = cw->bp1.wc; + + return 0; +} + +#define CONV_BUFFER_SIZE 512 +/* fill the buffer with codeset encoding of string pointed to by str + * left has the number of bytes left in str and is adjusted + * len contains the number of bytes put in the buffer + */ +#ifdef USE_ICONV +#define CONVERT(str, left, src, len) \ + do { \ + size_t outleft; \ + char *bp = buffer; \ + outleft = CONV_BUFFER_SIZE; \ + errno = 0; \ + if (iconv(id, (iconv_src_t)&str, &left, &bp, &outleft) == -1 && \ + errno != E2BIG) \ + goto err; \ + if ((len = CONV_BUFFER_SIZE - outleft) == 0) { \ + error = -left; \ + goto err; \ + } \ + src = buffer; \ + } while (0) + +#define IC_RESET() \ + do { \ + if (id != (iconv_t)-1) \ + iconv(id, NULL, NULL, NULL, NULL); \ + } while(0) +#else +#define CONVERT(str, left, src, len) +#define IC_RESET() +#endif + +static int +default_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, + size_t *tolen, CHAR_T **dst, iconv_t id) +{ + size_t i = 0, j; + CHAR_T **tostr = &cw->bp1.wc; + size_t *blen = &cw->blen1; + mbstate_t mbs; + size_t n; + ssize_t nlen = len; + char *src = (char *)str; +#ifdef USE_ICONV + char buffer[CONV_BUFFER_SIZE]; +#endif + size_t left = len; + int error = 1; + + BZERO(&mbs, 1); + BINC_RETW(NULL, *tostr, *blen, nlen); + +#ifdef USE_ICONV + if (id != (iconv_t)-1) + CONVERT(str, left, src, len); +#endif + + for (i = 0, j = 0; j < len; ) { + n = mbrtowc((*tostr)+i, src+j, len-j, &mbs); + /* NULL character converted */ + if (n == -2) error = -(len-j); + if (n == -1 || n == -2) goto err; + if (n == 0) n = 1; + j += n; + if (++i >= *blen) { + nlen += 256; + BINC_RETW(NULL, *tostr, *blen, nlen); + } + if (id != (iconv_t)-1 && j == len && left) { + CONVERT(str, left, src, len); + j = 0; + } + } + + error = 0; +err: + *tolen = i; + *dst = cw->bp1.wc; + IC_RESET(); + + return error; +} + +static int +fe_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, + size_t *tolen, CHAR_T **dst) +{ + return default_char2int(sp, str, len, cw, tolen, dst, + sp->conv.id[IC_FE_CHAR2INT]); +} + +static int +ie_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, + size_t *tolen, CHAR_T **dst) +{ + return default_char2int(sp, str, len, cw, tolen, dst, + sp->conv.id[IC_IE_CHAR2INT]); +} + +static int +cs_char2int(SCR *sp, const char * str, ssize_t len, CONVWIN *cw, + size_t *tolen, CHAR_T **dst) +{ + return default_char2int(sp, str, len, cw, tolen, dst, + (iconv_t)-1); +} + +static int +int2raw(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, + size_t *tolen, char **dst) +{ + int i; + char **tostr = &cw->bp1.c; + size_t *blen = &cw->blen1; + + BINC_RETC(NULL, *tostr, *blen, len); + + *tolen = len; + for (i = 0; i < len; ++i) + (*tostr)[i] = str[i]; + + *dst = cw->bp1.c; + + return 0; +} + +static int +default_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, + size_t *tolen, char **pdst, iconv_t id) +{ + size_t i, j, offset = 0; + char **tostr = &cw->bp1.c; + size_t *blen = &cw->blen1; + mbstate_t mbs; + size_t n; + ssize_t nlen = len + MB_CUR_MAX; + char *dst; + size_t buflen; +#ifdef USE_ICONV + char buffer[CONV_BUFFER_SIZE]; +#endif + int error = 1; + +/* convert first len bytes of buffer and append it to cw->bp + * len is adjusted => 0 + * offset contains the offset in cw->bp and is adjusted + * cw->bp is grown as required + */ +#ifdef USE_ICONV +#define CONVERT2(_buffer, lenp, cw, offset) \ + do { \ + char *bp = _buffer; \ + int ret; \ + do { \ + size_t outleft = cw->blen1 - offset; \ + char *obp = cw->bp1.c + offset; \ + if (cw->blen1 < offset + MB_CUR_MAX) { \ + nlen += 256; \ + BINC_RETC(NULL, cw->bp1.c, cw->blen1, nlen); \ + } \ + errno = 0; \ + ret = iconv(id, (iconv_src_t)&bp, lenp, &obp, &outleft); \ + if (ret == -1 && errno != E2BIG) \ + goto err; \ + offset = cw->blen1 - outleft; \ + } while (ret != 0); \ + } while (0) +#else +#define CONVERT2(_buffer, lenp, cw, offset) +#endif + + + BZERO(&mbs, 1); + BINC_RETC(NULL, *tostr, *blen, nlen); + dst = *tostr; buflen = *blen; + +#ifdef USE_ICONV + if (id != (iconv_t)-1) { + dst = buffer; buflen = CONV_BUFFER_SIZE; + } +#endif + + for (i = 0, j = 0; i < len; ++i) { + n = wcrtomb(dst+j, str[i], &mbs); + if (n == -1) goto err; + j += n; + if (buflen < j + MB_CUR_MAX) { + if (id != (iconv_t)-1) { + CONVERT2(buffer, &j, cw, offset); + } else { + nlen += 256; + BINC_RETC(NULL, *tostr, *blen, nlen); + dst = *tostr; buflen = *blen; + } + } + } + + n = wcrtomb(dst+j, L'\0', &mbs); + j += n - 1; /* don't count NUL at the end */ + *tolen = j; + + if (id != (iconv_t)-1) { + CONVERT2(buffer, &j, cw, offset); + CONVERT2(NULL, NULL, cw, offset); /* back to the initial state */ + *tolen = offset; + } + + error = 0; +err: + if (error) + *tolen = j; + *pdst = cw->bp1.c; + IC_RESET(); + + return error; +} + +static int +fe_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, + size_t *tolen, char **dst) +{ + return default_int2char(sp, str, len, cw, tolen, dst, + sp->conv.id[IC_FE_INT2CHAR]); +} + +static int +cs_int2char(SCR *sp, const CHAR_T * str, ssize_t len, CONVWIN *cw, + size_t *tolen, char **dst) +{ + return default_int2char(sp, str, len, cw, tolen, dst, + (iconv_t)-1); +} + +#endif + +/* + * conv_init -- + * Initialize the iconv environment. + * + * PUBLIC: void conv_init __P((SCR *, SCR *)); + */ +void +conv_init(SCR *orig, SCR *sp) +{ + int i; + + if (orig == NULL) + setlocale(LC_ALL, ""); + if (orig != NULL) + BCOPY(&orig->conv, &sp->conv, 1); +#ifdef USE_WIDECHAR + else { + char *ctype = setlocale(LC_CTYPE, NULL); + + /* + * XXX + * This hack fixes the libncursesw issue on FreeBSD. + */ + if (!strcmp(ctype, "ko_KR.CP949")) + setlocale(LC_CTYPE, "ko_KR.eucKR"); + else if (!strcmp(ctype, "zh_CN.GB2312")) + setlocale(LC_CTYPE, "zh_CN.eucCN"); + else if (!strcmp(ctype, "zh_CN.GBK")) + setlocale(LC_CTYPE, "zh_CN.GB18030"); + + /* + * Switch to 8bit mode if locale is C; + * LC_CTYPE should be reseted to C if unmatched. + */ + if (!strcmp(ctype, "C") || !strcmp(ctype, "POSIX")) { + sp->conv.sys2int = sp->conv.file2int = raw2int; + sp->conv.int2sys = sp->conv.int2file = int2raw; + sp->conv.input2int = raw2int; + } else { + sp->conv.sys2int = cs_char2int; + sp->conv.int2sys = cs_int2char; + sp->conv.file2int = fe_char2int; + sp->conv.int2file = fe_int2char; + sp->conv.input2int = ie_char2int; + } +#ifdef USE_ICONV + o_set(sp, O_INPUTENCODING, OS_STRDUP, codeset(), 0); +#endif + } +#endif + + /* iconv descriptors must be distinct to screens. */ + for (i = 0; i <= IC_IE_TO_UTF16; ++i) + sp->conv.id[i] = (iconv_t)-1; +#ifdef USE_ICONV + conv_enc(sp, O_INPUTENCODING, 0); +#endif +} + +/* + * conv_enc -- + * Convert file/input encoding. + * + * PUBLIC: int conv_enc __P((SCR *, int, char *)); + */ +int +conv_enc(SCR *sp, int option, char *enc) +{ +#if defined(USE_WIDECHAR) && defined(USE_ICONV) + iconv_t *c2w, *w2c; + + switch (option) { + case O_FILEENCODING: + c2w = sp->conv.id + IC_FE_CHAR2INT; + w2c = sp->conv.id + IC_FE_INT2CHAR; + if (!enc) enc = O_STR(sp, O_FILEENCODING); + if (*c2w != (iconv_t)-1) + iconv_close(*c2w); + if (*w2c != (iconv_t)-1) + iconv_close(*w2c); + if (strcasecmp(codeset(), enc)) { + if ((*c2w = iconv_open(codeset(), enc)) == (iconv_t)-1) + goto err; + if ((*w2c = iconv_open(enc, codeset())) == (iconv_t)-1) + goto err; + } else *c2w = *w2c = (iconv_t)-1; + break; + case O_INPUTENCODING: + c2w = sp->conv.id + IC_IE_CHAR2INT; + w2c = sp->conv.id + IC_IE_TO_UTF16; + if (!enc) enc = O_STR(sp, O_INPUTENCODING); + if (*c2w != (iconv_t)-1) + iconv_close(*c2w); + if (*w2c != (iconv_t)-1) + iconv_close(*w2c); + if (strcasecmp(codeset(), enc)) { + if ((*c2w = iconv_open(codeset(), enc)) == (iconv_t)-1) + goto err; + } else *c2w = (iconv_t)-1; + /* UTF-16 can not be locale and can not be inputed. */ + if ((*w2c = iconv_open("utf-16be", enc)) == (iconv_t)-1) + goto err; + break; + } + + F_CLR(sp, SC_CONV_ERROR); + F_SET(sp, SC_SCR_REFORMAT); + + return 0; +err: +#endif + switch (option) { + case O_FILEENCODING: + msgq(sp, M_ERR, + "321|File encoding conversion not supported"); + break; + case O_INPUTENCODING: + msgq(sp, M_ERR, + "322|Input encoding conversion not supported"); + break; + } + return 1; +} + +/* + * conv_end -- + * Close the iconv descriptors, release the buffer. + * + * PUBLIC: void conv_end __P((SCR *)); + */ +void +conv_end(SCR *sp) +{ +#if defined(USE_WIDECHAR) && defined(USE_ICONV) + int i; + for (i = 0; i <= IC_IE_TO_UTF16; ++i) + if (sp->conv.id[i] != (iconv_t)-1) + iconv_close(sp->conv.id[i]); + if (sp->cw.bp1.c != NULL) + free(sp->cw.bp1.c); +#endif +} |