aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ed Schouten <ed@FreeBSD.org> 2013-06-03 17:17:56 +0000
committer Ed Schouten <ed@FreeBSD.org> 2013-06-03 17:17:56 +0000
commit 49111f0092c9eff1bc03d95c7ca6275dc677b273 (patch)
tree 925fa5249e7eb65c98a1c3fed8e18bbc85e4efd2
parent f8ca2db1f82e3a08c25740c7c3f99323794e6ca7 (diff)
download src-49111f0092c9eff1bc03d95c7ca6275dc677b273.tar.gz
src-49111f0092c9eff1bc03d95c7ca6275dc677b273.zip
Add libiconv based versions of *c16*() and *c32*().
I initially thought wchar_t was locale independent, but this seems to be only the case on Linux. This means that we cannot depend on the *wc*() routines to implement *c16*() and *c32*(). Instead, use the Citrus libiconv that is part of libc. I'll see if there is anything I can do to make the existing functions somewhat useful in case the system is built without libiconv in the near future. If not, I'll simply remove the broken implementations. Reviewed by: jilles, gabor
Notes
Notes: svn path=/head/; revision=251314
-rw-r--r-- lib/libc/locale/Makefile.inc 12
-rw-r--r-- lib/libc/locale/c16rtomb_iconv.c 8
-rw-r--r-- lib/libc/locale/c32rtomb_iconv.c 8
-rw-r--r-- lib/libc/locale/cXXrtomb_iconv.h 115
-rw-r--r-- lib/libc/locale/mbrtoc16_iconv.c 8
-rw-r--r-- lib/libc/locale/mbrtoc32_iconv.c 8
-rw-r--r-- lib/libc/locale/mbrtocXX_iconv.h 158
-rw-r--r-- tools/regression/lib/libc/locale/test-c16rtomb.c 30
-rw-r--r-- tools/regression/lib/libc/locale/test-mbrtoc16.c 45
9 files changed, 389 insertions, 3 deletions
diff --git a/lib/libc/locale/Makefile.inc b/lib/libc/locale/Makefile.inc
index ffef22862e8f..c2f2f4e61727 100644
--- a/lib/libc/locale/Makefile.inc
+++ b/lib/libc/locale/Makefile.inc
@@ -4,11 +4,11 @@
# locale sources
.PATH: ${.CURDIR}/${LIBC_ARCH}/locale ${.CURDIR}/locale
-SRCS+= ascii.c big5.c btowc.c c16rtomb.c c32rtomb.c collate.c collcmp.c euc.c \
- fix_grouping.c gb18030.c gb2312.c gbk.c ctype.c isctype.c iswctype.c \
+SRCS+= ascii.c big5.c btowc.c collate.c collcmp.c euc.c fix_grouping.c \
+ gb18030.c gb2312.c gbk.c ctype.c isctype.c iswctype.c \
ldpart.c lmessages.c lmonetary.c lnumeric.c localeconv.c mblen.c \
mbrlen.c \
- mbrtoc16.c mbrtoc32.c mbrtowc.c mbsinit.c mbsnrtowcs.c \
+ mbrtowc.c mbsinit.c mbsnrtowcs.c \
mbsrtowcs.c mbtowc.c mbstowcs.c \
mskanji.c nextwctype.c nl_langinfo.c nomacros.c none.c rpmatch.c \
rune.c \
@@ -23,6 +23,12 @@ SRCS+= ascii.c big5.c btowc.c c16rtomb.c c32rtomb.c collate.c collcmp.c euc.c \
wcwidth.c\
xlocale.c
+.if ${MK_ICONV} != "no"
+SRCS+= c16rtomb_iconv.c c32rtomb_iconv.c mbrtoc16_iconv.c mbrtoc32_iconv.c
+.else
+SRCS+= c16rtomb.c c32rtomb.c mbrtoc16.c mbrtoc32.c
+.endif
+
SYM_MAPS+=${.CURDIR}/locale/Symbol.map
MAN+= btowc.3 \
diff --git a/lib/libc/locale/c16rtomb_iconv.c b/lib/libc/locale/c16rtomb_iconv.c
new file mode 100644
index 000000000000..86bd9dab2a52
--- /dev/null
+++ b/lib/libc/locale/c16rtomb_iconv.c
@@ -0,0 +1,8 @@
+/* $FreeBSD$ */
+#define charXX_t char16_t /* Code unit type the shared template operates on. */
+#define cXXrtomb c16rtomb /* Name given to the template's current-locale entry point. */
+#define cXXrtomb_l c16rtomb_l /* Name given to the template's explicit-locale entry point. */
+#define SRCBUF_LEN 2 /* Capacity, in code units, of the template's pending-input buffer (room for a surrogate pair). */
+#define UTF_XX_INTERNAL "UTF-16-INTERNAL" /* Encoding name the template passes to _citrus_iconv_open() for the charXX_t side. */
+
+#include "cXXrtomb_iconv.h" /* Expands the generic implementation using the macros above. */
diff --git a/lib/libc/locale/c32rtomb_iconv.c b/lib/libc/locale/c32rtomb_iconv.c
new file mode 100644
index 000000000000..dabbfd7f7ab4
--- /dev/null
+++ b/lib/libc/locale/c32rtomb_iconv.c
@@ -0,0 +1,8 @@
+/* $FreeBSD$ */
+#define charXX_t char32_t /* Code unit type the shared template operates on. */
+#define cXXrtomb c32rtomb /* Name given to the template's current-locale entry point. */
+#define cXXrtomb_l c32rtomb_l /* Name given to the template's explicit-locale entry point. */
+#define SRCBUF_LEN 1 /* Capacity, in code units, of the template's pending-input buffer. */
+#define UTF_XX_INTERNAL "UTF-32-INTERNAL" /* Encoding name the template passes to _citrus_iconv_open() for the charXX_t side. */
+
+#include "cXXrtomb_iconv.h" /* Expands the generic implementation using the macros above. */
diff --git a/lib/libc/locale/cXXrtomb_iconv.h b/lib/libc/locale/cXXrtomb_iconv.h
new file mode 100644
index 000000000000..d6e7ce0ae3ac
--- /dev/null
+++ b/lib/libc/locale/cXXrtomb_iconv.h
@@ -0,0 +1,115 @@
+/*-
+ * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/queue.h>
+
+#include <assert.h>
+#include <errno.h>
+#include <langinfo.h>
+#include <uchar.h>
+
+#include "../iconv/citrus_hash.h"
+#include "../iconv/citrus_module.h"
+#include "../iconv/citrus_iconv.h"
+#include "xlocale_private.h"
+
+typedef struct { /* Conversion state that cXXrtomb_l() overlays on an mbstate_t. */
+ bool initialized; /* True once the iconv handle below has been opened; a zeroed mbstate_t reads as false. */
+ struct _citrus_iconv iconv; /* Citrus iconv handle embedded directly in the state. */
+ union { /* One buffer, viewed as code units when filling and as bytes when converting. */
+ charXX_t widechar[SRCBUF_LEN]; /* Pending input code units. */
+ char bytes[sizeof(charXX_t) * SRCBUF_LEN]; /* The same storage as the byte source for the converter. */
+ } srcbuf;
+ size_t srcbuf_len; /* Number of code units currently held in srcbuf. */
+} _ConversionState;
+_Static_assert(sizeof(_ConversionState) <= sizeof(mbstate_t), /* Required because an mbstate_t pointer is cast to this type. */
+ "Size of _ConversionState must not exceed mbstate_t's size.");
+/* Convert code unit c to the multibyte encoding of locale with Citrus iconv, keeping partial input in *ps. */
+size_t
+cXXrtomb_l(char * __restrict s, charXX_t c, mbstate_t * __restrict ps,
+ locale_t locale)
+{
+ _ConversionState *cs;
+ struct _citrus_iconv *handle;
+ char *src, *dst;
+ size_t srcleft, dstleft, invlen;
+ int err;
+
+ FIX_LOCALE(locale);
+ if (ps == NULL)
+ ps = &locale->cXXrtomb; /* No caller-supplied state: use the one stored in the locale object. */
+ cs = (_ConversionState *)ps; /* The mbstate_t storage is reinterpreted as _ConversionState. */
+ handle = &cs->iconv; /* NOTE(review): presumably lets _citrus_iconv_open() fill the embedded handle in place - confirm. */
+
+ /* (Re)initialize the state: always when s is NULL, otherwise on first use of *ps. */
+ if (s == NULL || !cs->initialized) {
+ if (_citrus_iconv_open(&handle, UTF_XX_INTERNAL,
+ nl_langinfo_l(CODESET, locale)) != 0) {
+ cs->initialized = false;
+ errno = EINVAL;
+ return (-1); /* Becomes (size_t)-1 through the return type. */
+ }
+ handle->cv_shared->ci_discard_ilseq = true; /* NOTE(review): presumably drops unconvertible input rather than failing - confirm in citrus_iconv. */
+ handle->cv_shared->ci_hooks = NULL;
+ cs->srcbuf_len = 0; /* Forget any code unit buffered by an earlier call. */
+ cs->initialized = true;
+ if (s == NULL)
+ return (1); /* Reset-only call: nothing is converted or written. */
+ }
+
+ assert(cs->srcbuf_len < sizeof(cs->srcbuf.widechar) / sizeof(charXX_t)); /* There must be room for one more code unit. */
+ cs->srcbuf.widechar[cs->srcbuf_len++] = c; /* Append c to the pending input. */
+
+ /* Perform conversion of everything buffered so far directly into s. */
+ src = cs->srcbuf.bytes;
+ srcleft = cs->srcbuf_len * sizeof(charXX_t);
+ dst = s;
+ dstleft = MB_CUR_MAX_L(locale); /* Output is limited to MB_CUR_MAX_L(locale) bytes. */
+ err = _citrus_iconv_convert(handle, &src, &srcleft, &dst, &dstleft,
+ 0, &invlen);
+
+ /* Character is part of a surrogate pair. We need more input. */
+ if (err == EINVAL)
+ return (0); /* c stays in srcbuf and is converted together with the next code unit. */
+ cs->srcbuf_len = 0; /* In every other case the buffered input is discarded. */
+
+ /* Illegal sequence: the conversion produced no output bytes. */
+ if (dst == s) {
+ errno = EILSEQ;
+ return ((size_t)-1);
+ }
+ return (dst - s); /* Number of bytes written to s. */
+}
+/* Same as cXXrtomb_l(), using the locale returned by __get_locale(). */
+size_t
+cXXrtomb(char * __restrict s, charXX_t c, mbstate_t * __restrict ps)
+{
+
+ return (cXXrtomb_l(s, c, ps, __get_locale()));
+}
diff --git a/lib/libc/locale/mbrtoc16_iconv.c b/lib/libc/locale/mbrtoc16_iconv.c
new file mode 100644
index 000000000000..f1eaf1925496
--- /dev/null
+++ b/lib/libc/locale/mbrtoc16_iconv.c
@@ -0,0 +1,8 @@
+/* $FreeBSD$ */
+#define charXX_t char16_t /* Code unit type the shared template produces. */
+#define mbrtocXX mbrtoc16 /* Name given to the template's current-locale entry point. */
+#define mbrtocXX_l mbrtoc16_l /* Name given to the template's explicit-locale entry point. */
+#define DSTBUF_LEN 2 /* Capacity, in code units, of the template's output buffer (room for a surrogate pair). */
+#define UTF_XX_INTERNAL "UTF-16-INTERNAL" /* Encoding name the template passes to _citrus_iconv_open() for the charXX_t side. */
+
+#include "mbrtocXX_iconv.h" /* Expands the generic implementation using the macros above. */
diff --git a/lib/libc/locale/mbrtoc32_iconv.c b/lib/libc/locale/mbrtoc32_iconv.c
new file mode 100644
index 000000000000..ec2c0145d9d6
--- /dev/null
+++ b/lib/libc/locale/mbrtoc32_iconv.c
@@ -0,0 +1,8 @@
+/* $FreeBSD$ */
+#define charXX_t char32_t /* Code unit type the shared template produces. */
+#define mbrtocXX mbrtoc32 /* Name given to the template's current-locale entry point. */
+#define mbrtocXX_l mbrtoc32_l /* Name given to the template's explicit-locale entry point. */
+#define DSTBUF_LEN 1 /* Capacity, in code units, of the template's output buffer. */
+#define UTF_XX_INTERNAL "UTF-32-INTERNAL" /* Encoding name the template passes to _citrus_iconv_open() for the charXX_t side. */
+
+#include "mbrtocXX_iconv.h" /* Expands the generic implementation using the macros above. */
diff --git a/lib/libc/locale/mbrtocXX_iconv.h b/lib/libc/locale/mbrtocXX_iconv.h
new file mode 100644
index 000000000000..9eb6f6831eb6
--- /dev/null
+++ b/lib/libc/locale/mbrtocXX_iconv.h
@@ -0,0 +1,158 @@
+/*-
+ * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/queue.h>
+
+#include <assert.h>
+#include <errno.h>
+#include <langinfo.h>
+#include <limits.h>
+#include <string.h>
+#include <uchar.h>
+
+#include "../iconv/citrus_hash.h"
+#include "../iconv/citrus_module.h"
+#include "../iconv/citrus_iconv.h"
+#include "xlocale_private.h"
+
+typedef struct { /* Conversion state that mbrtocXX_l() overlays on an mbstate_t. */
+ bool initialized; /* True once the iconv handle below has been opened; a zeroed mbstate_t reads as false. */
+ struct _citrus_iconv iconv; /* Citrus iconv handle embedded directly in the state. */
+ char srcbuf[MB_LEN_MAX]; /* Multibyte input: bytes saved from earlier calls plus bytes copied from s. */
+ size_t srcbuf_len; /* Number of bytes in srcbuf carried over from earlier calls. */
+ union { /* One buffer, written as bytes by the converter and read back as code units. */
+ charXX_t widechar[DSTBUF_LEN]; /* Converted code units not yet handed to the caller. */
+ char bytes[sizeof(charXX_t) * DSTBUF_LEN]; /* The same storage as the byte destination for the converter. */
+ } dstbuf;
+ size_t dstbuf_len; /* Number of code units currently held in dstbuf. */
+} _ConversionState;
+_Static_assert(sizeof(_ConversionState) <= sizeof(mbstate_t), /* Required because an mbstate_t pointer is cast to this type. */
+ "Size of _ConversionState must not exceed mbstate_t's size.");
+/* Convert the next multibyte character in s (at most n bytes) to charXX_t via Citrus iconv, storing one code unit in *pc per call. */
+size_t
+mbrtocXX_l(charXX_t * __restrict pc, const char * __restrict s, size_t n,
+ mbstate_t * __restrict ps, locale_t locale)
+{
+ _ConversionState *cs;
+ struct _citrus_iconv *handle;
+ size_t i, retval;
+ charXX_t retchar;
+
+ FIX_LOCALE(locale);
+ if (ps == NULL)
+ ps = &locale->mbrtocXX; /* No caller-supplied state: use the one stored in the locale object. */
+ cs = (_ConversionState *)ps; /* The mbstate_t storage is reinterpreted as _ConversionState. */
+ handle = &cs->iconv; /* NOTE(review): presumably lets _citrus_iconv_open() fill the embedded handle in place - confirm. */
+
+ /* (Re)initialize the state: always when s is NULL, otherwise on first use of *ps. */
+ if (s == NULL || !cs->initialized) {
+ if (_citrus_iconv_open(&handle,
+ nl_langinfo_l(CODESET, locale), UTF_XX_INTERNAL) != 0) {
+ cs->initialized = false;
+ errno = EINVAL;
+ return (-1); /* Becomes (size_t)-1 through the return type. */
+ }
+ handle->cv_shared->ci_discard_ilseq = true; /* NOTE(review): presumably drops unconvertible input rather than failing - confirm in citrus_iconv. */
+ handle->cv_shared->ci_hooks = NULL;
+ cs->srcbuf_len = cs->dstbuf_len = 0; /* Forget buffered input bytes and pending output units. */
+ cs->initialized = true;
+ if (s == NULL)
+ return (0); /* Reset-only call: nothing is converted and *pc is not written. */
+ }
+
+ /* See if we still have characters left from the previous invocation. */
+ if (cs->dstbuf_len > 0) {
+ retval = (size_t)-3; /* -3: a buffered code unit is returned; s is not read on this call. */
+ goto return_char;
+ }
+
+ /* Fill up the read buffer as far as possible. */
+ if (n > sizeof(cs->srcbuf) - cs->srcbuf_len)
+ n = sizeof(cs->srcbuf) - cs->srcbuf_len; /* Copy no more than the free space left in srcbuf. */
+ memcpy(cs->srcbuf + cs->srcbuf_len, s, n);
+
+ /* Convert as few characters to the dst buffer as possible. */
+ for (i = 0; ; i++) {
+ char *src, *dst;
+ size_t srcleft, dstleft, invlen;
+ int err;
+
+ src = cs->srcbuf;
+ srcleft = cs->srcbuf_len + n; /* Saved bytes plus the bytes just copied from s. */
+ dst = cs->dstbuf.bytes;
+ dstleft = i * sizeof(charXX_t); /* Offer room for i code units: none on the first pass, one more per retry. */
+ assert(srcleft <= sizeof(cs->srcbuf) &&
+ dstleft <= sizeof(cs->dstbuf.bytes));
+ err = _citrus_iconv_convert(handle, &src, &srcleft,
+ &dst, &dstleft, 0, &invlen);
+ cs->dstbuf_len = (dst - cs->dstbuf.bytes) / sizeof(charXX_t); /* Code units the converter produced. */
+
+ /* Got new character(s). Return the first. */
+ if (cs->dstbuf_len > 0) {
+ assert(src - cs->srcbuf > cs->srcbuf_len); /* At least one byte of s was consumed. NOTE(review): compares ptrdiff_t with size_t. */
+ retval = src - cs->srcbuf - cs->srcbuf_len; /* Bytes consumed from s on this call, excluding bytes saved earlier. */
+ cs->srcbuf_len = 0; /* Copied bytes beyond the converted character are not kept. */
+ goto return_char;
+ }
+
+ /* Increase dst buffer size, to obtain the surrogate pair. */
+ if (err == E2BIG)
+ continue;
+
+ /* Illegal sequence: buffered input is dropped before reporting EILSEQ. */
+ if (invlen > 0) {
+ cs->srcbuf_len = 0;
+ errno = EILSEQ;
+ return ((size_t)-1);
+ }
+
+ /* Save unprocessed remainder for the next invocation. */
+ memmove(cs->srcbuf, src, srcleft);
+ cs->srcbuf_len = srcleft;
+ return ((size_t)-2); /* -2: no code unit could be produced from the input seen so far. */
+ }
+
+return_char:
+ retchar = cs->dstbuf.widechar[0]; /* Hand out the oldest buffered code unit. */
+ memmove(&cs->dstbuf.widechar[0], &cs->dstbuf.widechar[1],
+ --cs->dstbuf_len * sizeof(charXX_t)); /* Shift any remaining code units down by one. */
+ if (pc != NULL)
+ *pc = retchar;
+ if (retchar == 0)
+ return (0); /* A null code unit yields 0 instead of retval. */
+ return (retval);
+}
+/* Same as mbrtocXX_l(), using the locale returned by __get_locale(). */
+size_t
+mbrtocXX(charXX_t * __restrict pc, const char * __restrict s, size_t n,
+ mbstate_t * __restrict ps)
+{
+
+ return (mbrtocXX_l(pc, s, n, ps, __get_locale()));
+}
diff --git a/tools/regression/lib/libc/locale/test-c16rtomb.c b/tools/regression/lib/libc/locale/test-c16rtomb.c
index eb889468e8c4..2c188fa337b8 100644
--- a/tools/regression/lib/libc/locale/test-c16rtomb.c
+++ b/tools/regression/lib/libc/locale/test-c16rtomb.c
@@ -82,6 +82,34 @@ main(int argc, char *argv[])
assert(c16rtomb(buf, 0xd83d, &s) == 0);
assert(c16rtomb(buf, 0xdca9, &s) == (size_t)-1);
assert(errno == EILSEQ);
+ assert((unsigned char)buf[0] == 0xcc);
+
+ /*
+ * ISO8859-1.
+ */
+
+ assert(strcmp(setlocale(LC_CTYPE, "en_US.ISO8859-1"),
+ "en_US.ISO8859-1") == 0);
+
+ /* Unicode character 'Euro sign'. */
+ memset(&s, 0, sizeof(s));
+ memset(buf, 0xcc, sizeof(buf));
+ assert(c16rtomb(buf, 0x20ac, &s) == (size_t)-1);
+ assert(errno == EILSEQ);
+ assert((unsigned char)buf[0] == 0xcc);
+
+ /*
+ * ISO8859-15.
+ */
+
+ assert(strcmp(setlocale(LC_CTYPE, "en_US.ISO8859-15"),
+ "en_US.ISO8859-15") == 0);
+
+ /* Unicode character 'Euro sign'. */
+ memset(&s, 0, sizeof(s));
+ memset(buf, 0xcc, sizeof(buf));
+ assert(c16rtomb(buf, 0x20ac, &s) == 1);
+ assert((unsigned char)buf[0] == 0xa4 && (unsigned char)buf[1] == 0xcc);
/*
* UTF-8.
@@ -104,12 +132,14 @@ main(int argc, char *argv[])
assert(c16rtomb(buf, 0xd83d, &s) == 0);
assert(c16rtomb(buf, L'A', &s) == (size_t)-1);
assert(errno == EILSEQ);
+ assert((unsigned char)buf[0] == 0xcc);
/* Invalid code; 'Pile of poo' without the lead surrogate. */
memset(&s, 0, sizeof(s));
memset(buf, 0xcc, sizeof(buf));
assert(c16rtomb(buf, 0xdca9, &s) == (size_t)-1);
assert(errno == EILSEQ);
+ assert((unsigned char)buf[0] == 0xcc);
printf("ok 1 - c16rtomb()\n");
}
diff --git a/tools/regression/lib/libc/locale/test-mbrtoc16.c b/tools/regression/lib/libc/locale/test-mbrtoc16.c
index 88e8091d43ac..f709a9c80323 100644
--- a/tools/regression/lib/libc/locale/test-mbrtoc16.c
+++ b/tools/regression/lib/libc/locale/test-mbrtoc16.c
@@ -85,6 +85,37 @@ main(int argc, char *argv[])
assert(mbrtoc16(&c16, "", 0, &s) == (size_t)-2);
assert(c16 == L'z');
+ /* Check that mbrtoc16() doesn't read ahead too aggressively. */
+ memset(&s, 0, sizeof(s));
+ assert(mbrtoc16(&c16, "AB", 2, &s) == 1);
+ assert(c16 == L'A');
+ assert(mbrtoc16(&c16, "C", 1, &s) == 1);
+ assert(c16 == L'C');
+
+ /*
+ * ISO-8859-1.
+ */
+
+ assert(strcmp(setlocale(LC_CTYPE, "en_US.ISO8859-1"),
+ "en_US.ISO8859-1") == 0);
+
+ /* Currency sign. */
+ memset(&s, 0, sizeof(s));
+ assert(mbrtoc16(&c16, "\xa4", 1, &s) == 1);
+ assert(c16 == 0xa4);
+
+ /*
+ * ISO-8859-15.
+ */
+
+ assert(strcmp(setlocale(LC_CTYPE, "en_US.ISO8859-15"),
+ "en_US.ISO8859-15") == 0);
+
+ /* Euro sign. */
+ memset(&s, 0, sizeof(s));
+ assert(mbrtoc16(&c16, "\xa4", 1, &s) == 1);
+ assert(c16 == 0x20ac);
+
/*
* UTF-8.
*/
@@ -144,6 +175,20 @@ main(int argc, char *argv[])
assert(mbrtoc16(&c16, "", 0, &s) == (size_t)-3);
assert(c16 == 0xdca9);
+ /* Letter e with acute, precomposed. */
+ memset(&s, 0, sizeof(s));
+ c16 = 0;
+ assert(mbrtoc16(&c16, "\xc3\xa9", 2, &s) == 2);
+ assert(c16 == 0xe9);
+
+ /* Letter e with acute, combined. */
+ memset(&s, 0, sizeof(s));
+ c16 = 0;
+ assert(mbrtoc16(&c16, "\x65\xcc\x81", 3, &s) == 1);
+ assert(c16 == 0x65);
+ assert(mbrtoc16(&c16, "\xcc\x81", 2, &s) == 2);
+ assert(c16 == 0x301);
+
printf("ok 1 - mbrtoc16()\n");
return (0);