diff options
Diffstat (limited to 'tools/test/iconv/tablegen/tablegen.c')
-rw-r--r-- | tools/test/iconv/tablegen/tablegen.c | 321 |
1 files changed, 321 insertions, 0 deletions
diff --git a/tools/test/iconv/tablegen/tablegen.c b/tools/test/iconv/tablegen/tablegen.c new file mode 100644 index 000000000000..ee8cecad70d8 --- /dev/null +++ b/tools/test/iconv/tablegen/tablegen.c @@ -0,0 +1,321 @@ +/*- + * Copyright (C) 2009, 2010 Gabor Kovesdan <gabor@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/endian.h> +#include <sys/types.h> + +#include <err.h> +#include <errno.h> +#include <getopt.h> +#include <iconv.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> + +#define UC_TO_MB_FLAG 1 +#define MB_TO_WC_FLAG 2 +#define MB_TO_UC_FLAG 4 +#define WC_TO_MB_FLAG 8 + +#define MAX(a,b) ((a) < (b) ? (b) : (a)) + +extern char *__progname; + +static const char *optstr = "cdilrt"; +static const char *citrus_common = "SRC_ZONE\t0x0000-0xFFFF\n" + "OOB_MODE\tILSEQ\n" + "DST_ILSEQ\t0xFFFE\n" + "DST_UNIT_BITS\t32\n\n" + "BEGIN_MAP\n" + "#\n# Generated with Citrus iconv (FreeBSD)\n#\n"; +bool cflag; +bool dflag; +bool iflag; +bool lflag; +bool tflag; +bool rflag; +int fb_flags; + +static void do_conv(iconv_t, bool); +void mb_to_uc_fb(const char*, size_t, + void (*write_replacement)(const unsigned int *, + size_t, void *), void *, void *); +void mb_to_wc_fb(const char*, size_t, + void (*write_replacement) (const wchar_t *, size_t, void *), + void *, void *); +void uc_to_mb_fb(unsigned int, + void (*write_replacement) (const char *, size_t, void *), void *, + void *); +void wc_to_mb_fb(wchar_t, + void (*write_replacement)(const char *, + size_t, void *), void *, void *); + +struct option long_options[] = +{ + {"citrus", no_argument, NULL, 'c'}, + {"diagnostic", no_argument, NULL, 'd'}, + {"ignore", no_argument, NULL, 'i'}, + {"long", no_argument, NULL, 'l'}, + {"reverse", no_argument, NULL, 'r'}, + {"translit", no_argument, NULL, 't'}, + {NULL, no_argument, NULL, 0} +}; + +static void +usage(void) { + + fprintf(stderr, "Usage: %s [-cdilrt] ENCODING\n", __progname); + exit(EXIT_FAILURE); +} + +static void +format_diag(int errcode) +{ + const char *errstr; + const char *u2m, *m2u, *m2w, *w2m; + + switch (errcode) { + case EINVAL: + errstr = "EINVAL "; + break; + case EILSEQ: + errstr = "EILSEQ "; + break; + case E2BIG: + errstr = "E2BIG "; + break; + default: + errstr = "UNKNOWN "; + break; + } + + u2m = (fb_flags & UC_TO_MB_FLAG) ? "U2M " : ""; + m2w = (fb_flags & MB_TO_WC_FLAG) ? "M2W " : ""; + m2u = (fb_flags & MB_TO_UC_FLAG) ? "M2U " : ""; + w2m = (fb_flags & WC_TO_MB_FLAG) ? "W2M " : ""; + + printf("%s%s%s%s%s", errstr, u2m, m2w, m2u, w2m); +} + +static int +magnitude(const uint32_t p) +{ + + if (p >> 8 == 0) + return (1); + else if (p >> 16 == 0) + return (2); + else + return (p >> 24 == 0 ? 3 : 4); +} + +static void +format(const uint32_t data) +{ + + /* XXX: could be simpler, something like this but with leading 0s? + + printf("0x%.*X", magnitude(data), data); + */ + + switch (magnitude(data)) { + default: + case 2: + printf("0x%04X", data); + break; + case 3: + printf("0x%06X", data); + break; + case 4: + printf("0x%08X", data); + break; + } +} + +void +uc_to_mb_fb(unsigned int code, + void (*write_replacement)(const char *buf, size_t buflen, + void* callback_arg), void* callback_arg, void* data) +{ + + fb_flags |= UC_TO_MB_FLAG; +} + +void +mb_to_wc_fb(const char* inbuf, size_t inbufsize, + void (*write_replacement)(const wchar_t *buf, size_t buflen, + void* callback_arg), void* callback_arg, void* data) +{ + + fb_flags |= MB_TO_WC_FLAG; +} + +void +mb_to_uc_fb(const char* inbuf, size_t inbufsize, + void (*write_replacement)(const unsigned int *buf, size_t buflen, + void* callback_arg), void* callback_arg, void* data) +{ + + fb_flags |= MB_TO_UC_FLAG; +} + +void +wc_to_mb_fb(wchar_t wc, + void (*write_replacement)(const char *buf, size_t buflen, + void* callback_arg), void* callback_arg, void* data) +{ + + fb_flags |= WC_TO_MB_FLAG; +} + +int +main (int argc, char *argv[]) +{ + struct iconv_fallbacks fbs; + iconv_t cd; + char *tocode; + char c; + + while (((c = getopt_long(argc, argv, optstr, long_options, NULL)) != -1)) { + switch (c) { + case 'c': + cflag = true; + break; + case 'd': + dflag = true; + break; + case 'i': + iflag = true; + break; + case 'l': + lflag = true; + break; + case 'r': + rflag = true; + break; + case 't': + tflag = true; + break; + } + } + argc -= optind; + argv += optind; + + if (argc < 1) + usage(); + + fbs.uc_to_mb_fallback = uc_to_mb_fb; + fbs.mb_to_wc_fallback = mb_to_wc_fb; + fbs.mb_to_uc_fallback = mb_to_uc_fb; + fbs.wc_to_mb_fallback = wc_to_mb_fb; + fbs.data = NULL; + + if (argc == 2) { + asprintf(&tocode, "%s%s%s", argv[1], tflag ? "//TRASNLIT" : "", + iflag ? "//IGNORE" : ""); + + if ((cd = iconv_open(tocode, argv[0])) == (iconv_t)-1) + err(1, NULL); + if (dflag) { + if (iconvctl(cd, ICONV_SET_FALLBACKS, &fbs) != 0) + err(1, NULL); + } + do_conv(cd, false); + } else if (rflag) { + asprintf(&tocode, "%s%s%s", argv[0], tflag ? "//TRANSLIT" : "", + iflag ? "//IGNORE" : ""); + + if ((cd = iconv_open(tocode, "UTF-32LE")) == (iconv_t)-1) + err(1, NULL); + if (dflag && iconvctl(cd, ICONV_SET_FALLBACKS, &fbs) != 0) + err(1, NULL); + if (cflag) { + printf("# $FreeBSD$\n\n"); + printf("TYPE\t\tROWCOL\n"); + printf("NAME\t\tUCS/%s\n", argv[0]); + printf("%s", citrus_common); + } + do_conv(cd, true); + } else { + if ((cd = iconv_open("UTF-32LE//TRANSLIT", argv[0])) == (iconv_t)-1) + err(1, NULL); + if (dflag && (iconvctl(cd, ICONV_SET_FALLBACKS, &fbs) != 0)) + err(1, NULL); + if (cflag) { + printf("# $FreeBSD$\n\n"); + printf("TYPE\t\tROWCOL\n"); + printf("NAME\t\t%s/UCS\n", argv[0]); + printf("%s", citrus_common); + } + do_conv(cd, false); + } + + if (iconv_close(cd) != 0) + err(1, NULL); + + return (EXIT_SUCCESS); +} + +static void +do_conv(iconv_t cd, bool uniinput) { + size_t inbytesleft, outbytesleft, ret; + uint32_t outbuf; + uint32_t inbuf; + const char *inbuf_; + char *outbuf_; + + for (inbuf = 0; inbuf < (lflag ? 0x100000 : 0x10000); inbuf += 1) { + if (uniinput && (inbuf >= 0xD800) && (inbuf <= 0xDF00)) + continue; + inbytesleft = uniinput ? 4 : magnitude(inbuf); + outbytesleft = 4; + outbuf = 0x00000000; + outbuf_ = (char *)&outbuf; + inbuf_ = (const char *)&inbuf; + iconv(cd, NULL, NULL, NULL, NULL); + fb_flags = 0; + errno = 0; + ret = iconv(cd, &inbuf_, &inbytesleft, &outbuf_, &outbytesleft); + if (ret == (size_t)-1) { + if (dflag) { + format(inbuf); + printf(" = "); + format_diag(errno); + printf("\n"); + } + continue; + } + format(inbuf); + printf(" = "); + format(outbuf); + printf("\n"); + } + if (cflag) + printf("END_MAP\n"); +} |