1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
|
/*
* Copyright (c) 2014-2020 Pavel Kalvoda <me@pavelkalvoda.com>
*
* libcbor is free software; you can redistribute it and/or modify
* it under the terms of the MIT license. See LICENSE for details.
*/
#include "unicode.h"
#define UTF8_ACCEPT 0
#define UTF8_REJECT 1
static const uint8_t utf8d[] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 00..1f */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 20..3f */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 40..5f */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 60..7f */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9,
9, 9, 9, 9, 9, 9, 9, 9, 9, 9, /* 80..9f */
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, /* a0..bf */
8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* c0..df */
0xa, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3, 0x3,
0x3, 0x3, 0x4, 0x3, 0x3, /* e0..ef */
0xb, 0x6, 0x6, 0x6, 0x5, 0x8, 0x8, 0x8, 0x8, 0x8, 0x8,
0x8, 0x8, 0x8, 0x8, 0x8, /* f0..ff */
0x0, 0x1, 0x2, 0x3, 0x5, 0x8, 0x7, 0x1, 0x1, 0x1, 0x4,
0x6, 0x1, 0x1, 0x1, 0x1, /* s0..s0 */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
1, 0, 1, 0, 1, 1, 1, 1, 1, 1, /* s1..s2 */
1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 1, 1, 1, 1, 1, 1, 1, 1, /* s3..s4 */
1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 3, 1, 3, 1, 1, 1, 1, 1, 1, /* s5..s6 */
1, 3, 1, 1, 1, 1, 1, 3, 1, 3, 1,
1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* s7..s8 */
};
/* Copyright of this function: (c) 2008-2009 Bjoern Hoehrmann
* <bjoern@hoehrmann.de> */
/* See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. */
uint32_t _cbor_unicode_decode(uint32_t* state, uint32_t* codep, uint32_t byte) {
uint32_t type = utf8d[byte];
*codep = (*state != UTF8_ACCEPT) ? (byte & 0x3fu) | (*codep << 6)
: (0xff >> type) & (byte);
*state = utf8d[256 + *state * 16 + type];
return *state;
}
size_t _cbor_unicode_codepoint_count(cbor_data source, size_t source_length,
struct _cbor_unicode_status* status) {
*status =
(struct _cbor_unicode_status){.location = 0, .status = _CBOR_UNICODE_OK};
uint32_t codepoint, state = UTF8_ACCEPT, res;
size_t pos = 0, count = 0;
for (; pos < source_length; pos++) {
res = _cbor_unicode_decode(&state, &codepoint, source[pos]);
if (res == UTF8_ACCEPT) {
count++;
} else if (res == UTF8_REJECT) {
goto error;
}
}
/* Unfinished multibyte codepoint */
if (state != UTF8_ACCEPT) goto error;
return count;
error:
*status = (struct _cbor_unicode_status){.location = pos,
.status = _CBOR_UNICODE_BADCP};
return -1;
}
|