aboutsummaryrefslogtreecommitdiff
path: root/subversion/libsvn_subr/utf8proc.c
blob: 0e22af8d94ae56babe6917f13f7acc26f244204d (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
/*
 * utf8proc.c:  Wrappers for the utf8proc library
 *
 * ====================================================================
 *    Licensed to the Apache Software Foundation (ASF) under one
 *    or more contributor license agreements.  See the NOTICE file
 *    distributed with this work for additional information
 *    regarding copyright ownership.  The ASF licenses this file
 *    to you under the Apache License, Version 2.0 (the
 *    "License"); you may not use this file except in compliance
 *    with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing,
 *    software distributed under the License is distributed on an
 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *    KIND, either express or implied.  See the License for the
 *    specific language governing permissions and limitations
 *    under the License.
 * ====================================================================
 */



#include <apr_fnmatch.h>

#include "private/svn_string_private.h"
#include "private/svn_utf_private.h"
#include "svn_private_config.h"

#if SVN_INTERNAL_UTF8PROC
#define UTF8PROC_INLINE
/* Somehow utf8proc thinks it is nice to use strlen as an argument name,
   while this function is already defined via apr.h */
#define strlen svn__strlen_var
#include "utf8proc/utf8proc.c"
#undef strlen
#else
#include <utf8proc.h>
#endif



const char *
svn_utf__utf8proc_compiled_version(void)
{
  static const char utf8proc_version[] =
                                  APR_STRINGIFY(UTF8PROC_VERSION_MAJOR) "."
                                  APR_STRINGIFY(UTF8PROC_VERSION_MINOR) "."
                                  APR_STRINGIFY(UTF8PROC_VERSION_PATCH);
  return utf8proc_version;
}

const char *
svn_utf__utf8proc_runtime_version(void)
{
  /* Unused static function warning removal hack. */
  SVN_UNUSED(utf8proc_grapheme_break);
  SVN_UNUSED(utf8proc_tolower);
  SVN_UNUSED(utf8proc_toupper);
#if UTF8PROC_VERSION_MAJOR >= 2
  SVN_UNUSED(utf8proc_totitle);
#endif
  SVN_UNUSED(utf8proc_charwidth);
  SVN_UNUSED(utf8proc_category_string);
  SVN_UNUSED(utf8proc_NFD);
  SVN_UNUSED(utf8proc_NFC);
  SVN_UNUSED(utf8proc_NFKD);
  SVN_UNUSED(utf8proc_NFKC);

  return utf8proc_version();
}



/* Fill the given BUFFER with decomposed UCS-4 representation of the
 * UTF-8 STRING. If LENGTH is SVN_UTF__UNKNOWN_LENGTH, assume STRING
 * is NUL-terminated; otherwise look only at the first LENGTH bytes in
 * STRING. Upon return, BUFFER->data points at an array of UCS-4
 * characters, and return the length of the array. TRANSFORM_FLAGS
 * define exactly how the decomposition is performed.
 *
 * A negative return value is an utf8proc error code and may indicate
 * that STRING contains invalid UTF-8 or was so long that an overflow
 * occurred.
 */
static apr_ssize_t
unicode_decomposition(int transform_flags,
                      const char *string, apr_size_t length,
                      svn_membuf_t *buffer)
{
  const int nullterm = (length == SVN_UTF__UNKNOWN_LENGTH
                        ? UTF8PROC_NULLTERM : 0);

  for (;;)
    {
      apr_int32_t *const ucs4buf = buffer->data;
      const apr_ssize_t ucs4len = buffer->size / sizeof(*ucs4buf);
      const apr_ssize_t result =
        utf8proc_decompose((const void*) string, length, ucs4buf, ucs4len,
                           UTF8PROC_DECOMPOSE | UTF8PROC_STABLE
                           | transform_flags | nullterm);

      if (result < 0 || result <= ucs4len)
        return result;

      /* Increase the decomposition buffer size and retry */
      svn_membuf__ensure(buffer, result * sizeof(*ucs4buf));
    }
}

/* Fill the given BUFFER with an NFD UCS-4 representation of the UTF-8
 * STRING. If LENGTH is SVN_UTF__UNKNOWN_LENGTH, assume STRING is
 * NUL-terminated; otherwise look only at the first LENGTH bytes in
 * STRING. Upon return, BUFFER->data points at an array of UCS-4
 * characters and *RESULT_LENGTH contains the length of the array.
 *
 * A returned error may indicate that STRING contains invalid UTF-8 or
 * invalid Unicode codepoints. Any error message comes from utf8proc.
 */
static svn_error_t *
decompose_normalized(apr_size_t *result_length,
                     const char *string, apr_size_t length,
                     svn_membuf_t *buffer)
{
  apr_ssize_t result = unicode_decomposition(0, string, length, buffer);
  if (result < 0)
    return svn_error_create(SVN_ERR_UTF8PROC_ERROR, NULL,
                            gettext(utf8proc_errmsg(result)));
  *result_length = result;
  return SVN_NO_ERROR;
}

/* Fill the given BUFFER with an NFC UTF-8 representation of the UTF-8
 * STRING. If LENGTH is SVN_UTF__UNKNOWN_LENGTH, assume STRING is
 * NUL-terminated; otherwise look only at the first LENGTH bytes in
 * STRING. Upon return, BUFFER->data points at a NUL-terminated string
 * of UTF-8 characters.
 *
 * If CASEFOLD is non-zero, perform Unicode case folding, e.g., for
 * case-insensitive string comparison. If STRIPMARK is non-zero, strip
 * all diacritical marks (e.g., accents) from the string.
 *
 * A returned error may indicate that STRING contains invalid UTF-8 or
 * invalid Unicode codepoints. Any error message comes from utf8proc.
 */
static svn_error_t *
normalize_cstring(apr_size_t *result_length,
                  const char *string, apr_size_t length,
                  svn_boolean_t casefold,
                  svn_boolean_t stripmark,
                  svn_membuf_t *buffer)
{
  int flags = 0;
  apr_ssize_t result;

  if (casefold)
    flags |= UTF8PROC_CASEFOLD;

  if (stripmark)
    flags |= UTF8PROC_STRIPMARK;

  result = unicode_decomposition(flags, string, length, buffer);
  if (result >= 0)
    {
      svn_membuf__resize(buffer, result * sizeof(apr_int32_t) + 1);
      result = utf8proc_reencode(buffer->data, result,
                                 UTF8PROC_COMPOSE | UTF8PROC_STABLE);
    }
  if (result < 0)
    return svn_error_create(SVN_ERR_UTF8PROC_ERROR, NULL,
                            gettext(utf8proc_errmsg(result)));
  *result_length = result;
  return SVN_NO_ERROR;
}

/* Compare two arrays of UCS-4 codes, BUFA of length LENA and BUFB of
 * length LENB. Return 0 if they're equal, a negative value if BUFA is
 * less than BUFB, otherwise a positive value.
 *
 * Yes, this is strcmp for known-length UCS-4 strings.
 */
static int
ucs4cmp(const apr_int32_t *bufa, apr_size_t lena,
        const apr_int32_t *bufb, apr_size_t lenb)
{
  const apr_size_t len = (lena < lenb ? lena : lenb);
  apr_size_t i;

  for (i = 0; i < len; ++i)
    {
      const int diff = bufa[i] - bufb[i];
      if (diff)
        return diff;
    }
  return (lena == lenb ? 0 : (lena < lenb ? -1 : 1));
}

svn_error_t *
svn_utf__normcmp(int *result,
                 const char *str1, apr_size_t len1,
                 const char *str2, apr_size_t len2,
                 svn_membuf_t *buf1, svn_membuf_t *buf2)
{
  apr_size_t buflen1;
  apr_size_t buflen2;

  /* Shortcut-circuit the decision if at least one of the strings is empty. */
  const svn_boolean_t empty1 =
    (0 == len1 || (len1 == SVN_UTF__UNKNOWN_LENGTH && !*str1));
  const svn_boolean_t empty2 =
    (0 == len2 || (len2 == SVN_UTF__UNKNOWN_LENGTH && !*str2));
  if (empty1 || empty2)
    {
      *result = (empty1 == empty2 ? 0 : (empty1 ? -1 : 1));
      return SVN_NO_ERROR;
    }

  SVN_ERR(decompose_normalized(&buflen1, str1, len1, buf1));
  SVN_ERR(decompose_normalized(&buflen2, str2, len2, buf2));
  *result = ucs4cmp(buf1->data, buflen1, buf2->data, buflen2);
  return SVN_NO_ERROR;
}

svn_error_t*
svn_utf__normalize(const char **result,
                   const char *str, apr_size_t len,
                   svn_membuf_t *buf)
{
  apr_size_t result_length;
  SVN_ERR(normalize_cstring(&result_length, str, len, FALSE, FALSE, buf));
  *result = (const char*)(buf->data);
  return SVN_NO_ERROR;
}

svn_error_t *
svn_utf__xfrm(const char **result,
              const char *str, apr_size_t len,
              svn_boolean_t case_insensitive,
              svn_boolean_t accent_insensitive,
              svn_membuf_t *buf)
{
  apr_size_t result_length;
  SVN_ERR(normalize_cstring(&result_length, str, len,
                            case_insensitive, accent_insensitive, buf));
  *result = (const char*)(buf->data);
  return SVN_NO_ERROR;
}

svn_boolean_t
svn_utf__fuzzy_glob_match(const char *str,
                          const apr_array_header_t *patterns,
                          svn_membuf_t *buf)
{
  const char *normalized;
  svn_error_t *err;
  int i;

  /* Try to normalize case and accents in STR.
   *
   * If that should fail for some reason, consider STR a mismatch. */
  err = svn_utf__xfrm(&normalized, str, strlen(str), TRUE, TRUE, buf);
  if (err)
    {
      svn_error_clear(err);
      return FALSE;
    }

  /* Now see whether it matches any/all of the patterns. */
  for (i = 0; i < patterns->nelts; ++i)
    {
      const char *pattern = APR_ARRAY_IDX(patterns, i, const char *);
      if (apr_fnmatch(pattern, normalized, 0) == APR_SUCCESS)
        return TRUE;
    }

  return FALSE;
}

/* Decode a single UCS-4 code point to UTF-8, appending the result to BUFFER.
 * Assume BUFFER is already filled to *LENGTH and return the new size there.
 * This function does *not* nul-terminate the stringbuf!
 *
 * A returned error indicates that the codepoint is invalid.
 */
static svn_error_t *
encode_ucs4(svn_membuf_t *buffer, apr_int32_t ucs4chr, apr_size_t *length)
{
  apr_size_t utf8len;

  if (buffer->size - *length < 4)
    svn_membuf__resize(buffer, buffer->size + 4);

  utf8len = utf8proc_encode_char(ucs4chr, ((apr_byte_t*)buffer->data + *length));
  if (!utf8len)
    return svn_error_createf(SVN_ERR_UTF8PROC_ERROR, NULL,
                             _("Invalid Unicode character U+%04lX"),
                             (long)ucs4chr);
  *length += utf8len;
  return SVN_NO_ERROR;
}

svn_error_t *
svn_utf__encode_ucs4_string(svn_membuf_t *buffer,
                            const apr_int32_t *ucs4str,
                            apr_size_t length,
                            apr_size_t *result_length)
{
  *result_length = 0;
  while (length-- > 0)
    SVN_ERR(encode_ucs4(buffer, *ucs4str++, result_length));
  svn_membuf__resize(buffer, *result_length + 1);
  ((char*)buffer->data)[*result_length] = '\0';
  return SVN_NO_ERROR;
}


svn_error_t *
svn_utf__glob(svn_boolean_t *match,
              const char *pattern, apr_size_t pattern_len,
              const char *string, apr_size_t string_len,
              const char *escape, apr_size_t escape_len,
              svn_boolean_t sql_like,
              svn_membuf_t *pattern_buf,
              svn_membuf_t *string_buf,
              svn_membuf_t *temp_buf)
{
  apr_size_t patternbuf_len;
  apr_size_t tempbuf_len;

  /* If we're in GLOB mode, we don't do custom escape chars. */
  if (escape && !sql_like)
    return svn_error_create(SVN_ERR_UTF8_GLOB, NULL,
                            _("Cannot use a custom escape token"
                              " in glob matching mode"));

  /* Convert the patern to NFD UTF-8. We can't use the UCS-4 result
     because apr_fnmatch can't handle it.*/
  SVN_ERR(decompose_normalized(&tempbuf_len, pattern, pattern_len, temp_buf));
  if (!sql_like)
    SVN_ERR(svn_utf__encode_ucs4_string(pattern_buf, temp_buf->data,
                                        tempbuf_len, &patternbuf_len));
  else
    {
      /* Convert a LIKE pattern to a GLOB pattern that apr_fnmatch can use. */
      const apr_int32_t *like = temp_buf->data;
      apr_int32_t ucs4esc;
      svn_boolean_t escaped;
      apr_size_t i;

      if (!escape)
        ucs4esc = -1;           /* Definitely an invalid UCS-4 character. */
      else
        {
          const int nullterm = (escape_len == SVN_UTF__UNKNOWN_LENGTH
                                ? UTF8PROC_NULLTERM : 0);
          apr_ssize_t result =
            utf8proc_decompose((const void*) escape, escape_len, &ucs4esc, 1,
                               UTF8PROC_DECOMPOSE | UTF8PROC_STABLE | nullterm);
          if (result < 0)
            return svn_error_create(SVN_ERR_UTF8PROC_ERROR, NULL,
                                    gettext(utf8proc_errmsg(result)));
          if (result == 0 || result > 1)
            return svn_error_create(SVN_ERR_UTF8_GLOB, NULL,
                                    _("Escape token must be one character"));
          if ((ucs4esc & 0xFF) != ucs4esc)
            return svn_error_createf(SVN_ERR_UTF8_GLOB, NULL,
                                     _("Invalid escape character U+%04lX"),
                                     (long)ucs4esc);
        }

      patternbuf_len = 0;
      svn_membuf__ensure(pattern_buf, tempbuf_len + 1);
      for (i = 0, escaped = FALSE; i < tempbuf_len; ++i, ++like)
        {
          if (*like == ucs4esc && !escaped)
            {
              svn_membuf__resize(pattern_buf, patternbuf_len + 1);
              ((char*)pattern_buf->data)[patternbuf_len++] = '\\';
              escaped = TRUE;
            }
          else if (escaped)
            {
              SVN_ERR(encode_ucs4(pattern_buf, *like, &patternbuf_len));
              escaped = FALSE;
            }
          else
            {
              if ((*like == '[' || *like == '\\') && !escaped)
                {
                  /* Escape brackets and backslashes which are always
                     literals in LIKE patterns. */
                  svn_membuf__resize(pattern_buf, patternbuf_len + 1);
                  ((char*)pattern_buf->data)[patternbuf_len++] = '\\';
                  escaped = TRUE;
                  --i; --like;
                  continue;
                }

              /* Replace LIKE wildcards with their GLOB equivalents. */
              if (*like == '%' || *like == '_')
                {
                  const char wildcard = (*like == '%' ? '*' : '?');
                  svn_membuf__resize(pattern_buf, patternbuf_len + 1);
                  ((char*)pattern_buf->data)[patternbuf_len++] = wildcard;
                }
              else
                SVN_ERR(encode_ucs4(pattern_buf, *like, &patternbuf_len));
            }
        }
      svn_membuf__resize(pattern_buf, patternbuf_len + 1);
      ((char*)pattern_buf->data)[patternbuf_len] = '\0';
    }

  /* Now normalize the string */
  SVN_ERR(decompose_normalized(&tempbuf_len, string, string_len, temp_buf));
  SVN_ERR(svn_utf__encode_ucs4_string(string_buf, temp_buf->data,
                                      tempbuf_len, &tempbuf_len));

  *match = !apr_fnmatch(pattern_buf->data, string_buf->data, 0);
  return SVN_NO_ERROR;
}

svn_boolean_t
svn_utf__is_normalized(const char *string, apr_pool_t *scratch_pool)
{
  svn_error_t *err;
  svn_membuf_t buffer;
  apr_size_t result_length;
  const apr_size_t length = strlen(string);
  svn_membuf__create(&buffer, length * sizeof(apr_int32_t), scratch_pool);
  err = normalize_cstring(&result_length, string, length,
                          FALSE, FALSE, &buffer);
  if (err)
    {
      svn_error_clear(err);
      return FALSE;
    }
  return (length == result_length && 0 == strcmp(string, buffer.data));
}

const char *
svn_utf__fuzzy_escape(const char *src, apr_size_t length, apr_pool_t *pool)
{
  /* Hexadecimal digits for code conversion. */
  static const char digits[] = "0123456789ABCDEF";

  /* Flags used for Unicode decomposition. */
  static const int decomp_flags = (
      UTF8PROC_COMPAT | UTF8PROC_STABLE | UTF8PROC_LUMP
      | UTF8PROC_NLF2LF | UTF8PROC_STRIPCC | UTF8PROC_STRIPMARK);

  svn_stringbuf_t *result;
  svn_membuf_t buffer;
  apr_ssize_t decomp_length;
  apr_ssize_t len;

  /* Decompose to a non-reversible compatibility format. */
  svn_membuf__create(&buffer, length * sizeof(apr_int32_t), pool);
  decomp_length = unicode_decomposition(decomp_flags, src, length, &buffer);
  if (decomp_length < 0)
    {
      svn_membuf_t part;
      apr_size_t done, prev;

      /* The only other error we can receive here indicates an integer
         overflow due to the length of the input string. Not very
         likely, but we certainly shouldn't continue in that case. */
      SVN_ERR_ASSERT_NO_RETURN(decomp_length == UTF8PROC_ERROR_INVALIDUTF8);

      /* Break the decomposition into parts that are valid UTF-8, and
         bytes that are not. Represent the invalid bytes in the target
         erray by their negative value. This works because utf8proc
         will not generate Unicode code points with values larger than
         U+10FFFF. */
      svn_membuf__create(&part, sizeof(apr_int32_t), pool);
      decomp_length = 0;
      done = prev = 0;
      while (done < length)
        {
          apr_int32_t uc;

          while (done < length)
            {
              len = utf8proc_iterate((apr_byte_t*)src + done, length - done, &uc);
              if (len < 0)
                break;
              done += len;
            }

          /* Decompose the valid part */
          if (done > prev)
            {
              len = unicode_decomposition(
                  decomp_flags, src + prev, done - prev, &part);
              SVN_ERR_ASSERT_NO_RETURN(len > 0);
              svn_membuf__resize(
                  &buffer, (decomp_length + len) * sizeof(apr_int32_t));
              memcpy((apr_int32_t*)buffer.data + decomp_length,
                     part.data, len * sizeof(apr_int32_t));
              decomp_length += len;
              prev = done;
            }

          /* What follows could be a valid UTF-8 sequence, but not
             a valid Unicode character. */
          if (done < length)
            {
              const char *last;

              /* Determine the length of the UTF-8 sequence */
              const char *const p = src + done;
              len = utf8proc_utf8class[(apr_byte_t)*p];

              /* Check if the multi-byte sequence is valid UTF-8. */
              if (len > 1 && len <= (apr_ssize_t)(length - done))
                last = svn_utf__last_valid(p, len);
              else
                last = NULL;

              /* Might not be a valid UTF-8 sequence at all */
              if (!last || (last && last - p < len))
                {
                  uc = -((apr_int32_t)(*p & 0xff));
                  len = 1;
                }
              else
                {
                  switch (len)
                    {
                      /* Decode the UTF-8 sequence without validation. */
                    case 2:
                      uc = ((p[0] & 0x1f) <<  6) + (p[1] & 0x3f);
                      break;
                    case 3:
                      uc = (((p[0] & 0x0f) << 12) + ((p[1] & 0x3f) <<  6)
                            + (p[2] & 0x3f));
                      break;
                    case 4:
                      uc = (((p[0] & 0x07) << 18) + ((p[1] & 0x3f) << 12)
                            + ((p[2] & 0x3f) <<  6) + (p[3] & 0x3f));
                      break;
                    default:
                      SVN_ERR_ASSERT_NO_RETURN(
                          !"Unexpected invalid UTF-8 byte");
                    }

                }

              svn_membuf__resize(
                  &buffer, (decomp_length + 1) * sizeof(apr_int32_t));
              ((apr_int32_t*)buffer.data)[decomp_length++] = uc;
              done += len;
              prev = done;
            }
        }
    }

  /* Scan the result and deleting any combining diacriticals and
     inserting placeholders where any non-ascii characters remain.  */
  result = svn_stringbuf_create_ensure(decomp_length, pool);
  for (len = 0; len < decomp_length; ++len)
    {
      const apr_int32_t cp = ((apr_int32_t*)buffer.data)[len];
      if (cp > 0 && cp < 127)
        svn_stringbuf_appendbyte(result, (char)cp);
      else if (cp == 0)
        svn_stringbuf_appendcstr(result, "\\0");
      else if (cp < 0)
        {
          const apr_int32_t rcp = ((-cp) & 0xff);
          svn_stringbuf_appendcstr(result, "?\\");
          svn_stringbuf_appendbyte(result, digits[(rcp & 0x00f0) >> 4]);
          svn_stringbuf_appendbyte(result, digits[(rcp & 0x000f)]);
        }
      else
        {
          if (utf8proc_codepoint_valid(cp))
            {
              const utf8proc_property_t *prop = utf8proc_get_property(cp);
              if (prop->combining_class != 0)
                continue;           /* Combining mark; ignore */
              svn_stringbuf_appendcstr(result, "{U+");
            }
          else
            svn_stringbuf_appendcstr(result, "{U?");
          if (cp > 0xffff)
            {
              svn_stringbuf_appendbyte(result, digits[(cp & 0xf00000) >> 20]);
              svn_stringbuf_appendbyte(result, digits[(cp & 0x0f0000) >> 16]);
            }
          svn_stringbuf_appendbyte(result, digits[(cp & 0xf000) >> 12]);
          svn_stringbuf_appendbyte(result, digits[(cp & 0x0f00) >> 8]);
          svn_stringbuf_appendbyte(result, digits[(cp & 0x00f0) >> 4]);
          svn_stringbuf_appendbyte(result, digits[(cp & 0x000f)]);
          svn_stringbuf_appendbyte(result, '}');
        }
    }

  return result->data;
}