]>
Commit | Line | Data |
---|---|---|
78178bb0 RC |
1 | /* |
2 | * charset conversion utils | |
3 | * | |
4 | * Copyright (c) 2017 Rob Clark | |
5 | * | |
6 | * SPDX-License-Identifier: GPL-2.0+ | |
7 | */ | |
8 | ||
9 | #include <common.h> | |
10 | #include <charset.h> | |
11 | #include <malloc.h> | |
12 | ||
13 | /* | |
14 | * utf8/utf16 conversion mostly lifted from grub | |
15 | */ | |
16 | ||
/**
 * utf16_strlen() - count the code units of a zero-terminated UTF-16 string
 * @in:	string to measure; must end with a 0 code unit
 *
 * Return: number of 16-bit code units preceding the terminating 0
 */
size_t utf16_strlen(const uint16_t *in)
{
	const uint16_t *end = in;

	/* Walk forward until the terminating 0 unit is found. */
	while (*end)
		end++;

	return (size_t)(end - in);
}
23 | ||
/**
 * utf16_strnlen() - bounded length of a UTF-16 string
 * @in:		string to measure
 * @count:	maximum number of code units to examine
 *
 * At most @count code units of @in are read, so @in does not have to be
 * zero-terminated within that range.
 *
 * Return: number of code units before the first 0 unit, or @count if no
 *	   0 unit occurs within the first @count units
 */
size_t utf16_strnlen(const uint16_t *in, size_t count)
{
	size_t len = 0;

	/* The bound is tested first so in[len] is never read past @count. */
	while (len < count && in[len])
		len++;

	return len;
}
30 | ||
/**
 * utf16_strcpy() - copy a zero-terminated UTF-16 string
 * @dest:	destination buffer; the caller must ensure it can hold the
 *		whole string including the terminating 0 unit
 * @src:	zero-terminated source string
 *
 * Return: @dest
 */
uint16_t *utf16_strcpy(uint16_t *dest, const uint16_t *src)
{
	size_t i = 0;
	uint16_t unit;

	/* Copy unit by unit; the terminating 0 is copied before stopping. */
	do {
		unit = src[i];
		dest[i] = unit;
		i++;
	} while (unit != '\0');

	return dest;
}
40 | ||
/**
 * utf16_strdup() - duplicate a zero-terminated UTF-16 string
 * @s:	string to duplicate, may be NULL
 *
 * The copy is obtained with malloc().
 *
 * Return: newly allocated copy of @s, or NULL if @s is NULL or the
 *	   allocation fails
 */
uint16_t *utf16_strdup(const uint16_t *s)
{
	uint16_t *dup;

	if (!s)
		return NULL;

	/* One extra code unit for the terminating 0. */
	dup = malloc((utf16_strlen(s) + 1) * sizeof(*dup));
	if (!dup)
		return NULL;

	/* utf16_strcpy() hands back its destination pointer. */
	return utf16_strcpy(dup, s);
}
49 | ||
/**
 * utf16_to_utf8() - convert UTF-16 code units to UTF-8
 * @dest:	output buffer; it needs room for up to 3 bytes per input
 *		code unit (a surrogate pair uses 2 units and yields 4 bytes)
 * @src:	UTF-16 input
 * @size:	number of 16-bit code units to convert from @src
 *
 * Exactly @size code units are converted; a 0 unit is not treated as a
 * terminator and is emitted as a 0 byte. No terminator is appended to the
 * output. Every unpaired surrogate (a low surrogate without a preceding
 * high surrogate, a high surrogate not followed by a low surrogate, or a
 * high surrogate at the very end of the input) is replaced by '?'.
 *
 * Return: pointer to the byte following the last byte written to @dest
 */
uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size)
{
	/* High surrogate waiting for its low half; 0 when none is pending. */
	uint32_t code_high = 0;

	while (size--) {
		uint32_t code = *src++;

		if (code_high) {
			if (code >= 0xDC00 && code <= 0xDFFF) {
				/* Surrogate pair: combine into one code point. */
				code = ((code_high - 0xD800) << 10) +
				       (code - 0xDC00) + 0x10000;

				*dest++ = (code >> 18) | 0xF0;
				*dest++ = ((code >> 12) & 0x3F) | 0x80;
				*dest++ = ((code >> 6) & 0x3F) | 0x80;
				*dest++ = (code & 0x3F) | 0x80;
			} else {
				/* The pending high surrogate is unpaired. */
				*dest++ = '?';
				/*
				 * The current unit may be valid on its own, so
				 * do not eat it: undo both the pointer advance
				 * and the count decrement so that it is
				 * converted by the next iteration. Without
				 * restoring size the last unit of the input
				 * would be lost.
				 */
				src--;
				size++;
			}

			code_high = 0;
		} else if (code <= 0x007F) {
			*dest++ = code;
		} else if (code <= 0x07FF) {
			*dest++ = (code >> 6) | 0xC0;
			*dest++ = (code & 0x3F) | 0x80;
		} else if (code >= 0xD800 && code <= 0xDBFF) {
			/* High surrogate: wait for the low half. */
			code_high = code;
		} else if (code >= 0xDC00 && code <= 0xDFFF) {
			/* Low surrogate without a preceding high one. */
			*dest++ = '?';
		} else {
			/*
			 * Remaining BMP code points. A single 16-bit unit
			 * cannot exceed 0xFFFF, so no 4-byte case is needed
			 * here.
			 */
			*dest++ = (code >> 12) | 0xE0;
			*dest++ = ((code >> 6) & 0x3F) | 0x80;
			*dest++ = (code & 0x3F) | 0x80;
		}
	}

	/* Input ended in the middle of a surrogate pair. */
	if (code_high)
		*dest++ = '?';

	return dest;
}