]>
Commit | Line | Data |
---|---|---|
f739fcd8 | 1 | /* SPDX-License-Identifier: GPL-2.0+ */ |
78178bb0 RC |
2 | /* |
3 | * charset conversion utils | |
4 | * | |
5 | * Copyright (c) 2017 Rob Clark | |
78178bb0 RC |
6 | */ |
7 | ||
8 | #ifndef __CHARSET_H_ | |
9 | #define __CHARSET_H_ | |
10 | ||
d8c28232 | 11 | #include <linux/kernel.h> |
f58c5ecb HS |
12 | #include <linux/types.h> |
13 | ||
984f251f | 14 | #define MAX_UTF8_PER_UTF16 3 |
78178bb0 | 15 | |
4bc4798f | 16 | /* |
70616a1e HS |
17 | * codepage_437 - Unicode to codepage 437 translation table |
18 | */ | |
19 | extern const u16 codepage_437[128]; | |
20 | ||
35cbb796 HS |
21 | /** |
22 | * console_read_unicode() - read Unicode code point from console | |
23 | * | |
24 | * @code: pointer to store Unicode code point | |
25 | * Return: 0 = success | |
26 | */ | |
27 | int console_read_unicode(s32 *code); | |
28 | ||
d8c28232 HS |
29 | /** |
30 | * utf8_get() - get next UTF-8 code point from buffer | |
31 | * | |
32 | * @src: pointer to current byte, updated to point to next byte | |
33 | * Return: code point, or 0 for end of string, or -1 if no legal | |
34 | * code point is found. In case of an error src points to | |
35 | * the incorrect byte. | |
36 | */ | |
37 | s32 utf8_get(const char **src); | |
38 | ||
39 | /** | |
40 | * utf8_put() - write UTF-8 code point to buffer | |
41 | * | |
42 | * @code: code point | |
43 | * @dst: pointer to destination buffer, updated to next position | |
44 | * Return: -1 if the input parameters are invalid | |
45 | */ | |
46 | int utf8_put(s32 code, char **dst); | |
47 | ||
48 | /** | |
49 | * utf8_utf16_strnlen() - length of a truncated utf-8 string after conversion | |
50 | * to utf-16 | |
51 | * | |
52 | * @src: utf-8 string | |
53 | * @count: maximum number of code points to convert | |
8a4c443c | 54 | * Return: length in u16 after conversion to utf-16 without the |
d8c28232 | 55 | * trailing \0. If an invalid UTF-8 sequence is hit one |
8a4c443c | 56 | * u16 will be reserved for a replacement character. |
d8c28232 HS |
57 | */ |
58 | size_t utf8_utf16_strnlen(const char *src, size_t count); | |
59 | ||
60 | /** | |
61 | * utf8_utf16_strlen() - length of a utf-8 string after conversion to utf-16 | |
62 | * | |
311da04a | 63 | * @a: utf-8 string |
8a4c443c HS |
64 | * Return: length in u16 after conversion to utf-16 without the |
65 | * trailing \0. If an invalid UTF-8 sequence is hit one | |
66 | * u16 will be reserved for a replacement character. | |
d8c28232 HS |
67 | */ |
68 | #define utf8_utf16_strlen(a) utf8_utf16_strnlen((a), SIZE_MAX) | |
69 | ||
70 | /** | |
71 | * utf8_utf16_strncpy() - copy utf-8 string to utf-16 string | |
72 | * | |
73 | * @dst: destination buffer | |
74 | * @src: source buffer | |
75 | * @count: maximum number of code points to copy | |
76 | * Return: -1 if the input parameters are invalid | |
77 | */ | |
78 | int utf8_utf16_strncpy(u16 **dst, const char *src, size_t count); | |
79 | ||
80 | /** | |
81 | * utf8_utf16_strcpy() - copy utf-8 string to utf-16 string | |
82 | * | |
311da04a HS |
83 | * @d: destination buffer |
84 | * @s: source buffer | |
d8c28232 HS |
85 | * Return: -1 if the input parameters are invalid |
86 | */ | |
87 | #define utf8_utf16_strcpy(d, s) utf8_utf16_strncpy((d), (s), SIZE_MAX) | |
88 | ||
89 | /** | |
90 | * utf16_get() - get next UTF-16 code point from buffer | |
91 | * | |
92 | * @src: pointer to current word, updated to point to next word | |
93 | * Return: code point, or 0 for end of string, or -1 if no legal | |
94 | * code point is found. In case of an error src points to | |
95 | * the incorrect word. | |
96 | */ | |
97 | s32 utf16_get(const u16 **src); | |
98 | ||
99 | /** | |
100 | * utf16_put() - write UTF-16 code point to buffer | |
101 | * | |
102 | * @code: code point | |
103 | * @dst: pointer to destination buffer, updated to next position | |
104 | * Return: -1 if the input parameters are invalid | |
105 | */ | |
106 | int utf16_put(s32 code, u16 **dst); | |
107 | ||
108 | /** | |
109 | * utf16_strnlen() - length of a truncated utf-16 string | |
110 | * | |
111 | * @src: utf-16 string | |
112 | * @count: maximum number of code points to convert | |
113 | * Return: length in code points. If an invalid UTF-16 sequence is | |
114 | * hit one position will be reserved for a replacement | |
115 | * character. | |
116 | */ | |
117 | size_t utf16_strnlen(const u16 *src, size_t count); | |
118 | ||
119 | /** | |
120 | * utf16_utf8_strnlen() - length of a truncated utf-16 string after conversion | |
121 | * to utf-8 | |
122 | * | |
123 | * @src: utf-16 string | |
124 | * @count: maximum number of code points to convert | |
125 | * Return: length in bytes after conversion to utf-8 without the | |
126 | * trailing \0. If an invalid UTF-16 sequence is hit one | |
127 | * byte will be reserved for a replacement character. | |
128 | */ | |
129 | size_t utf16_utf8_strnlen(const u16 *src, size_t count); | |
130 | ||
131 | /** | |
132 | * utf16_utf8_strlen() - length of a utf-16 string after conversion to utf-8 | |
133 | * | |
311da04a | 134 | * @a: utf-16 string |
d8c28232 | 135 | * Return: length in bytes after conversion to utf-8 without the |
8a4c443c HS |
136 | * trailing \0. If an invalid UTF-16 sequence is hit one |
137 | * byte will be reserved for a replacement character. | |
d8c28232 HS |
138 | */ |
139 | #define utf16_utf8_strlen(a) utf16_utf8_strnlen((a), SIZE_MAX) | |
140 | ||
141 | /** | |
142 | * utf16_utf8_strncpy() - copy utf-16 string to utf-8 string | |
143 | * | |
144 | * @dst: destination buffer | |
145 | * @src: source buffer | |
146 | * @count: maximum number of code points to copy | |
147 | * Return: -1 if the input parameters are invalid | |
148 | */ | |
149 | int utf16_utf8_strncpy(char **dst, const u16 *src, size_t count); | |
150 | ||
151 | /** | |
152 | * utf16_utf8_strcpy() - copy utf-16 string to utf-8 string | |
153 | * | |
311da04a HS |
154 | * @d: destination buffer |
155 | * @s: source buffer | |
d8c28232 HS |
156 | * Return: -1 if the input parameters are invalid |
157 | */ | |
158 | #define utf16_utf8_strcpy(d, s) utf16_utf8_strncpy((d), (s), SIZE_MAX) | |
159 | ||
b5130a81 HS |
160 | /** |
161 | * utf_to_lower() - convert a Unicode letter to lower case | |
162 | * | |
163 | * @code: letter to convert | |
164 | * Return: lower case letter or unchanged letter | |
165 | */ | |
166 | s32 utf_to_lower(const s32 code); | |
167 | ||
168 | /** | |
169 | * utf_to_upper() - convert a Unicode letter to upper case | |
170 | * | |
171 | * @code: letter to convert | |
172 | * Return: upper case letter or unchanged letter | |
173 | */ | |
174 | s32 utf_to_upper(const s32 code); | |
175 | ||
311da04a | 176 | /** |
f8062c96 AT |
177 | * u16_strncmp() - compare two u16 strings
178 | * | |
179 | * @s1: first string to compare | |
180 | * @s2: second string to compare | |
181 | * @n: maximum number of u16 to compare | |
182 | * Return: 0 if the first n u16 are the same in s1 and s2 | |
183 | * < 0 if the first different u16 in s1 is less than the | |
184 | * corresponding u16 in s2 | |
185 | * > 0 if the first different u16 in s1 is greater than the | |
186 | * corresponding u16 in s2 | |
187 | */ | |
188 | int u16_strncmp(const u16 *s1, const u16 *s2, size_t n); | |
311da04a HS |
189 | |
190 | /** | |
191 | * u16_strcmp() - compare two u16 strings | |
192 | * | |
193 | * @s1: first string to compare | |
194 | * @s2: second string to compare | |
195 | * Return: 0 if all u16 are the same in s1 and s2 | |
196 | * < 0 if the first different u16 in s1 is less than the | |
197 | * corresponding u16 in s2 | |
198 | * > 0 if the first different u16 in s1 is greater than the | |
199 | * corresponding u16 in s2 | |
200 | */ | |
f8062c96 AT |
201 | #define u16_strcmp(s1, s2) u16_strncmp((s1), (s2), SIZE_MAX) |
202 | ||
78178bb0 | 203 | /** |
1dde0d57 | 204 | * u16_strlen() - count non-zero words |
78178bb0 | 205 | * |
1dde0d57 HS |
206 | * This function matches wcslen() if the -fshort-wchar compiler flag is set.
207 | * In the EFI context we explicitly need a function handling u16 strings. | |
78178bb0 | 208 | * |
1dde0d57 | 209 | * @in: null terminated u16 string |
311da04a | 210 | * Return: number of non-zero words. |
1dde0d57 | 211 | * This is not the number of utf-16 letters! |
78178bb0 | 212 | */ |
317068b8 | 213 | size_t u16_strlen(const void *in); |
78178bb0 | 214 | |
4835d35a SG |
215 | /** |
216 | * u16_strsize() - count size of u16 string in bytes including the null | |
217 | * character | |
218 | * | |
219 | * Counts the number of bytes occupied by a u16 string | |
220 | * | |
221 | * @in: null terminated u16 string | |
222 | * Return: bytes in a u16 string | |
4835d35a SG |
223 | */ |
224 | size_t u16_strsize(const void *in); | |
225 | ||
78178bb0 | 226 | /** |
31393564 | 227 | * u16_strnlen() - count non-zero words |
78178bb0 | 228 | * |
1dde0d57 HS |
229 | * This function matches wcsnlen_s() if the -fshort-wchar compiler flag is set.
230 | * In the EFI context we explicitly need a function handling u16 strings. | |
78178bb0 | 231 | * |
1dde0d57 HS |
232 | * @in: null terminated u16 string |
233 | * @count: maximum number of words to count | |
311da04a | 234 | * Return: number of non-zero words. |
1dde0d57 | 235 | * This is not the number of utf-16 letters! |
78178bb0 | 236 | */ |
1dde0d57 | 237 | size_t u16_strnlen(const u16 *in, size_t count); |
78178bb0 | 238 | |
2a3537ae AT |
239 | /** |
240 | * u16_strcpy() - copy u16 string | |
241 | * | |
242 | * Copy u16 string pointed to by src, including terminating null word, to | |
243 | * the buffer pointed to by dest. | |
244 | * | |
245 | * @dest: destination buffer | |
246 | * @src: source buffer (null terminated) | |
247 | * Return: 'dest' address | |
248 | */ | |
249 | u16 *u16_strcpy(u16 *dest, const u16 *src); | |
250 | ||
251 | /** | |
252 | * u16_strdup() - duplicate u16 string | |
253 | * | |
254 | * Copy u16 string pointed to by src, including terminating null word, to a | |
255 | * newly allocated buffer. | |
256 | * | |
257 | * @src: source buffer (null terminated) | |
258 | * Return: allocated new buffer on success, NULL on failure | |
259 | */ | |
317068b8 | 260 | u16 *u16_strdup(const void *src); |
2a3537ae | 261 | |
78178bb0 RC |
262 | /** |
263 | * utf16_to_utf8() - Convert a utf16 string to utf8 | |
264 | * | |
265 | * Converts 'size' characters of the utf16 string 'src' to utf8 | |
266 | * written to the 'dest' buffer. | |
267 | * | |
984f251f | 268 | * NOTE that a single utf16 character can generate up to 3 utf8 |
78178bb0 RC |
269 | * characters. See MAX_UTF8_PER_UTF16. |
270 | * | |
311da04a HS |
271 | * @dest: the destination buffer to write the utf8 characters |
272 | * @src: the source utf16 string | |
273 | * @size: the number of utf16 characters to convert | |
274 | * Return: the pointer to the first unwritten byte in 'dest' | |
78178bb0 RC |
275 | */ |
276 | uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size); | |
277 | ||
73bb90ca HS |
278 | /** |
279 | * utf_to_cp() - translate Unicode code point to 8bit codepage | |
280 | * | |
281 | * Codepoints that do not exist in the codepage are rendered as question mark. | |
282 | * | |
283 | * @c: pointer to Unicode code point to be translated | |
284 | * @codepage: Unicode to codepage translation table | |
285 | * Return: 0 on success, -ENOENT if codepoint cannot be translated | |
286 | */ | |
287 | int utf_to_cp(s32 *c, const u16 *codepage); | |
288 | ||
e91789e2 HS |
289 | /** |
290 | * utf8_to_cp437_stream() - convert UTF-8 stream to codepage 437 | |
291 | * | |
292 | * @c: next UTF-8 character to convert | |
293 | * @buffer: buffer, at least 5 characters | |
294 | * Return: next codepage 437 character or 0 | |
295 | */ | |
296 | int utf8_to_cp437_stream(u8 c, char *buffer); | |
297 | ||
298 | /** | |
299 | * utf8_to_utf32_stream() - convert UTF-8 stream to UTF-32 | |
300 | * | |
301 | * @c: next UTF-8 character to convert | |
302 | * @buffer: buffer, at least 5 characters | |
303 | * Return: next UTF-32 character or 0 | |
304 | */ | |
305 | int utf8_to_utf32_stream(u8 c, char *buffer); | |
306 | ||
78178bb0 | 307 | #endif /* __CHARSET_H_ */ |