]> Git Repo - J-u-boot.git/blame - lib/charset.c
efi_loader: move codepage 437 table
[J-u-boot.git] / lib / charset.c
CommitLineData
f739fcd8 1// SPDX-License-Identifier: GPL-2.0+
78178bb0
RC
2/*
3 * charset conversion utils
4 *
5 * Copyright (c) 2017 Rob Clark
78178bb0
RC
6 */
7
35cbb796 8#include <common.h>
78178bb0 9#include <charset.h>
b5130a81 10#include <capitalization.h>
70616a1e 11#include <cp437.h>
6974a4a3 12#include <efi_loader.h>
78178bb0
RC
13#include <malloc.h>
14
70616a1e
HS
15/**
16 * codepage_437 - Unicode to codepage 437 translation table
17 */
18const u16 codepage_437[128] = CP437;
19
b5130a81
HS
20static struct capitalization_table capitalization_table[] =
21#ifdef CONFIG_EFI_UNICODE_CAPITALIZATION
22 UNICODE_CAPITALIZATION_TABLE;
23#elif CONFIG_FAT_DEFAULT_CODEPAGE == 1250
24 CP1250_CAPITALIZATION_TABLE;
25#else
26 CP437_CAPITALIZATION_TABLE;
27#endif
28
35cbb796
HS
29/**
30 * get_code() - read Unicode code point from UTF-8 stream
31 *
32 * @read_u8: - stream reader
33 * @src: - string buffer passed to stream reader, optional
34 * Return: - Unicode code point
35 */
36static int get_code(u8 (*read_u8)(void *data), void *data)
d8c28232 37{
35cbb796 38 s32 ch = 0;
d8c28232 39
35cbb796
HS
40 ch = read_u8(data);
41 if (!ch)
d8c28232 42 return 0;
35cbb796
HS
43 if (ch >= 0xc2 && ch <= 0xf4) {
44 int code = 0;
45
46 if (ch >= 0xe0) {
47 if (ch >= 0xf0) {
d8c28232 48 /* 0xf0 - 0xf4 */
35cbb796
HS
49 ch &= 0x07;
50 code = ch << 18;
51 ch = read_u8(data);
52 if (ch < 0x80 || ch > 0xbf)
53 goto error;
54 ch &= 0x3f;
d8c28232
HS
55 } else {
56 /* 0xe0 - 0xef */
35cbb796 57 ch &= 0x0f;
d8c28232 58 }
35cbb796 59 code += ch << 12;
d8c28232
HS
60 if ((code >= 0xD800 && code <= 0xDFFF) ||
61 code >= 0x110000)
35cbb796
HS
62 goto error;
63 ch = read_u8(data);
64 if (ch < 0x80 || ch > 0xbf)
65 goto error;
d8c28232
HS
66 }
67 /* 0xc0 - 0xdf or continuation byte (0x80 - 0xbf) */
35cbb796
HS
68 ch &= 0x3f;
69 code += ch << 6;
70 ch = read_u8(data);
71 if (ch < 0x80 || ch > 0xbf)
72 goto error;
73 ch &= 0x3f;
74 ch += code;
75 } else if (ch >= 0x80) {
76 goto error;
d8c28232 77 }
35cbb796
HS
78 return ch;
79error:
80 return '?';
81}
82
83/**
84 * read_string() - read byte from character string
85 *
86 * @data: - pointer to string
87 * Return: - byte read
88 *
89 * The string pointer is incremented if it does not point to '\0'.
90 */
91static u8 read_string(void *data)
92
93{
94 const char **src = (const char **)data;
95 u8 c;
96
97 if (!src || !*src || !**src)
98 return 0;
99 c = **src;
d8c28232 100 ++*src;
35cbb796
HS
101 return c;
102}
103
104/**
105 * read_console() - read byte from console
106 *
60d79876
HS
107 * @data - not used, needed to match interface
108 * Return: - byte read or 0 on error
35cbb796
HS
109 */
110static u8 read_console(void *data)
111{
60d79876
HS
112 int ch;
113
c670aeee 114 ch = getchar();
60d79876
HS
115 if (ch < 0)
116 ch = 0;
117 return ch;
35cbb796
HS
118}
119
120int console_read_unicode(s32 *code)
121{
122 if (!tstc()) {
123 /* No input available */
124 return 1;
125 }
126
127 /* Read Unicode code */
128 *code = get_code(read_console, NULL);
129 return 0;
130}
131
132s32 utf8_get(const char **src)
133{
134 return get_code(read_string, src);
d8c28232
HS
135}
136
137int utf8_put(s32 code, char **dst)
138{
139 if (!dst || !*dst)
140 return -1;
141 if ((code >= 0xD800 && code <= 0xDFFF) || code >= 0x110000)
142 return -1;
143 if (code <= 0x007F) {
144 **dst = code;
145 } else {
146 if (code <= 0x07FF) {
147 **dst = code >> 6 | 0xC0;
148 } else {
149 if (code < 0x10000) {
150 **dst = code >> 12 | 0xE0;
151 } else {
152 **dst = code >> 18 | 0xF0;
153 ++*dst;
154 **dst = (code >> 12 & 0x3F) | 0x80;
155 }
156 ++*dst;
157 **dst = (code >> 6 & 0x3F) | 0x80;
158 }
159 ++*dst;
160 **dst = (code & 0x3F) | 0x80;
161 }
162 ++*dst;
163 return 0;
164}
165
166size_t utf8_utf16_strnlen(const char *src, size_t count)
167{
168 size_t len = 0;
169
170 for (; *src && count; --count) {
171 s32 code = utf8_get(&src);
172
173 if (!code)
174 break;
175 if (code < 0) {
176 /* Reserve space for a replacement character */
177 len += 1;
178 } else if (code < 0x10000) {
179 len += 1;
180 } else {
181 len += 2;
182 }
183 }
184 return len;
185}
186
187int utf8_utf16_strncpy(u16 **dst, const char *src, size_t count)
188{
189 if (!src || !dst || !*dst)
190 return -1;
191
192 for (; count && *src; --count) {
193 s32 code = utf8_get(&src);
194
195 if (code < 0)
196 code = '?';
197 utf16_put(code, dst);
198 }
199 **dst = 0;
200 return 0;
201}
202
203s32 utf16_get(const u16 **src)
204{
205 s32 code, code2;
206
207 if (!src || !*src)
208 return -1;
209 if (!**src)
210 return 0;
211 code = **src;
212 ++*src;
213 if (code >= 0xDC00 && code <= 0xDFFF)
214 return -1;
215 if (code >= 0xD800 && code <= 0xDBFF) {
216 if (!**src)
217 return -1;
218 code &= 0x3ff;
219 code <<= 10;
220 code += 0x10000;
221 code2 = **src;
222 ++*src;
223 if (code2 <= 0xDC00 || code2 >= 0xDFFF)
224 return -1;
225 code2 &= 0x3ff;
226 code += code2;
227 }
228 return code;
229}
230
231int utf16_put(s32 code, u16 **dst)
232{
233 if (!dst || !*dst)
234 return -1;
235 if ((code >= 0xD800 && code <= 0xDFFF) || code >= 0x110000)
236 return -1;
237 if (code < 0x10000) {
238 **dst = code;
239 } else {
240 code -= 0x10000;
241 **dst = code >> 10 | 0xD800;
242 ++*dst;
243 **dst = (code & 0x3ff) | 0xDC00;
244 }
245 ++*dst;
246 return 0;
247}
248
249size_t utf16_strnlen(const u16 *src, size_t count)
250{
251 size_t len = 0;
252
253 for (; *src && count; --count) {
254 s32 code = utf16_get(&src);
255
256 if (!code)
257 break;
258 /*
259 * In case of an illegal sequence still reserve space for a
260 * replacement character.
261 */
262 ++len;
263 }
264 return len;
265}
266
267size_t utf16_utf8_strnlen(const u16 *src, size_t count)
268{
269 size_t len = 0;
270
271 for (; *src && count; --count) {
272 s32 code = utf16_get(&src);
273
274 if (!code)
275 break;
276 if (code < 0)
277 /* Reserve space for a replacement character */
278 len += 1;
279 else if (code < 0x80)
280 len += 1;
281 else if (code < 0x800)
282 len += 2;
283 else if (code < 0x10000)
284 len += 3;
285 else
286 len += 4;
287 }
288 return len;
289}
290
291int utf16_utf8_strncpy(char **dst, const u16 *src, size_t count)
292{
293 if (!src || !dst || !*dst)
294 return -1;
295
296 for (; count && *src; --count) {
297 s32 code = utf16_get(&src);
298
299 if (code < 0)
300 code = '?';
301 utf8_put(code, dst);
302 }
303 **dst = 0;
304 return 0;
305}
306
b5130a81
HS
307s32 utf_to_lower(const s32 code)
308{
309 struct capitalization_table *pos = capitalization_table;
310 s32 ret = code;
311
312 if (code <= 0x7f) {
313 if (code >= 'A' && code <= 'Z')
314 ret += 0x20;
315 return ret;
316 }
317 for (; pos->upper; ++pos) {
318 if (pos->upper == code) {
319 ret = pos->lower;
320 break;
321 }
322 }
323 return ret;
324}
325
326s32 utf_to_upper(const s32 code)
327{
328 struct capitalization_table *pos = capitalization_table;
329 s32 ret = code;
330
331 if (code <= 0x7f) {
332 if (code >= 'a' && code <= 'z')
333 ret -= 0x20;
334 return ret;
335 }
336 for (; pos->lower; ++pos) {
337 if (pos->lower == code) {
338 ret = pos->upper;
339 break;
340 }
341 }
342 return ret;
343}
78178bb0 344
f8062c96
AT
345/*
346 * u16_strncmp() - compare two u16 string
347 *
348 * @s1: first string to compare
349 * @s2: second string to compare
350 * @n: maximum number of u16 to compare
351 * Return: 0 if the first n u16 are the same in s1 and s2
352 * < 0 if the first different u16 in s1 is less than the
353 * corresponding u16 in s2
354 * > 0 if the first different u16 in s1 is greater than the
355 * corresponding u16 in s2
356 */
357int u16_strncmp(const u16 *s1, const u16 *s2, size_t n)
358{
359 int ret = 0;
360
361 for (; n; --n, ++s1, ++s2) {
362 ret = *s1 - *s2;
363 if (ret || !*s1)
364 break;
365 }
366
367 return ret;
368}
369
/**
 * u16_strlen() - count the 16-bit units of a zero terminated u16 string
 *
 * @in:		string; read bytewise, so no particular alignment is needed
 * Return:	number of 16-bit units before the first unit whose two bytes
 *		are both zero
 */
size_t u16_strlen(const void *in)
{
	const char *start = in;
	const char *cur = start;

	/* Step two bytes at a time until both bytes of a unit are zero */
	while (cur[0] || cur[1])
		cur += 2;

	return (size_t)(cur - start) >> 1;
}
381
6974a4a3 382size_t __efi_runtime u16_strnlen(const u16 *in, size_t count)
78178bb0
RC
383{
384 size_t i;
385 for (i = 0; count-- && in[i]; i++);
386 return i;
387}
388
4835d35a
SG
389size_t u16_strsize(const void *in)
390{
391 return (u16_strlen(in) + 1) * sizeof(u16);
392}
393
2a3537ae
AT
394u16 *u16_strcpy(u16 *dest, const u16 *src)
395{
396 u16 *tmp = dest;
397
398 for (;; dest++, src++) {
399 *dest = *src;
400 if (!*src)
401 break;
402 }
403
404 return tmp;
405}
406
317068b8 407u16 *u16_strdup(const void *src)
2a3537ae
AT
408{
409 u16 *new;
317068b8 410 size_t len;
2a3537ae
AT
411
412 if (!src)
413 return NULL;
317068b8
HS
414 len = (u16_strlen(src) + 1) * sizeof(u16);
415 new = malloc(len);
2a3537ae
AT
416 if (!new)
417 return NULL;
317068b8 418 memcpy(new, src, len);
2a3537ae
AT
419
420 return new;
421}
422
78178bb0
RC
/**
 * utf16_to_utf8() - convert UTF-16 to UTF-8
 *
 * @dest:	output buffer
 * @src:	UTF-16 input, not required to be zero terminated
 * @size:	number of 16-bit units to convert from @src
 * Return:	pointer just past the last byte written to @dest
 *
 * Exactly @size units are converted; a 0 unit is copied like any other
 * value and no terminator is appended. An unpaired surrogate is replaced by
 * '?'. The output never exceeds three bytes per input unit: a surrogate pair
 * gives four bytes for two units, any other unit at most three.
 */
uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size)
{
	uint32_t code_high = 0;

	while (size--) {
		uint32_t code = *src++;

		if (code_high) {
			if (code >= 0xDC00 && code <= 0xDFFF) {
				/* Surrogate pair. */
				code = ((code_high - 0xD800) << 10) +
				       (code - 0xDC00) + 0x10000;

				*dest++ = (code >> 18) | 0xF0;
				*dest++ = ((code >> 12) & 0x3F) | 0x80;
				*dest++ = ((code >> 6) & 0x3F) | 0x80;
				*dest++ = (code & 0x3F) | 0x80;
			} else {
				/* Unpaired high surrogate. */
				*dest++ = '?';
				/*
				 * The current unit may be valid on its own:
				 * hand it back to the loop. The count has to
				 * be restored too, otherwise the last unit of
				 * the input would never be converted.
				 */
				src--;
				size++;
			}

			code_high = 0;
		} else {
			if (code <= 0x007F) {
				*dest++ = code;
			} else if (code <= 0x07FF) {
				*dest++ = (code >> 6) | 0xC0;
				*dest++ = (code & 0x3F) | 0x80;
			} else if (code >= 0xD800 && code <= 0xDBFF) {
				/* Wait for the matching low surrogate. */
				code_high = code;
				continue;
			} else if (code >= 0xDC00 && code <= 0xDFFF) {
				/* Low surrogate without a preceding high one. */
				*dest++ = '?';
			} else {
				/* Remaining 16-bit values need three bytes. */
				*dest++ = (code >> 12) | 0xE0;
				*dest++ = ((code >> 6) & 0x3F) | 0x80;
				*dest++ = (code & 0x3F) | 0x80;
			}
		}
	}

	/* Input ended right after a high surrogate. */
	if (code_high)
		*dest++ = '?';

	return dest;
}
This page took 0.232186 seconds and 4 git commands to generate.