[J-u-boot.git] / lib / charset.c

// SPDX-License-Identifier: GPL-2.0+
/*
 *  charset conversion utils
 *
 *  Copyright (c) 2017 Rob Clark
 */

#include <common.h>
#include <charset.h>
#include <capitalization.h>
#include <cp437.h>
#include <efi_loader.h>
#include <errno.h>
#include <malloc.h>

/**
 * codepage_437 - Unicode to codepage 437 translation table
 *
 * The 128 entries come from the CP437 macro. utf8_to_cp437_stream() hands
 * this table to utf_to_cp(), which translates a Unicode code point found at
 * index j into the code page byte j + 0x80.
 */
const u16 codepage_437[128] = CP437;

/*
 * capitalization_table - upper case / lower case pairs searched by
 * utf_to_lower() and utf_to_upper() for code points above 0x7f
 *
 * The initializer is selected at build time: the Unicode table if
 * CONFIG_EFI_UNICODE_CAPITALIZATION is defined, else the code page 1250
 * table if CONFIG_FAT_DEFAULT_CODEPAGE equals 1250, else the code page 437
 * table. The lookup loops stop at the first entry whose upper (respectively
 * lower) member is zero.
 */
static struct capitalization_table capitalization_table[] =
#ifdef CONFIG_EFI_UNICODE_CAPITALIZATION
	UNICODE_CAPITALIZATION_TABLE;
#elif CONFIG_FAT_DEFAULT_CODEPAGE == 1250
	CP1250_CAPITALIZATION_TABLE;
#else
	CP437_CAPITALIZATION_TABLE;
#endif

/**
 * get_code() - read Unicode code point from UTF-8 stream
 *
 * @read_u8:	- stream reader, called once for every byte that is needed
 * @data:	- argument passed through to the stream reader, optional
 * Return:	- Unicode code point, 0 if the first byte read is 0,
 *		  or -1 for an ill-formed sequence
 *
 * Only well-formed UTF-8 is accepted: stray continuation bytes, the lead
 * bytes 0xc0, 0xc1 and 0xf5 - 0xff, overlong three and four byte forms,
 * encoded surrogates (U+D800 - U+DFFF) and values above U+10FFFF all yield
 * -1. Reading stops at the byte that makes the sequence invalid, so no
 * byte after it is requested from the reader.
 */
static int get_code(u8 (*read_u8)(void *data), void *data)
{
	s32 ch;
	s32 code;

	ch = read_u8(data);
	if (!ch)
		return 0;
	if (ch < 0x80)
		return ch;
	/* 0x80 - 0xc1 and 0xf5 - 0xff cannot start a sequence */
	if (ch < 0xc2 || ch > 0xf4)
		return -1;

	if (ch < 0xe0) {
		/* 0xc2 - 0xdf: two byte sequence */
		code = (ch & 0x1f) << 6;
	} else {
		if (ch >= 0xf0) {
			/* 0xf0 - 0xf4: four byte sequence */
			code = (ch & 0x07) << 18;
			ch = read_u8(data);
			if (ch < 0x80 || ch > 0xbf)
				return -1;
			code += (ch & 0x3f) << 12;
			/*
			 * Bits 12 and up are final now: reject overlong
			 * forms and anything beyond U+10FFFF.
			 */
			if (code < 0x10000 || code >= 0x110000)
				return -1;
		} else {
			/* 0xe0 - 0xef: three byte sequence */
			code = (ch & 0x0f) << 12;
		}
		ch = read_u8(data);
		if (ch < 0x80 || ch > 0xbf)
			return -1;
		code += (ch & 0x3f) << 6;
		/*
		 * Bits 6 and up are final now. This is the earliest point
		 * at which a surrogate or an overlong three byte form can
		 * be told apart from a valid code point; four byte
		 * sequences are already known to be >= 0x10000 here.
		 */
		if (code < 0x800 || (code >= 0xD800 && code <= 0xDFFF))
			return -1;
	}
	/* Last continuation byte carries the low six bits */
	ch = read_u8(data);
	if (ch < 0x80 || ch > 0xbf)
		return -1;
	return code + (ch & 0x3f);
}

/**
 * read_string() - read byte from character string
 *
 * @data:	- pointer to string
 * Return:	- byte read
 *
 * The string pointer is incremented if it does not point to '\0'.
 */
static u8 read_string(void *data)

{
	const char **src = (const char **)data;
	u8 c;

	if (!src || !*src || !**src)
		return 0;
	c = **src;
	++*src;
	return c;
}

/**
 * read_console() - fetch one byte from the console
 *
 * @data:	- unused, present only to match the stream reader signature
 * Return:	- byte returned by getchar(), or 0 if getchar() reported a
 *		  negative value
 */
static u8 read_console(void *data)
{
	int ch = getchar();

	return ch < 0 ? 0 : ch;
}

/**
 * console_read_unicode() - read one Unicode code point from the console
 *
 * @code:	receives the code point that was read
 * Return:	0 if a code point was stored in @code,
 *		1 if tstc() reports that no input is available
 *
 * Results of get_code() that are zero or negative (invalid sequences) are
 * skipped; reading continues for as long as tstc() reports pending input.
 */
int console_read_unicode(s32 *code)
{
	s32 cp;

	while (tstc()) {
		/* Read Unicode code */
		cp = get_code(read_console, NULL);
		if (cp > 0) {
			*code = cp;
			return 0;
		}
	}
	/* No input available */
	return 1;
}

/**
 * utf8_get() - get next UTF-8 code point from a string
 *
 * @src:	address of the string pointer; read_string() advances the
 *		pointer past every non-zero byte it hands out
 * Return:	value returned by get_code(): the code point, 0 if the
 *		string pointer is at '\0', or -1 for an invalid sequence
 */
s32 utf8_get(const char **src)
{
	return get_code(read_string, src);
}

/**
 * utf8_put() - write a code point as UTF-8 and advance the output pointer
 *
 * @code:	code point to encode
 * @dst:	address of the output pointer; moved past the bytes written
 * Return:	0 on success, -1 if @dst or *@dst is NULL or @code is a
 *		surrogate (0xD800 - 0xDFFF) or not below 0x110000
 *
 * One to four bytes are written depending on the magnitude of @code. No
 * terminating '\0' is appended. Negative values are not rejected; they
 * take the single byte path.
 */
int utf8_put(s32 code, char **dst)
{
	char *out;

	if (!dst || !*dst)
		return -1;
	if (code >= 0x110000 || (code >= 0xD800 && code <= 0xDFFF))
		return -1;

	out = *dst;
	if (code <= 0x007F) {
		/* one byte */
		*out++ = code;
	} else if (code <= 0x07FF) {
		/* two bytes */
		*out++ = code >> 6 | 0xC0;
		*out++ = (code & 0x3F) | 0x80;
	} else if (code < 0x10000) {
		/* three bytes */
		*out++ = code >> 12 | 0xE0;
		*out++ = (code >> 6 & 0x3F) | 0x80;
		*out++ = (code & 0x3F) | 0x80;
	} else {
		/* four bytes */
		*out++ = code >> 18 | 0xF0;
		*out++ = (code >> 12 & 0x3F) | 0x80;
		*out++ = (code >> 6 & 0x3F) | 0x80;
		*out++ = (code & 0x3F) | 0x80;
	}
	*dst = out;
	return 0;
}

/**
 * utf8_utf16_strnlen() - count the u16 units needed for a UTF-8 string
 *
 * @src:	UTF-8 string
 * @count:	maximum number of code points to look at
 * Return:	number of u16 units, not counting a terminating zero
 *
 * Code points from 0x10000 upwards count as two units, everything else,
 * including an invalid sequence (one replacement character), as one.
 */
size_t utf8_utf16_strnlen(const char *src, size_t count)
{
	size_t units = 0;

	while (*src && count) {
		s32 cp = utf8_get(&src);

		if (!cp)
			break;
		/* cp < 0: reserve space for a replacement character */
		units += (cp >= 0x10000) ? 2 : 1;
		--count;
	}
	return units;
}

/**
 * utf8_utf16_strncpy() - convert a UTF-8 string to UTF-16
 *
 * @dst:	address of the output pointer; advanced by utf16_put() and
 *		left pointing at the terminating zero that is written last
 * @src:	UTF-8 string
 * @count:	maximum number of code points to convert
 * Return:	0 on success, -1 if @src, @dst or *@dst is NULL
 *
 * Invalid UTF-8 sequences are replaced by '?'.
 */
int utf8_utf16_strncpy(u16 **dst, const char *src, size_t count)
{
	if (!src || !dst || !*dst)
		return -1;

	while (count && *src) {
		s32 cp = utf8_get(&src);

		utf16_put(cp < 0 ? '?' : cp, dst);
		--count;
	}
	**dst = 0;
	return 0;
}

/**
 * utf16_get() - get next UTF-16 code point from a string
 *
 * @src:	address of the string pointer; advanced past every u16 that
 *		is consumed, but never past a terminating zero
 * Return:	code point, 0 if the string pointer is at the terminating
 *		zero, or -1 if @src or *@src is NULL or the sequence is
 *		invalid
 *
 * A sequence is invalid if it starts with a low surrogate, or if a high
 * surrogate is followed by the end of the string or by anything other
 * than a low surrogate (0xDC00 - 0xDFFF, both bounds included). In the
 * last case the offending u16 has been consumed as well.
 */
s32 utf16_get(const u16 **src)
{
	s32 code, code2;

	if (!src || !*src)
		return -1;
	if (!**src)
		return 0;
	code = **src;
	++*src;
	/* A low surrogate must not come first */
	if (code >= 0xDC00 && code <= 0xDFFF)
		return -1;
	if (code >= 0xD800 && code <= 0xDBFF) {
		/* High surrogate: a low surrogate has to follow */
		if (!**src)
			return -1;
		code &= 0x3ff;
		code <<= 10;
		code += 0x10000;
		code2 = **src;
		++*src;
		/* 0xDC00 and 0xDFFF themselves are valid low surrogates */
		if (code2 < 0xDC00 || code2 > 0xDFFF)
			return -1;
		code2 &= 0x3ff;
		code += code2;
	}
	return code;
}

/**
 * utf16_put() - write a code point as UTF-16 and advance the output pointer
 *
 * @code:	code point to encode
 * @dst:	address of the output pointer; moved past the units written
 * Return:	0 on success, -1 if @dst or *@dst is NULL or @code is a
 *		surrogate (0xD800 - 0xDFFF) or not below 0x110000
 *
 * Code points below 0x10000 are stored as a single u16, all others as a
 * surrogate pair. No terminating zero is appended.
 */
int utf16_put(s32 code, u16 **dst)
{
	u16 *out;

	if (!dst || !*dst)
		return -1;
	if (code >= 0x110000 || (code >= 0xD800 && code <= 0xDFFF))
		return -1;

	out = *dst;
	if (code >= 0x10000) {
		s32 offset = code - 0x10000;

		/* high surrogate first, then low surrogate */
		*out++ = offset >> 10 | 0xD800;
		*out++ = (offset & 0x3ff) | 0xDC00;
	} else {
		*out++ = code;
	}
	*dst = out;
	return 0;
}

/**
 * utf16_strnlen() - count the code points in a UTF-16 string
 *
 * @src:	UTF-16 string
 * @count:	maximum number of code points to look at
 * Return:	number of code points found; an illegal sequence counts as
 *		one so that space for a replacement character is reserved
 */
size_t utf16_strnlen(const u16 *src, size_t count)
{
	size_t chars = 0;

	while (*src && count) {
		if (!utf16_get(&src))
			break;
		++chars;
		--count;
	}
	return chars;
}

/**
 * utf16_utf8_strnlen() - count the bytes needed for a UTF-16 string in UTF-8
 *
 * @src:	UTF-16 string
 * @count:	maximum number of code points to look at
 * Return:	number of bytes, not counting a terminating '\0'
 *
 * An illegal sequence counts as one byte (a replacement character).
 */
size_t utf16_utf8_strnlen(const u16 *src, size_t count)
{
	size_t bytes = 0;

	while (*src && count) {
		s32 cp = utf16_get(&src);

		if (!cp)
			break;
		if (cp < 0x80)
			/* ASCII, or cp < 0: one replacement character */
			bytes += 1;
		else if (cp < 0x800)
			bytes += 2;
		else if (cp < 0x10000)
			bytes += 3;
		else
			bytes += 4;
		--count;
	}
	return bytes;
}

/**
 * utf16_utf8_strncpy() - convert a UTF-16 string to UTF-8
 *
 * @dst:	address of the output pointer; advanced by utf8_put() and
 *		left pointing at the terminating '\0' that is written last
 * @src:	UTF-16 string
 * @count:	maximum number of code points to convert
 * Return:	0 on success, -1 if @src, @dst or *@dst is NULL
 *
 * Invalid UTF-16 sequences are replaced by '?'.
 */
int utf16_utf8_strncpy(char **dst, const u16 *src, size_t count)
{
	if (!src || !dst || !*dst)
		return -1;

	while (count && *src) {
		s32 cp = utf16_get(&src);

		utf8_put(cp < 0 ? '?' : cp, dst);
		--count;
	}
	**dst = 0;
	return 0;
}

/**
 * utf_to_lower() - convert a code point to lower case
 *
 * @code:	code point to convert
 * Return:	lower case counterpart, or @code itself if there is none
 *
 * Values up to 0x7f are handled arithmetically ('A' - 'Z' only); larger
 * values are looked up in capitalization_table.
 */
s32 utf_to_lower(const s32 code)
{
	struct capitalization_table *entry;

	if (code <= 0x7f)
		return (code >= 'A' && code <= 'Z') ? code + 0x20 : code;

	/* The table ends at the first entry with a zero upper member */
	for (entry = capitalization_table; entry->upper; ++entry) {
		if (entry->upper == code)
			return entry->lower;
	}
	return code;
}

/**
 * utf_to_upper() - convert a code point to upper case
 *
 * @code:	code point to convert
 * Return:	upper case counterpart, or @code itself if there is none
 *
 * Values up to 0x7f are handled arithmetically ('a' - 'z' only); larger
 * values are looked up in capitalization_table.
 */
s32 utf_to_upper(const s32 code)
{
	struct capitalization_table *entry;

	if (code <= 0x7f)
		return (code >= 'a' && code <= 'z') ? code - 0x20 : code;

	/* The search ends at the first entry with a zero lower member */
	for (entry = capitalization_table; entry->lower; ++entry) {
		if (entry->lower == code)
			return entry->upper;
	}
	return code;
}

/*
 * u16_strncmp() - compare two u16 strings
 *
 * @s1:		first string to compare
 * @s2:		second string to compare
 * @n:		maximum number of u16 to compare
 * Return:	0 if the first n u16 are the same in s1 and s2, or if both
 *		strings end before a difference is found
 *		< 0 if the first different u16 in s1 is less than the
 *		corresponding u16 in s2
 *		> 0 if the first different u16 in s1 is greater than the
 *		corresponding u16 in s2
 */
int u16_strncmp(const u16 *s1, const u16 *s2, size_t n)
{
	size_t i;

	for (i = 0; i < n; ++i) {
		int diff = s1[i] - s2[i];

		if (diff)
			return diff;
		/* Equal units: stop once the terminator is reached */
		if (!s1[i])
			break;
	}

	return 0;
}

/**
 * u16_strlen() - count the u16 units in a zero terminated string
 *
 * @in:		zero terminated string of two byte units
 * Return:	number of units before the terminating zero unit
 *
 * The string is read one byte at a time through a char pointer
 * (presumably so that unaligned input is tolerated); a unit is the
 * terminator only if both of its bytes are zero.
 */
size_t u16_strlen(const void *in)
{
	const char *start = in;
	const char *cur = start;

	while (cur[0] || cur[1])
		cur += 2;

	return (size_t)(cur - start) >> 1;
}

/**
 * u16_strnlen() - count the u16 units in a string, up to a limit
 *
 * @in:		string of u16 units
 * @count:	maximum number of units to look at
 * Return:	number of units before the first zero unit, or @count if no
 *		zero unit is found within the first @count units
 */
size_t __efi_runtime u16_strnlen(const u16 *in, size_t count)
{
	size_t len = 0;

	while (len < count && in[len])
		++len;

	return len;
}

/**
 * u16_strsize() - size in bytes of a zero terminated u16 string
 *
 * @in:		zero terminated string of u16 units
 * Return:	number of bytes occupied by the string, including the
 *		terminating zero unit
 */
size_t u16_strsize(const void *in)
{
	return (u16_strlen(in) + 1) * sizeof(u16);
}

/**
 * u16_strcpy() - copy a zero terminated u16 string
 *
 * @dest:	destination buffer; no size check is performed
 * @src:	zero terminated source string
 * Return:	@dest
 *
 * The terminating zero unit is copied as well.
 */
u16 *u16_strcpy(u16 *dest, const u16 *src)
{
	u16 *out = dest;
	u16 unit;

	do {
		unit = *src++;
		*out++ = unit;
	} while (unit);

	return dest;
}

/**
 * u16_strdup() - duplicate a zero terminated u16 string
 *
 * @src:	zero terminated source string, may be NULL
 * Return:	copy obtained from malloc(), including the terminating zero
 *		unit, or NULL if @src is NULL or the allocation fails
 */
u16 *u16_strdup(const void *src)
{
	size_t bytes;
	u16 *copy;

	if (!src)
		return NULL;

	bytes = u16_strsize(src);
	copy = malloc(bytes);
	if (copy)
		memcpy(copy, src, bytes);

	return copy;
}

/**
 * utf16_to_utf8() - convert a buffer of UTF-16 code units to UTF-8
 *
 * @dest:	output buffer; its size is not checked and no '\0' is
 *		appended
 * @src:	UTF-16 code units to convert
 * @size:	number of code units to read from @src
 * Return:	pointer just past the last byte written to @dest
 *
 * A valid surrogate pair becomes a four byte sequence. A low surrogate
 * without a preceding high surrogate is replaced by '?', and so is a high
 * surrogate that is followed by anything but a low surrogate.
 *
 * NOTE(review): a high surrogate that is the very last unit read produces
 * no output at all.
 */
uint8_t *utf16_to_utf8(uint8_t *dest, const uint16_t *src, size_t size)
{
	/* Pending high surrogate, 0 if none */
	uint32_t code_high = 0;

	while (size--) {
		uint32_t code = *src++;

		if (code_high) {
			if (code >= 0xDC00 && code <= 0xDFFF) {
				/* Surrogate pair.  */
				code = ((code_high - 0xD800) << 10) + (code - 0xDC00) + 0x10000;

				*dest++ = (code >> 18) | 0xF0;
				*dest++ = ((code >> 12) & 0x3F) | 0x80;
				*dest++ = ((code >> 6) & 0x3F) | 0x80;
				*dest++ = (code & 0x3F) | 0x80;
			} else {
				/* Error...  */
				*dest++ = '?';
				/*
				 * *src may be valid. Don't eat it.
				 *
				 * NOTE(review): size has already been
				 * decremented for this unit, so handling it
				 * again leaves one unit at the end of the
				 * input unconverted.
				 */
				src--;
			}

			code_high = 0;
		} else {
			if (code <= 0x007F) {
				*dest++ = code;
			} else if (code <= 0x07FF) {
				*dest++ = (code >> 6) | 0xC0;
				*dest++ = (code & 0x3F) | 0x80;
			} else if (code >= 0xD800 && code <= 0xDBFF) {
				/* Remember it and wait for the low surrogate */
				code_high = code;
				continue;
			} else if (code >= 0xDC00 && code <= 0xDFFF) {
				/* Error... */
				*dest++ = '?';
			} else {
				/*
				 * code stems from a 16 bit unit, so what is
				 * left is 0x0800 - 0xFFFF without surrogates.
				 */
				*dest++ = (code >> 12) | 0xE0;
				*dest++ = ((code >> 6) & 0x3F) | 0x80;
				*dest++ = (code & 0x3F) | 0x80;
			}
		}
	}

	return dest;
}

/**
 * utf_to_cp() - translate a Unicode code point to a code page byte
 *
 * @c:		in: Unicode code point, out: code page value
 * @codepage:	table of 128 Unicode values for code page bytes 0x80 - 0xff
 * Return:	0 on success, -ENOENT if the code point is not in the table
 *
 * Values below 0x80 are left untouched. A value found at table index j is
 * replaced by j + 0x80; a value that is not found is replaced by '?'.
 */
int utf_to_cp(s32 *c, const u16 *codepage)
{
	int idx;

	if (*c < 0x80)
		return 0;

	/* Look up codepage translation */
	for (idx = 0; idx < 0x80; ++idx) {
		if (codepage[idx] == *c) {
			*c = idx + 0x80;
			return 0;
		}
	}

	*c = '?';
	return -ENOENT;
}

/**
 * utf8_to_cp437_stream() - convert a UTF-8 byte stream to code page 437
 *
 * @c:		next UTF-8 byte of the stream
 * @buffer:	zero terminated scratch buffer holding the bytes of the
 *		sequence collected so far; kept by the caller between calls
 * Return:	code page 437 character once a code point is complete ('?'
 *		if it has no code page 437 equivalent), otherwise 0
 *
 * @c is appended to @buffer and the buffer is decoded. If decoding stops
 * before the appended byte is reached, the older bytes are discarded and
 * @c is tried again on its own.
 *
 * NOTE(review): when decoding fails only after consuming the whole buffer,
 * the bytes are kept for the next call; confirm that the caller's buffer
 * has room for one more byte in that situation.
 */
int utf8_to_cp437_stream(u8 c, char *buffer)
{
	char *end;
	const char *pos;
	s32 s;

	for (;;) {
		pos = buffer;
		end = buffer + strlen(buffer);
		*end++ = c;
		*end = 0;
		s = utf8_get(&pos);
		if (s > 0) {
			*buffer = 0;
			/*
			 * The status is not needed: utf_to_cp() has already
			 * replaced an untranslatable code point by '?'.
			 */
			utf_to_cp(&s, codepage_437);
			return s;
		}
		/* Everything was consumed: wait for more input */
		if (pos == end)
			return 0;
		/* Drop the stale bytes and retry with c alone */
		*buffer = 0;
	}
}

/**
 * utf8_to_utf32_stream() - convert a UTF-8 byte stream to code points
 *
 * @c:		next UTF-8 byte of the stream
 * @buffer:	zero terminated scratch buffer holding the bytes of the
 *		sequence collected so far; kept by the caller between calls
 * Return:	Unicode code point once one is complete, otherwise 0
 *
 * @c is appended to @buffer and the buffer is decoded. If decoding stops
 * before the appended byte is reached, the older bytes are discarded and
 * @c is tried again on its own.
 */
int utf8_to_utf32_stream(u8 c, char *buffer)
{
	for (;;) {
		size_t used = strlen(buffer);
		const char *cursor = buffer;
		s32 cp;

		buffer[used] = c;
		buffer[used + 1] = 0;

		cp = utf8_get(&cursor);
		if (cp > 0) {
			*buffer = 0;
			return cp;
		}
		/* Everything was consumed: wait for more input */
		if (cursor == buffer + used + 1)
			return 0;
		/* Drop the stale bytes and retry with c alone */
		*buffer = 0;
	}
}
Commit	Line	Data
f739fcd8	1	// SPDX-License-Identifier: GPL-2.0+
78178bb0 RC	2	/*
	3	* charset conversion utils
	4	*
	5	* Copyright (c) 2017 Rob Clark
78178bb0 RC	6	*/
78178bb0 RC	7
35cbb796	8	#include <common.h>
78178bb0	9	#include <charset.h>
b5130a81	10	#include <capitalization.h>
70616a1e	11	#include <cp437.h>
6974a4a3	12	#include <efi_loader.h>
73bb90ca	13	#include <errno.h>
78178bb0 RC	14	#include <malloc.h>
78178bb0 RC	15
70616a1e HS	16	/**
	17	* codepage_437 - Unicode to codepage 437 translation table
	18	*/
	19	const u16 codepage_437[128] = CP437;
	20
b5130a81 HS	21	static struct capitalization_table capitalization_table[] =
	22	#ifdef CONFIG_EFI_UNICODE_CAPITALIZATION
	23	UNICODE_CAPITALIZATION_TABLE;
	24	#elif CONFIG_FAT_DEFAULT_CODEPAGE == 1250
	25	CP1250_CAPITALIZATION_TABLE;
	26	#else
	27	CP437_CAPITALIZATION_TABLE;
	28	#endif
	29
35cbb796 HS	30	/**
	31	* get_code() - read Unicode code point from UTF-8 stream
	32	*
	33	* @read_u8: - stream reader
	34	* @src: - string buffer passed to stream reader, optional
ddbaff53	35	* Return: - Unicode code point, or -1
35cbb796 HS	36	*/
35cbb796 HS	37	static int get_code(u8 (read_u8)(void data), void *data)
d8c28232	38	{
35cbb796	39	s32 ch = 0;
d8c28232	40
35cbb796 HS	41	ch = read_u8(data);
35cbb796 HS	42	if (!ch)
d8c28232	43	return 0;
35cbb796 HS	44	if (ch >= 0xc2 && ch <= 0xf4) {
	45	int code = 0;
	46
	47	if (ch >= 0xe0) {
	48	if (ch >= 0xf0) {
d8c28232	49	/* 0xf0 - 0xf4 */
35cbb796 HS	50	ch &= 0x07;
	51	code = ch << 18;
	52	ch = read_u8(data);
	53	if (ch < 0x80 \|\| ch > 0xbf)
	54	goto error;
	55	ch &= 0x3f;
d8c28232 HS	56	} else {
d8c28232 HS	57	/* 0xe0 - 0xef */
35cbb796	58	ch &= 0x0f;
d8c28232	59	}
35cbb796	60	code += ch << 12;
d8c28232 HS	61	if ((code >= 0xD800 && code <= 0xDFFF) \|\|
d8c28232 HS	62	code >= 0x110000)
35cbb796 HS	63	goto error;
	64	ch = read_u8(data);
	65	if (ch < 0x80 \|\| ch > 0xbf)
	66	goto error;
d8c28232 HS	67	}
d8c28232 HS	68	/* 0xc0 - 0xdf or continuation byte (0x80 - 0xbf) */
35cbb796 HS	69	ch &= 0x3f;
	70	code += ch << 6;
	71	ch = read_u8(data);
	72	if (ch < 0x80 \|\| ch > 0xbf)
	73	goto error;
	74	ch &= 0x3f;
	75	ch += code;
	76	} else if (ch >= 0x80) {
	77	goto error;
d8c28232	78	}
35cbb796 HS	79	return ch;
35cbb796 HS	80	error:
ddbaff53	81	return -1;
35cbb796 HS	82	}
	83
	84	/**
	85	* read_string() - read byte from character string
	86	*
	87	* @data: - pointer to string
	88	* Return: - byte read
	89	*
	90	* The string pointer is incremented if it does not point to '\0'.
	91	*/
	92	static u8 read_string(void *data)
	93
	94	{
	95	const char src = (const char )data;
	96	u8 c;
	97
	98	if (!src \|\| !src \|\| !*src)
	99	return 0;
	100	c = **src;
d8c28232	101	++*src;
35cbb796 HS	102	return c;
	103	}
	104
	105	/**
	106	* read_console() - read byte from console
	107	*
60d79876 HS	108	* @data - not used, needed to match interface
60d79876 HS	109	* Return: - byte read or 0 on error
35cbb796 HS	110	*/
	111	static u8 read_console(void *data)
	112	{
60d79876 HS	113	int ch;
60d79876 HS	114
c670aeee	115	ch = getchar();
60d79876 HS	116	if (ch < 0)
	117	ch = 0;
	118	return ch;
35cbb796 HS	119	}
	120
	121	int console_read_unicode(s32 *code)
	122	{
ddbaff53 HS	123	for (;;) {
	124	s32 c;
	125
	126	if (!tstc()) {
	127	/* No input available */
	128	return 1;
	129	}
35cbb796	130
ddbaff53 HS	131	/* Read Unicode code */
	132	c = get_code(read_console, NULL);
	133	if (c > 0) {
	134	*code = c;
	135	return 0;
	136	}
	137	}
35cbb796 HS	138	}
	139
	140	s32 utf8_get(const char **src)
	141	{
	142	return get_code(read_string, src);
d8c28232 HS	143	}
	144
	145	int utf8_put(s32 code, char **dst)
	146	{
	147	if (!dst \|\| !*dst)
	148	return -1;
	149	if ((code >= 0xD800 && code <= 0xDFFF) \|\| code >= 0x110000)
	150	return -1;
	151	if (code <= 0x007F) {
	152	**dst = code;
	153	} else {
	154	if (code <= 0x07FF) {
	155	**dst = code >> 6 \| 0xC0;
	156	} else {
	157	if (code < 0x10000) {
	158	**dst = code >> 12 \| 0xE0;
	159	} else {
	160	**dst = code >> 18 \| 0xF0;
	161	++*dst;
	162	**dst = (code >> 12 & 0x3F) \| 0x80;
	163	}
	164	++*dst;
	165	**dst = (code >> 6 & 0x3F) \| 0x80;
	166	}
	167	++*dst;
	168	**dst = (code & 0x3F) \| 0x80;
	169	}
	170	++*dst;
	171	return 0;
	172	}
	173
	174	size_t utf8_utf16_strnlen(const char *src, size_t count)
	175	{
	176	size_t len = 0;
	177
	178	for (; *src && count; --count) {
	179	s32 code = utf8_get(&src);
	180
	181	if (!code)
	182	break;
	183	if (code < 0) {
	184	/* Reserve space for a replacement character */
	185	len += 1;
	186	} else if (code < 0x10000) {
	187	len += 1;
	188	} else {
	189	len += 2;
	190	}
	191	}
	192	return len;
	193	}
	194
	195	int utf8_utf16_strncpy(u16 *dst, const char src, size_t count)
	196	{
	197	if (!src \|\| !dst \|\| !*dst)
	198	return -1;
	199
	200	for (; count && *src; --count) {
	201	s32 code = utf8_get(&src);
	202
	203	if (code < 0)
	204	code = '?';
	205	utf16_put(code, dst);
	206	}
207	**dst = 0;
208	return 0;
209	}
210
211	s32 utf16_get(const u16 **src)
212	{
213	s32 code, code2;
214
215	if (!src \|\| !*src)
216	return -1;
217	if (!**src)
218	return 0;
219	code = **src;
220	++*src;
221	if (code >= 0xDC00 && code <= 0xDFFF)
222	return -1;
223	if (code >= 0xD800 && code <= 0xDBFF) {
224	if (!**src)
225	return -1;
226	code &= 0x3ff;
227	code <<= 10;
228	code += 0x10000;
229	code2 = **src;
230	++*src;
231	if (code2 <= 0xDC00 \|\| code2 >= 0xDFFF)
232	return -1;
233	code2 &= 0x3ff;
234	code += code2;
235	}
236	return code;
237	}
238
239	int utf16_put(s32 code, u16 **dst)
240	{
241	if (!dst \|\| !*dst)
242	return -1;
243	if ((code >= 0xD800 && code <= 0xDFFF) \|\| code >= 0x110000)
244	return -1;
245	if (code < 0x10000) {
246	**dst = code;
247	} else {
248	code -= 0x10000;
249	**dst = code >> 10 \| 0xD800;
250	++*dst;
251	**dst = (code & 0x3ff) \| 0xDC00;
252	}
253	++*dst;
254	return 0;
255	}
256
257	size_t utf16_strnlen(const u16 *src, size_t count)
258	{
259	size_t len = 0;
260
261	for (; *src && count; --count) {
262	s32 code = utf16_get(&src);
263
264	if (!code)
265	break;
266	/*
267	* In case of an illegal sequence still reserve space for a
268	* replacement character.
269	*/
270	++len;
271	}
272	return len;
273	}
274
275	size_t utf16_utf8_strnlen(const u16 *src, size_t count)
276	{
277	size_t len = 0;
278
279	for (; *src && count; --count) {
280	s32 code = utf16_get(&src);
281
282	if (!code)
283	break;
284	if (code < 0)
285	/* Reserve space for a replacement character */
286	len += 1;
287	else if (code < 0x80)
288	len += 1;
289	else if (code < 0x800)
290	len += 2;
291	else if (code < 0x10000)
292	len += 3;
293	else
294	len += 4;
295	}
296	return len;
297	}
298
299	int utf16_utf8_strncpy(char *dst, const u16 src, size_t count)
300	{
301	if (!src \|\| !dst \|\| !*dst)
302	return -1;
303
304	for (; count && *src; --count) {
305	s32 code = utf16_get(&src);
306
307	if (code < 0)
308	code = '?';
309	utf8_put(code, dst);
310	}
311	**dst = 0;
312	return 0;
313	}
314
b5130a81 HS	315	s32 utf_to_lower(const s32 code)
	316	{
	317	struct capitalization_table *pos = capitalization_table;
	318	s32 ret = code;
	319
	320	if (code <= 0x7f) {
	321	if (code >= 'A' && code <= 'Z')
	322	ret += 0x20;
	323	return ret;
	324	}
	325	for (; pos->upper; ++pos) {
	326	if (pos->upper == code) {
	327	ret = pos->lower;
	328	break;
	329	}
	330	}
	331	return ret;
	332	}
	333
	334	s32 utf_to_upper(const s32 code)
	335	{
	336	struct capitalization_table *pos = capitalization_table;
	337	s32 ret = code;
	338
	339	if (code <= 0x7f) {
	340	if (code >= 'a' && code <= 'z')
	341	ret -= 0x20;
	342	return ret;
	343	}
	344	for (; pos->lower; ++pos) {
	345	if (pos->lower == code) {
	346	ret = pos->upper;
	347	break;
	348	}
	349	}
	350	return ret;
	351	}
78178bb0	352
f8062c96 AT	353	/*
	354	* u16_strncmp() - compare two u16 string
	355	*
	356	* @s1: first string to compare
	357	* @s2: second string to compare
	358	* @n: maximum number of u16 to compare
	359	* Return: 0 if the first n u16 are the same in s1 and s2
	360	* < 0 if the first different u16 in s1 is less than the
	361	* corresponding u16 in s2
	362	* > 0 if the first different u16 in s1 is greater than the
	363	* corresponding u16 in s2
	364	*/
	365	int u16_strncmp(const u16 s1, const u16 s2, size_t n)
	366	{
	367	int ret = 0;
	368
	369	for (; n; --n, ++s1, ++s2) {
	370	ret = s1 - s2;
	371	if (ret \|\| !*s1)
	372	break;
	373	}
	374
	375	return ret;
	376	}
	377
317068b8	378	size_t u16_strlen(const void *in)
78178bb0	379	{
317068b8 HS	380	const char *pos = in;
	381	size_t ret;
	382
	383	for (; pos[0] \|\| pos[1]; pos += 2)
	384	;
	385	ret = pos - (char *)in;
	386	ret >>= 1;
	387	return ret;
78178bb0 RC	388	}
78178bb0 RC	389
6974a4a3	390	size_t __efi_runtime u16_strnlen(const u16 *in, size_t count)
78178bb0 RC	391	{
	392	size_t i;
	393	for (i = 0; count-- && in[i]; i++);
	394	return i;
	395	}
	396
4835d35a SG	397	size_t u16_strsize(const void *in)
	398	{
	399	return (u16_strlen(in) + 1) * sizeof(u16);
	400	}
	401
2a3537ae AT	402	u16 u16_strcpy(u16 dest, const u16 *src)
	403	{
	404	u16 *tmp = dest;
	405
	406	for (;; dest++, src++) {
	407	dest = src;
	408	if (!*src)
	409	break;
	410	}
	411
	412	return tmp;
	413	}
	414
317068b8	415	u16 u16_strdup(const void src)
2a3537ae AT	416	{
2a3537ae AT	417	u16 *new;
317068b8	418	size_t len;
2a3537ae AT	419
	420	if (!src)
	421	return NULL;
317068b8 HS	422	len = (u16_strlen(src) + 1) * sizeof(u16);
317068b8 HS	423	new = malloc(len);
2a3537ae AT	424	if (!new)
2a3537ae AT	425	return NULL;
317068b8	426	memcpy(new, src, len);
2a3537ae AT	427
	428	return new;
	429	}
	430
78178bb0 RC	431	/* Convert UTF-16 to UTF-8. */
	432	uint8_t utf16_to_utf8(uint8_t dest, const uint16_t *src, size_t size)
	433	{
	434	uint32_t code_high = 0;
	435
	436	while (size--) {
	437	uint32_t code = *src++;
	438
	439	if (code_high) {
	440	if (code >= 0xDC00 && code <= 0xDFFF) {
	441	/* Surrogate pair. */
	442	code = ((code_high - 0xD800) << 10) + (code - 0xDC00) + 0x10000;
	443
	444	*dest++ = (code >> 18) \| 0xF0;
	445	*dest++ = ((code >> 12) & 0x3F) \| 0x80;
	446	*dest++ = ((code >> 6) & 0x3F) \| 0x80;
	447	*dest++ = (code & 0x3F) \| 0x80;
	448	} else {
	449	/* Error... */
	450	*dest++ = '?';
	451	/* src may be valid. Don't eat it. /
	452	src--;
	453	}
	454
	455	code_high = 0;
	456	} else {
	457	if (code <= 0x007F) {
	458	*dest++ = code;
	459	} else if (code <= 0x07FF) {
	460	*dest++ = (code >> 6) \| 0xC0;
	461	*dest++ = (code & 0x3F) \| 0x80;
	462	} else if (code >= 0xD800 && code <= 0xDBFF) {
	463	code_high = code;
	464	continue;
	465	} else if (code >= 0xDC00 && code <= 0xDFFF) {
	466	/* Error... */
	467	*dest++ = '?';
	468	} else if (code < 0x10000) {
	469	*dest++ = (code >> 12) \| 0xE0;
	470	*dest++ = ((code >> 6) & 0x3F) \| 0x80;
	471	*dest++ = (code & 0x3F) \| 0x80;
	472	} else {
	473	*dest++ = (code >> 18) \| 0xF0;
	474	*dest++ = ((code >> 12) & 0x3F) \| 0x80;
	475	*dest++ = ((code >> 6) & 0x3F) \| 0x80;
	476	*dest++ = (code & 0x3F) \| 0x80;
	477	}
	478	}
	479	}
	480
	481	return dest;
	482	}
73bb90ca	483
73bb90ca HS	484	int utf_to_cp(s32 c, const u16 codepage)
	485	{
	486	if (*c >= 0x80) {
	487	int j;
	488
	489	/* Look up codepage translation */
	490	for (j = 0; j < 0x80; ++j) {
	491	if (*c == codepage[j]) {
	492	*c = j + 0x80;
	493	return 0;
	494	}
	495	}
	496	*c = '?';
	497	return -ENOENT;
	498	}
	499	return 0;
	500	}
e91789e2 HS	501
	502	int utf8_to_cp437_stream(u8 c, char *buffer)
	503	{
	504	char *end;
	505	const char *pos;
	506	s32 s;
	507	int ret;
	508
	509	for (;;) {
	510	pos = buffer;
	511	end = buffer + strlen(buffer);
	512	*end++ = c;
	513	*end = 0;
	514	s = utf8_get(&pos);
	515	if (s > 0) {
	516	*buffer = 0;
	517	ret = utf_to_cp(&s, codepage_437);
	518	return s;
	519	}
	520	if (pos == end)
	521	return 0;
	522	*buffer = 0;
	523	}
	524	}
	525
	526	int utf8_to_utf32_stream(u8 c, char *buffer)
	527	{
	528	char *end;
	529	const char *pos;
	530	s32 s;
	531
	532	for (;;) {
	533	pos = buffer;
	534	end = buffer + strlen(buffer);
	535	*end++ = c;
	536	*end = 0;
	537	s = utf8_get(&pos);
	538	if (s > 0) {
	539	*buffer = 0;
	540	return s;
	541	}
	542	if (pos == end)
	543	return 0;
	544	*buffer = 0;
	545	}
	546	}