[binutils.git] / gdb / charset.h

/* Character set conversion support for GDB.
   Copyright (C) 2001, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.

   This file is part of GDB.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#ifndef CHARSET_H
#define CHARSET_H

/* If the target program uses a different character set than the host,
   GDB has some support for translating between the two; GDB converts
   characters and strings to the host character set before displaying
   them, and converts characters and strings appearing in expressions
   entered by the user to the target character set.

   GDB's code pretty much assumes that the host character set is some
   superset of ASCII; there are plenty of ('0' + n) expressions and
   the like.  */

/* Return the name of the current host character set.  The result is
   owned by the charset module; the caller should not free it.  */
const char *host_charset (void);
/* Likewise, but return the name of the current target character set.
   GDBARCH is presumably the architecture whose character set is
   wanted -- confirm against charset.c.  */
const char *target_charset (struct gdbarch *gdbarch);
/* Likewise for, judging by the name, the target's wide character
   set -- confirm against charset.c.  */
const char *target_wide_charset (struct gdbarch *gdbarch);

/* Selects the kind of transliteration that
   convert_between_encodings performs.  */
enum transliterations
  {
    translit_none = 0,  /* Error on failure to convert.  */
    translit_char = 1   /* Transliterate to host char.  */
  };

/* Convert between two encodings.

   FROM is the name of the source encoding.
   TO is the name of the destination encoding.
   BYTES holds the bytes to convert.  NOTE(review): these were
   described as "characters in the target encoding"; presumably that
   means the debug target's encoding, i.e. FROM, and not TO --
   confirm against the callers.
   NUM_BYTES is the number of bytes in BYTES.
   WIDTH is the width of a character from the FROM charset, in bytes.
   For a variable width encoding, WIDTH should be the size of a "base
   character".
   OUTPUT is an obstack where the converted data is written.  The
   caller is responsible for initializing the obstack, and for
   destroying the obstack should an error occur.
   TRANSLIT specifies how invalid conversions should be handled; see
   enum transliterations.  */

void convert_between_encodings (const char *from, const char *to,
				const gdb_byte *bytes,
				unsigned int num_bytes,
				int width, struct obstack *output,
				enum transliterations translit);


/* Status codes through which wchar_iterate reports errors and other
   non-ordinary outcomes.  */
enum wchar_iterate_result
  {
    wchar_iterate_ok = 0,          /* Ordinary return.  */
    wchar_iterate_invalid = 1,     /* Invalid input sequence.  */
    wchar_iterate_incomplete = 2,  /* Incomplete input sequence at the
                                      end of the input.  */
    wchar_iterate_eof = 3          /* EOF.  */
  };

/* Declaration of the opaque wchar iterator type.  Only pointers to
   it appear in this header: make_wchar_iterator returns one and
   wchar_iterate takes one.  */
struct wchar_iterator;

/* Create a new character iterator which returns wchar_t's.

   INPUT is the input buffer.
   BYTES is the number of bytes in the input buffer.
   CHARSET is the name of the character set in which INPUT is
   encoded.
   WIDTH is the number of bytes in a base character of CHARSET.

   This function either returns a new character set iterator, or calls
   error.  The result can be freed using a cleanup; see
   make_cleanup_wchar_iterator.  */
struct wchar_iterator *make_wchar_iterator (const gdb_byte *input,
					    size_t bytes,
					    const char *charset,
					    size_t width);

/* Return a new cleanup suitable for destroying the wchar iterator
   ITER.  ITER is expected to be a result of make_wchar_iterator.  */
struct cleanup *make_cleanup_wchar_iterator (struct wchar_iterator *iter);

/* Perform a single iteration of a wchar_t iterator.

   ITER is an iterator created by make_wchar_iterator.

   Returns the number of characters converted:

   - A positive result indicates the number of valid wchar_ts in the
     result; *OUT_CHARS is updated to point to the first valid
     character.

   - A negative result means that EOF has been reached.

   - A zero result means one of several unusual results; *OUT_RESULT
     is set to indicate the type of un-ordinary return (see enum
     wchar_iterate_result).

   In all cases aside from EOF, *PTR is set to point to the first
   converted target byte, and *LEN is set to the number of bytes
   converted.

   The values of *OUT_RESULT mean:

   wchar_iterate_invalid means that an invalid input character was
   seen.  The iterator is advanced by WIDTH (the argument to
   make_wchar_iterator) bytes.

   wchar_iterate_incomplete means that an incomplete character was
   seen at the end of the input sequence.

   wchar_iterate_eof means that all bytes were successfully
   converted.  The other output arguments are not set.

   NOTE(review): wchar_iterate_eof is described among the zero-result
   statuses, while EOF is also said to produce a negative result;
   confirm in charset.c which return value accompanies
   wchar_iterate_eof.  */
int wchar_iterate (struct wchar_iterator *iter,
		   enum wchar_iterate_result *out_result,
		   gdb_wchar_t **out_chars,
		   const gdb_byte **ptr, size_t *len);

\f

/* GDB needs to know a few details of its execution character set.
   This knowledge is isolated here and in charset.c.  */

/* The escape character.  27 is the code of ESC in ASCII, which GDB
   assumes the host character set to be a superset of.  */
#define HOST_ESCAPE_CHAR 27

/* Convert a letter, like 'c', to its corresponding control
   character (for 'c' that is presumably Control-C -- confirm against
   charset.c).  Going by the function's name, both C and the result
   are in the host character set.  */
char host_letter_to_control_character (char c);

/* Convert a hex digit character to its numeric value.  E.g., 'f' is
   converted to 15.  Both upper- and lower-case letters are
   recognized.  This function assumes that C is a valid hex digit;
   what happens for any other character is not specified here.  */
int host_hex_value (char c);

#endif /* CHARSET_H */
Commit	Line	Data
234b45d4	1	/* Character set conversion support for GDB.
7b6bb8da JB	2	Copyright (C) 2001, 2007, 2008, 2009, 2010, 2011
7b6bb8da JB	3	Free Software Foundation, Inc.
234b45d4 KB	4
	5	This file is part of GDB.
	6
	7	This program is free software; you can redistribute it and/or modify
	8	it under the terms of the GNU General Public License as published by
a9762ec7	9	the Free Software Foundation; either version 3 of the License, or
234b45d4 KB	10	(at your option) any later version.
	11
	12	This program is distributed in the hope that it will be useful,
	13	but WITHOUT ANY WARRANTY; without even the implied warranty of
	14	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	15	GNU General Public License for more details.
	16
	17	You should have received a copy of the GNU General Public License
a9762ec7	18	along with this program. If not, see <http://www.gnu.org/licenses/>. */
234b45d4 KB	19
	20	#ifndef CHARSET_H
	21	#define CHARSET_H
	22
234b45d4 KB	23	/* If the target program uses a different character set than the host,
	24	GDB has some support for translating between the two; GDB converts
	25	characters and strings to the host character set before displaying
	26	them, and converts characters and strings appearing in expressions
	27	entered by the user to the target character set.
	28
6c7a06a3 TT	29	GDB's code pretty much assumes that the host character set is some
	30	superset of ASCII; there are plenty if ('0' + n) expressions and
	31	the like. */
234b45d4	32
234b45d4 KB	33	/* Return the name of the current host/target character set. The
	34	result is owned by the charset module; the caller should not free
	35	it. */
	36	const char *host_charset (void);
f870a310 TT	37	const char *target_charset (struct gdbarch *gdbarch);
f870a310 TT	38	const char *target_wide_charset (struct gdbarch *gdbarch);
6c7a06a3 TT	39
	40	/* These values are used to specify the type of transliteration done
	41	by convert_between_encodings. */
	42	enum transliterations
	43	{
	44	/* Error on failure to convert. */
	45	translit_none,
	46	/* Transliterate to host char. */
	47	translit_char
	48	};
	49
	50	/* Convert between two encodings.
	51
	52	FROM is the name of the source encoding.
	53	TO is the name of the target encoding.
	54	BYTES holds the bytes to convert; this is assumed to be characters
	55	in the target encoding.
	56	NUM_BYTES is the number of bytes.
	57	WIDTH is the width of a character from the FROM charset, in bytes.
	58	For a variable width encoding, WIDTH should be the size of a "base
	59	character".
	60	OUTPUT is an obstack where the converted data is written. The
	61	caller is responsible for initializing the obstack, and for
	62	destroying the obstack should an error occur.
	63	TRANSLIT specifies how invalid conversions should be handled. */
aff410f1	64
6c7a06a3	65	void convert_between_encodings (const char *from, const char *to,
aff410f1 MS	66	const gdb_byte *bytes,
aff410f1 MS	67	unsigned int num_bytes,
6c7a06a3 TT	68	int width, struct obstack *output,
	69	enum transliterations translit);
	70
	71
	72	/* These values are used by wchar_iterate to report errors. */
	73	enum wchar_iterate_result
	74	{
	75	/* Ordinary return. */
	76	wchar_iterate_ok,
	77	/* Invalid input sequence. */
	78	wchar_iterate_invalid,
	79	/* Incomplete input sequence at the end of the input. */
	80	wchar_iterate_incomplete,
	81	/* EOF. */
	82	wchar_iterate_eof
	83	};
	84
	85	/* Declaration of the opaque wchar iterator type. */
	86	struct wchar_iterator;
	87
	88	/* Create a new character iterator which returns wchar_t's. INPUT is
	89	the input buffer. BYTES is the number of bytes in the input
	90	buffer. CHARSET is the name of the character set in which INPUT is
	91	encoded. WIDTH is the number of bytes in a base character of
	92	CHARSET.
	93
	94	This function either returns a new character set iterator, or calls
	95	error. The result can be freed using a cleanup; see
	96	make_cleanup_wchar_iterator. */
aff410f1 MS	97	struct wchar_iterator *make_wchar_iterator (const gdb_byte *input,
aff410f1 MS	98	size_t bytes,
6c7a06a3 TT	99	const char *charset,
	100	size_t width);
	101
	102	/* Return a new cleanup suitable for destroying the wchar iterator
	103	ITER. */
	104	struct cleanup *make_cleanup_wchar_iterator (struct wchar_iterator *iter);
	105
	106	/* Perform a single iteration of a wchar_t iterator.
	107
	108	Returns the number of characters converted. A negative result
	109	means that EOF has been reached. A positive result indicates the
	110	number of valid wchar_ts in the result; *OUT_CHARS is updated to
	111	point to the first valid character.
	112
	113	In all cases aside from EOF, *PTR is set to point to the first
	114	converted target byte. *LEN is set to the number of bytes
	115	converted.
	116
	117	A zero result means one of several unusual results. *OUT_RESULT is
	118	set to indicate the type of un-ordinary return.
	119
	120	wchar_iterate_invalid means that an invalid input character was
	121	seen. The iterator is advanced by WIDTH (the argument to
	122	make_wchar_iterator) bytes.
	123
	124	wchar_iterate_incomplete means that an incomplete character was
	125	seen at the end of the input sequence.
	126
	127	wchar_iterate_eof means that all bytes were successfully
	128	converted. The other output arguments are not set. */
	129	int wchar_iterate (struct wchar_iterator *iter,
	130	enum wchar_iterate_result *out_result,
	131	gdb_wchar_t **out_chars,
	132	const gdb_byte **ptr, size_t *len);
	133
	134	\f
	135
	136	/* GDB needs to know a few details of its execution character set.
	137	This knowledge is isolated here and in charset.c. */
	138
	139	/* The escape character. */
	140	#define HOST_ESCAPE_CHAR 27
	141
	142	/* Convert a letter, like 'c', to its corresponding control
	143	character. */
	144	char host_letter_to_control_character (char c);
	145
	146	/* Convert a hex digit character to its numeric value. E.g., 'f' is
	147	converted to 15. This function assumes that C is a valid hex
	148	digit. Both upper- and lower-case letters are recognized. */
	149	int host_hex_value (char c);
234b45d4 KB	150
234b45d4 KB	151	#endif /* CHARSET_H */