]>
Commit | Line | Data |
---|---|---|
234b45d4 KB |
1 | /* Character set conversion support for GDB. |
2 | Copyright 2001 Free Software Foundation, Inc. | |
3 | ||
4 | This file is part of GDB. | |
5 | ||
6 | This program is free software; you can redistribute it and/or modify | |
7 | it under the terms of the GNU General Public License as published by | |
8 | the Free Software Foundation; either version 2 of the License, or | |
9 | (at your option) any later version. | |
10 | ||
11 | This program is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | GNU General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with this program; if not, write to the Free Software | |
18 | Foundation, Inc., 59 Temple Place - Suite 330, | |
19 | Boston, MA 02111-1307, USA. */ | |
20 | ||
21 | #ifndef CHARSET_H | |
22 | #define CHARSET_H | |
23 | ||
24 | ||
25 | /* If the target program uses a different character set than the host, | |
26 | GDB has some support for translating between the two; GDB converts | |
27 | characters and strings to the host character set before displaying | |
28 | them, and converts characters and strings appearing in expressions | |
29 | entered by the user to the target character set. | |
30 | ||
31 | At the moment, GDB only supports single-byte, stateless character | |
32 | sets. This includes the ISO-8859 family (ASCII extended with | |
33 | accented characters, and (I think) Cyrillic, for European | |
34 | languages), and the EBCDIC family (used on IBM's mainframes). | |
35 | Unfortunately, it excludes many Asian scripts, the fixed- and | |
36 | variable-width Unicode encodings, and other desirable things. | |
37 | Patches are welcome! (For example, it would be nice if the Java | |
38 | string support could simply get absorbed into some more general | |
39 | multi-byte encoding support.) | |
40 | ||
41 | Furthermore, GDB's code pretty much assumes that the host character | |
42 | set is some superset of ASCII; there are plenty of ('0' + n) | |
43 | expressions and the like. | |
44 | ||
45 | When the `iconv' library routine supports a character set meeting | |
46 | the requirements above, it's easy to plug an entry into GDB's table | |
47 | that uses iconv to handle the details. */ | |
48 | ||
49 | ||
50 | /* Set the host character set to CHARSET. CHARSET must be a superset | |
51 | of ASCII, since GDB's code assumes this. */ | |
52 | void set_host_charset (const char *charset); | |
53 | ||
54 | ||
55 | /* Set the target character set to CHARSET. */ | |
56 | void set_target_charset (const char *charset); | |
57 | ||
58 | ||
59 | /* Return the name of the current host/target character set. The | |
60 | result is owned by the charset module; the caller should not free | |
61 | it. */ | |
62 | const char *host_charset (void); | |
63 | const char *target_charset (void); | |
64 | ||
65 | ||
66 | /* In general, the set of C backslash escapes (\n, \f) is specific to | |
67 | the character set. Not all character sets will have form feed | |
68 | characters, for example. | |
69 | ||
70 | The following functions allow GDB to parse and print control | |
71 | characters in a character-set-independent way. They are both | |
72 | language-specific (to C and C++) and character-set-specific. | |
73 | Putting them here is a compromise. */ | |
74 | ||
75 | ||
76 | /* If the target character TARGET_CHAR has a backslash escape in the | |
77 | C language (i.e., a character like 'n' or 't'), return the host | |
78 | character string that should follow the backslash. Otherwise, | |
79 | return zero. | |
80 | ||
81 | When this function returns non-zero, the string it returns is | |
82 | statically allocated; the caller is not responsible for freeing it. */ | |
83 | const char *c_target_char_has_backslash_escape (int target_char); | |
84 | ||
85 | ||
86 | /* If the host character HOST_CHAR is a valid backslash escape in the | |
87 | C language for the target character set, return non-zero, and set | |
88 | *TARGET_CHAR to the target character the backslash escape represents. | |
89 | Otherwise, return zero. */ | |
90 | int c_parse_backslash (int host_char, int *target_char); | |
91 | ||
92 | ||
93 | /* Return non-zero if the host character HOST_CHAR can be printed | |
94 | literally --- that is, if it can be readably printed as itself in a | |
95 | character or string constant. Return zero if it should be printed | |
96 | using some kind of numeric escape, like '\031' in C, '^(25)' in | |
97 | Chill, or #25 in Pascal. */ | |
98 | int host_char_print_literally (int host_char); | |
99 | ||
100 | ||
101 | /* If the host character HOST_CHAR has an equivalent in the target | |
102 | character set, set *TARGET_CHAR to that equivalent, and return | |
103 | non-zero. Otherwise, return zero. */ | |
104 | int host_char_to_target (int host_char, int *target_char); | |
105 | ||
106 | ||
107 | /* If the target character TARGET_CHAR has an equivalent in the host | |
108 | character set, set *HOST_CHAR to that equivalent, and return | |
109 | non-zero. Otherwise, return zero. */ | |
110 | int target_char_to_host (int target_char, int *host_char); | |
111 | ||
112 | ||
113 | /* If the target character TARGET_CHAR has a corresponding control | |
114 | character (also in the target character set), set *TARGET_CTRL_CHAR | |
115 | to the control character, and return non-zero. Otherwise, return | |
116 | zero. */ | |
117 | int target_char_to_control_char (int target_char, int *target_ctrl_char); | |
118 | ||
119 | ||
120 | #endif /* CHARSET_H */ |