]>
Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * unicode.c | |
3 | * | |
4 | * PURPOSE | |
5 | * Routines for converting between UTF-8 and OSTA Compressed Unicode. | |
6 | * Also handles filename mangling | |
7 | * | |
8 | * DESCRIPTION | |
9 | * OSTA Compressed Unicode is explained in the OSTA UDF specification. | |
10 | * http://www.osta.org/ | |
11 | * UTF-8 is explained in IETF RFC 3629. | |
12 | * https://www.rfc-editor.org/rfc/rfc3629 | |
13 | * | |
1da177e4 LT |
14 | * COPYRIGHT |
15 | * This file is distributed under the terms of the GNU General Public | |
16 | * License (GPL). Copies of the GPL can be obtained from: | |
17 | * ftp://prep.ai.mit.edu/pub/gnu/GPL | |
18 | * Each contributing author retains all rights to their own work. | |
19 | */ | |
20 | ||
21 | #include "udfdecl.h" | |
22 | ||
23 | #include <linux/kernel.h> | |
24 | #include <linux/string.h> /* for memset */ | |
25 | #include <linux/nls.h> | |
f845fced | 26 | #include <linux/crc-itu-t.h> |
5a0e3ad6 | 27 | #include <linux/slab.h> |
1da177e4 LT |
28 | |
29 | #include "udf_sb.h" | |
30 | ||
3e7fc205 AG |
/*
 * Encode one Unicode character as UTF-8.
 *
 * @uni:      character to encode
 * @out:      destination buffer
 * @boundlen: number of bytes available in @out
 *
 * Characters below 0x80 take one byte, characters below 0x800 take two
 * bytes, everything else is written as a three byte sequence.
 *
 * Returns the number of bytes stored in @out, or -ENAMETOOLONG when
 * @boundlen is too small for the sequence (nothing is written then).
 */
static int udf_uni2char_utf8(wchar_t uni,
			     unsigned char *out,
			     int boundlen)
{
	int need;

	if (boundlen <= 0)
		return -ENAMETOOLONG;

	/* Work out the sequence length before touching the output */
	if (uni < 0x80)
		need = 1;
	else if (uni < 0x800)
		need = 2;
	else
		need = 3;

	if (boundlen < need)
		return -ENAMETOOLONG;

	switch (need) {
	case 1:
		out[0] = (unsigned char)uni;
		break;
	case 2:
		out[0] = (unsigned char)(0xc0 | (uni >> 6));
		out[1] = (unsigned char)(0x80 | (uni & 0x3f));
		break;
	default:
		out[0] = (unsigned char)(0xe0 | (uni >> 12));
		out[1] = (unsigned char)(0x80 | ((uni >> 6) & 0x3f));
		out[2] = (unsigned char)(0x80 | (uni & 0x3f));
		break;
	}

	return need;
}
56 | ||
3e7fc205 AG |
/*
 * Decode one UTF-8 sequence into a single character.
 *
 * @in:       input bytes
 * @boundlen: number of bytes available in @in
 * @uni:      where the decoded character is stored
 *
 * Lead bytes announcing two to six byte sequences are recognised. Every
 * following byte must be a continuation byte (10xxxxxx); if it is not, the
 * sequence is rejected instead of being folded into a bogus character.
 *
 * A well-formed sequence whose value is above 0xffff cannot be represented
 * in CS0 (udf_name_to_CS0() stores at most 16 bits per character), so it is
 * consumed as a whole and reported as '?'.
 *
 * Returns the number of input bytes consumed (0 when @boundlen <= 0, in
 * which case *@uni is left untouched), or -EINVAL with *@uni set to '?'
 * when the input starts with an invalid lead byte, contains an invalid
 * continuation byte, or ends in the middle of a sequence.
 */
static int udf_char2uni_utf8(const unsigned char *in,
			     int boundlen,
			     wchar_t *uni)
{
	unsigned int utf_char;
	unsigned char c;
	int utf_cnt, u_len;

	utf_char = 0;
	utf_cnt = 0;	/* continuation bytes still expected; -1 on error */
	for (u_len = 0; u_len < boundlen;) {
		c = in[u_len++];

		if (utf_cnt) {
			/* Complete a multi-byte UTF-8 character */
			if ((c & 0xc0) != 0x80) {
				/* Not a continuation byte: malformed input */
				utf_cnt = -1;
				break;
			}
			utf_char = (utf_char << 6) | (c & 0x3f);
			if (--utf_cnt)
				continue;
		} else if (c & 0x80) {
			/* Start a multi-byte UTF-8 character */
			if ((c & 0xe0) == 0xc0) {
				utf_char = c & 0x1f;
				utf_cnt = 1;
			} else if ((c & 0xf0) == 0xe0) {
				utf_char = c & 0x0f;
				utf_cnt = 2;
			} else if ((c & 0xf8) == 0xf0) {
				utf_char = c & 0x07;
				utf_cnt = 3;
			} else if ((c & 0xfc) == 0xf8) {
				utf_char = c & 0x03;
				utf_cnt = 4;
			} else if ((c & 0xfe) == 0xfc) {
				utf_char = c & 0x01;
				utf_cnt = 5;
			} else {
				/* Stray continuation byte or 0xfe/0xff */
				utf_cnt = -1;
				break;
			}
			continue;
		} else {
			/* Single byte UTF-8 character (most common) */
			utf_char = c;
		}

		/* Do not silently truncate what does not fit in 16 bits */
		if (utf_char > 0xffff)
			utf_char = '?';
		*uni = utf_char;
		break;
	}
	if (utf_cnt) {
		/* Malformed or truncated sequence */
		*uni = '?';
		return -EINVAL;
	}
	return u_len;
}
113 | ||
484a10f4 AG |
/* Replacement written for a run of illegal characters ('/' or NUL) */
#define ILLEGAL_CHAR_MARK	'_'
/* Character that introduces a file name extension */
#define EXT_MARK		'.'
/* Character that introduces the CRC suffix of a mangled name */
#define CRC_MARK		'#'
/* Bound on the extension search, counted in characters from the name end */
#define EXT_SIZE		5
/* Number of chars we need to store generated CRC to make filename unique */
#define CRC_LEN			5
120 | ||
121 | static int udf_name_conv_char(uint8_t *str_o, int str_o_max_len, | |
122 | int *str_o_idx, | |
123 | const uint8_t *str_i, int str_i_max_len, | |
124 | int *str_i_idx, | |
125 | int u_ch, int *needsCRC, | |
126 | int (*conv_f)(wchar_t, unsigned char *, int), | |
127 | int translate) | |
128 | { | |
129 | uint32_t c; | |
130 | int illChar = 0; | |
131 | int len, gotch = 0; | |
132 | ||
133 | for (; (!gotch) && (*str_i_idx < str_i_max_len); *str_i_idx += u_ch) { | |
134 | if (*str_o_idx >= str_o_max_len) { | |
135 | *needsCRC = 1; | |
136 | return gotch; | |
137 | } | |
138 | ||
139 | /* Expand OSTA compressed Unicode to Unicode */ | |
140 | c = str_i[*str_i_idx]; | |
141 | if (u_ch > 1) | |
142 | c = (c << 8) | str_i[*str_i_idx + 1]; | |
143 | ||
144 | if (translate && (c == '/' || c == 0)) | |
145 | illChar = 1; | |
146 | else if (illChar) | |
147 | break; | |
148 | else | |
149 | gotch = 1; | |
150 | } | |
151 | if (illChar) { | |
152 | *needsCRC = 1; | |
153 | c = ILLEGAL_CHAR_MARK; | |
154 | gotch = 1; | |
155 | } | |
156 | if (gotch) { | |
157 | len = conv_f(c, &str_o[*str_o_idx], str_o_max_len - *str_o_idx); | |
158 | /* Valid character? */ | |
159 | if (len >= 0) | |
160 | *str_o_idx += len; | |
161 | else if (len == -ENAMETOOLONG) { | |
162 | *needsCRC = 1; | |
163 | gotch = 0; | |
164 | } else { | |
165 | str_o[(*str_o_idx)++] = '?'; | |
166 | *needsCRC = 1; | |
167 | } | |
168 | } | |
169 | return gotch; | |
170 | } | |
171 | ||
9293fcfb AG |
172 | static int udf_name_from_CS0(uint8_t *str_o, int str_max_len, |
173 | const uint8_t *ocu, int ocu_len, | |
484a10f4 AG |
174 | int (*conv_f)(wchar_t, unsigned char *, int), |
175 | int translate) | |
1da177e4 | 176 | { |
484a10f4 | 177 | uint32_t c; |
9293fcfb | 178 | uint8_t cmp_id; |
484a10f4 AG |
179 | int idx, len; |
180 | int u_ch; | |
181 | int needsCRC = 0; | |
182 | int ext_i_len, ext_max_len; | |
183 | int str_o_len = 0; /* Length of resulting output */ | |
184 | int ext_o_len = 0; /* Extension output length */ | |
185 | int ext_crc_len = 0; /* Extension output length if used with CRC */ | |
186 | int i_ext = -1; /* Extension position in input buffer */ | |
187 | int o_crc = 0; /* Rightmost possible output pos for CRC+ext */ | |
188 | unsigned short valueCRC; | |
189 | uint8_t ext[EXT_SIZE * NLS_MAX_CHARSET_SIZE + 1]; | |
190 | uint8_t crc[CRC_LEN]; | |
1da177e4 | 191 | |
9293fcfb AG |
192 | if (str_max_len <= 0) |
193 | return 0; | |
1da177e4 | 194 | |
cb00ea35 | 195 | if (ocu_len == 0) { |
9293fcfb | 196 | memset(str_o, 0, str_max_len); |
1da177e4 LT |
197 | return 0; |
198 | } | |
199 | ||
9293fcfb | 200 | cmp_id = ocu[0]; |
34f953dd | 201 | if (cmp_id != 8 && cmp_id != 16) { |
9293fcfb | 202 | memset(str_o, 0, str_max_len); |
484a10f4 | 203 | pr_err("unknown compression code (%d)\n", cmp_id); |
78fc2e69 | 204 | return -EINVAL; |
1da177e4 | 205 | } |
484a10f4 | 206 | u_ch = cmp_id >> 3; |
1da177e4 | 207 | |
484a10f4 AG |
208 | ocu++; |
209 | ocu_len--; | |
1da177e4 | 210 | |
484a10f4 AG |
211 | if (ocu_len % u_ch) { |
212 | pr_err("incorrect filename length (%d)\n", ocu_len + 1); | |
213 | return -EINVAL; | |
214 | } | |
215 | ||
216 | if (translate) { | |
217 | /* Look for extension */ | |
218 | for (idx = ocu_len - u_ch, ext_i_len = 0; | |
219 | (idx >= 0) && (ext_i_len < EXT_SIZE); | |
220 | idx -= u_ch, ext_i_len++) { | |
221 | c = ocu[idx]; | |
222 | if (u_ch > 1) | |
223 | c = (c << 8) | ocu[idx + 1]; | |
224 | ||
225 | if (c == EXT_MARK) { | |
226 | if (ext_i_len) | |
227 | i_ext = idx; | |
228 | break; | |
229 | } | |
230 | } | |
231 | if (i_ext >= 0) { | |
232 | /* Convert extension */ | |
233 | ext_max_len = min_t(int, sizeof(ext), str_max_len); | |
234 | ext[ext_o_len++] = EXT_MARK; | |
235 | idx = i_ext + u_ch; | |
236 | while (udf_name_conv_char(ext, ext_max_len, &ext_o_len, | |
237 | ocu, ocu_len, &idx, | |
238 | u_ch, &needsCRC, | |
239 | conv_f, translate)) { | |
240 | if ((ext_o_len + CRC_LEN) < str_max_len) | |
241 | ext_crc_len = ext_o_len; | |
242 | } | |
243 | } | |
244 | } | |
245 | ||
246 | idx = 0; | |
247 | while (1) { | |
248 | if (translate && (idx == i_ext)) { | |
249 | if (str_o_len > (str_max_len - ext_o_len)) | |
250 | needsCRC = 1; | |
3e7fc205 | 251 | break; |
484a10f4 AG |
252 | } |
253 | ||
254 | if (!udf_name_conv_char(str_o, str_max_len, &str_o_len, | |
255 | ocu, ocu_len, &idx, | |
256 | u_ch, &needsCRC, conv_f, translate)) | |
257 | break; | |
258 | ||
259 | if (translate && | |
260 | (str_o_len <= (str_max_len - ext_o_len - CRC_LEN))) | |
261 | o_crc = str_o_len; | |
262 | } | |
263 | ||
264 | if (translate) { | |
265 | if (str_o_len <= 2 && str_o[0] == '.' && | |
266 | (str_o_len == 1 || str_o[1] == '.')) | |
267 | needsCRC = 1; | |
268 | if (needsCRC) { | |
269 | str_o_len = o_crc; | |
270 | valueCRC = crc_itu_t(0, ocu, ocu_len); | |
271 | crc[0] = CRC_MARK; | |
272 | crc[1] = hex_asc_upper_hi(valueCRC >> 8); | |
273 | crc[2] = hex_asc_upper_lo(valueCRC >> 8); | |
274 | crc[3] = hex_asc_upper_hi(valueCRC); | |
275 | crc[4] = hex_asc_upper_lo(valueCRC); | |
276 | len = min_t(int, CRC_LEN, str_max_len - str_o_len); | |
277 | memcpy(&str_o[str_o_len], crc, len); | |
278 | str_o_len += len; | |
279 | ext_o_len = ext_crc_len; | |
280 | } | |
281 | if (ext_o_len > 0) { | |
282 | memcpy(&str_o[str_o_len], ext, ext_o_len); | |
283 | str_o_len += ext_o_len; | |
284 | } | |
1da177e4 | 285 | } |
1da177e4 | 286 | |
9293fcfb | 287 | return str_o_len; |
1da177e4 LT |
288 | } |
289 | ||
9293fcfb AG |
/*
 * Convert a name in the local charset to OSTA CS0 (compressed Unicode).
 *
 * @ocu:         output buffer; byte 0 receives the compression ID
 * @ocu_max_len: size of @ocu in bytes
 * @str_i:       input name
 * @str_len:     length of @str_i in bytes
 * @conv_f:      decodes one character from the input charset
 *
 * Encoding starts with 8 bits per character. As soon as a character above
 * 0xff shows up the output is restarted from scratch with 16 bits per
 * character. Input that @conv_f rejects is consumed one byte at a time and
 * stored as '?'.
 *
 * Returns the number of bytes used in @ocu (including the compression ID),
 * or 0 when @ocu_max_len is not positive or the name does not fit.
 */
static int udf_name_to_CS0(uint8_t *ocu, int ocu_max_len,
			   const uint8_t *str_i, int str_len,
			   int (*conv_f)(const unsigned char *, int, wchar_t *))
{
	unsigned int limit = 0xff;	/* largest value the current width holds */
	int width = 1;			/* output bytes per character */
	int in_pos, out_pos, consumed;
	wchar_t wc;

	if (ocu_max_len <= 0)
		return 0;

	memset(ocu, 0, ocu_max_len);
	ocu[0] = 8;

restart:
	out_pos = 1;
	in_pos = 0;
	while (in_pos < str_len) {
		/* Name didn't fit? */
		if (out_pos + width > ocu_max_len)
			return 0;

		consumed = conv_f(&str_i[in_pos], str_len - in_pos, &wc);
		if (!consumed) {
			in_pos++;
			continue;
		}
		/* Invalid character, deal with it */
		if (consumed < 0) {
			consumed = 1;
			wc = '?';
		}

		if (wc > limit) {
			/* Does not fit the current width: redo as 16 bit */
			limit = 0xffff;
			ocu[0] = 0x10;
			width = 2;
			goto restart;
		}

		if (limit == 0xffff)
			ocu[out_pos++] = (uint8_t)(wc >> 8);
		ocu[out_pos++] = (uint8_t)(wc & 0xff);
		in_pos += consumed;
	}

	return out_pos;
}
337 | ||
9293fcfb | 338 | int udf_CS0toUTF8(uint8_t *utf_o, int o_len, const uint8_t *ocu_i, int i_len) |
3e7fc205 | 339 | { |
9293fcfb | 340 | return udf_name_from_CS0(utf_o, o_len, ocu_i, i_len, |
484a10f4 | 341 | udf_uni2char_utf8, 0); |
3e7fc205 AG |
342 | } |
343 | ||
9293fcfb | 344 | int udf_get_filename(struct super_block *sb, const uint8_t *sname, int slen, |
0e5cc9a4 | 345 | uint8_t *dname, int dlen) |
1da177e4 | 346 | { |
3e7fc205 | 347 | int (*conv_f)(wchar_t, unsigned char *, int); |
6ce63836 | 348 | int ret; |
1da177e4 | 349 | |
31f2566f FF |
350 | if (!slen) |
351 | return -EIO; | |
352 | ||
9293fcfb AG |
353 | if (dlen <= 0) |
354 | return 0; | |
355 | ||
cb00ea35 | 356 | if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) { |
3e7fc205 | 357 | conv_f = udf_uni2char_utf8; |
cb00ea35 | 358 | } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) { |
3e7fc205 | 359 | conv_f = UDF_SB(sb)->s_nls_map->uni2char; |
4b11111a | 360 | } else |
5dce54b7 | 361 | BUG(); |
530f1a5e | 362 | |
484a10f4 | 363 | ret = udf_name_from_CS0(dname, dlen, sname, slen, conv_f, 1); |
6ce63836 FF |
364 | /* Zero length filename isn't valid... */ |
365 | if (ret == 0) | |
366 | ret = -EINVAL; | |
5ceb8b55 | 367 | return ret; |
1da177e4 LT |
368 | } |
369 | ||
525e2c56 AG |
370 | int udf_put_filename(struct super_block *sb, const uint8_t *sname, int slen, |
371 | uint8_t *dname, int dlen) | |
1da177e4 | 372 | { |
3e7fc205 | 373 | int (*conv_f)(const unsigned char *, int, wchar_t *); |
1da177e4 | 374 | |
cb00ea35 | 375 | if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) { |
3e7fc205 | 376 | conv_f = udf_char2uni_utf8; |
cb00ea35 | 377 | } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) { |
3e7fc205 | 378 | conv_f = UDF_SB(sb)->s_nls_map->char2uni; |
4b11111a | 379 | } else |
3e7fc205 | 380 | BUG(); |
1da177e4 | 381 | |
9293fcfb | 382 | return udf_name_to_CS0(dname, dlen, sname, slen, conv_f); |
1da177e4 LT |
383 | } |
384 |