]>
Commit | Line | Data |
---|---|---|
fecd2382 | 1 | /* This is the Assembler Pre-Processor |
cef72a92 ILT |
2 | Copyright (C) 1987, 90, 91, 92, 93, 94, 95, 1996 |
3 | Free Software Foundation, Inc. | |
6efd877d | 4 | |
a39116f1 | 5 | This file is part of GAS, the GNU Assembler. |
6efd877d | 6 | |
a39116f1 RP |
7 | GAS is free software; you can redistribute it and/or modify |
8 | it under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation; either version 2, or (at your option) | |
10 | any later version. | |
6efd877d | 11 | |
a39116f1 RP |
12 | GAS is distributed in the hope that it will be useful, |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
6efd877d | 16 | |
a39116f1 RP |
17 | You should have received a copy of the GNU General Public License |
18 | along with GAS; see the file COPYING. If not, write to | |
a2a5a4fa | 19 | the Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ |
fecd2382 | 20 | |
58d4951d | 21 | /* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */ |
fecd2382 RP |
22 | /* App, the assembler pre-processor. This pre-processor strips out excess |
23 | spaces, turns single-quoted characters into a decimal constant, and turns | |
9a7d824a | 24 | # <number> <filename> <garbage> into a .line <number>\n.file <filename> |
a2a5a4fa | 25 | pair. This needs better error-handling. */ |
fecd2382 RP |
26 | |
27 | #include <stdio.h> | |
6efd877d | 28 | #include "as.h" /* For BAD_CASE() only */ |
fecd2382 | 29 | |
5a051773 SS |
30 | #if (__STDC__ != 1) |
31 | #ifndef const | |
32 | #define const /* empty */ | |
33 | #endif | |
fecd2382 RP |
34 | #endif |
35 | ||
/* Character classification table, indexed by character code (0..255).
   A zero entry means the character has no special meaning to the
   scrubber; the other values are the LEX_IS_* codes below.  The table
   is filled in by do_scrub_begin.  */
static char lex[256];

/* Characters which may appear in a symbol name.  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Values stored in lex[].  */
#define LEX_IS_SYMBOL_COMPONENT         1
#define LEX_IS_WHITESPACE               2
#define LEX_IS_LINE_SEPARATOR           3
#define LEX_IS_COMMENT_START            4
#define LEX_IS_LINE_COMMENT_START       5
#define LEX_IS_TWOCHAR_COMMENT_1ST      6
#define LEX_IS_TWOCHAR_COMMENT_2ND      7
#define LEX_IS_STRINGQUOTE              8
#define LEX_IS_COLON                    9
#define LEX_IS_NEWLINE                  10
#define LEX_IS_ONECHAR_QUOTE            11

/* Classification predicates.  The argument is converted to unsigned
   char before it is used as an index, so that a plain char holding a
   value of 0x80 or above cannot index in front of the table on hosts
   where char is signed.  The argument must be a character, not EOF.  */
#define IS_SYMBOL_COMPONENT(c) \
  (lex[(unsigned char) (c)] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c) \
  (lex[(unsigned char) (c)] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c) \
  (lex[(unsigned char) (c)] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c) \
  (lex[(unsigned char) (c)] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c) \
  (lex[(unsigned char) (c)] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c) \
  (lex[(unsigned char) (c)] == LEX_IS_NEWLINE)
57 | ||
385ce433 JL |
58 | static int process_escape PARAMS ((int)); |
59 | ||
a39116f1 RP |
60 | /* FIXME-soon: The entire lexer/parser thingy should be |
61 | built statically at compile time rather than dynamically | |
62 | each and every time the assembler is run. xoxorich. */ | |
fecd2382 | 63 | |
6efd877d KR |
64 | void |
65 | do_scrub_begin () | |
66 | { | |
67 | const char *p; | |
68 | ||
69 | lex[' '] = LEX_IS_WHITESPACE; | |
70 | lex['\t'] = LEX_IS_WHITESPACE; | |
71 | lex['\n'] = LEX_IS_NEWLINE; | |
72 | lex[';'] = LEX_IS_LINE_SEPARATOR; | |
6efd877d | 73 | lex[':'] = LEX_IS_COLON; |
7c2d4011 | 74 | |
cef72a92 | 75 | if (! flag_m68k_mri) |
7172e226 ILT |
76 | { |
77 | lex['"'] = LEX_IS_STRINGQUOTE; | |
be06bdcd | 78 | |
7172e226 ILT |
79 | #ifndef TC_HPPA |
80 | lex['\''] = LEX_IS_ONECHAR_QUOTE; | |
81 | #endif | |
be06bdcd SC |
82 | |
83 | #ifdef SINGLE_QUOTE_STRINGS | |
7172e226 | 84 | lex['\''] = LEX_IS_STRINGQUOTE; |
7c2d4011 | 85 | #endif |
7172e226 | 86 | } |
be06bdcd | 87 | |
86038ada ILT |
88 | /* Note: if any other character can be LEX_IS_STRINGQUOTE, the loop |
89 | in state 5 of do_scrub_chars must be changed. */ | |
90 | ||
a2a5a4fa KR |
91 | /* Note that these override the previous defaults, e.g. if ';' is a |
92 | comment char, then it isn't a line separator. */ | |
6efd877d KR |
93 | for (p = symbol_chars; *p; ++p) |
94 | { | |
58d4951d | 95 | lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT; |
6efd877d KR |
96 | } /* declare symbol characters */ |
97 | ||
6efd877d KR |
98 | for (p = comment_chars; *p; p++) |
99 | { | |
58d4951d | 100 | lex[(unsigned char) *p] = LEX_IS_COMMENT_START; |
6efd877d KR |
101 | } /* declare comment chars */ |
102 | ||
9a7d824a ILT |
103 | for (p = line_comment_chars; *p; p++) |
104 | { | |
58d4951d | 105 | lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START; |
9a7d824a ILT |
106 | } /* declare line comment chars */ |
107 | ||
6efd877d KR |
108 | for (p = line_separator_chars; *p; p++) |
109 | { | |
58d4951d | 110 | lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR; |
6efd877d KR |
111 | } /* declare line separators */ |
112 | ||
113 | /* Only allow slash-star comments if slash is not in use */ | |
114 | if (lex['/'] == 0) | |
115 | { | |
116 | lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST; | |
117 | } | |
a2a5a4fa KR |
118 | /* FIXME-soon. This is a bad hack but otherwise, we can't do |
119 | c-style comments when '/' is a line comment char. xoxorich. */ | |
6efd877d KR |
120 | if (lex['*'] == 0) |
121 | { | |
122 | lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND; | |
123 | } | |
7172e226 | 124 | |
cef72a92 | 125 | if (flag_m68k_mri) |
7172e226 ILT |
126 | { |
127 | lex['\''] = LEX_IS_STRINGQUOTE; | |
128 | lex[';'] = LEX_IS_COMMENT_START; | |
129 | lex['*'] = LEX_IS_LINE_COMMENT_START; | |
130 | /* The MRI documentation says '!' is LEX_IS_COMMENT_START, but | |
131 | then it can't be used in an expression. */ | |
132 | lex['!'] = LEX_IS_LINE_COMMENT_START; | |
133 | } | |
6efd877d | 134 | } /* do_scrub_begin() */ |
fecd2382 | 135 | |
fecd2382 RP |
/* Saved state of the scrubber.  These live at file scope so that
   do_scrub_chars can return at any point and pick up again on the
   next call.  */
static int state;               /* Current state of the state machine.  */
static int old_state;           /* State to return to from states -1, -2, 5.  */
static char *out_string;        /* Text being emitted while in state -1.  */
static char out_buf[20];        /* Holds a character constant as decimal.  */
static int add_newlines;        /* Newlines still owed to the output.  */
static char *saved_input;       /* Input left over when the output filled.  */
static int saved_input_len;     /* Number of bytes at saved_input.  */

/* Snapshot of all of the above, used to carry the scrubber across a
   .include.  App is called asynchronously to the parsing of the
   .include's, so the state at the time a .include is interpreted is
   completely unrelated to the state of the included file.  That is
   why everything has to be saved.  */

struct app_save
{
  int state;
  int old_state;
  char *out_string;
  char out_buf[sizeof (out_buf)];
  int add_newlines;
  char *saved_input;
  int saved_input_len;
};
160 | ||
161 | char * | |
162 | app_push () | |
163 | { | |
7c2d4011 SC |
164 | register struct app_save *saved; |
165 | ||
6efd877d KR |
166 | saved = (struct app_save *) xmalloc (sizeof (*saved)); |
167 | saved->state = state; | |
168 | saved->old_state = old_state; | |
169 | saved->out_string = out_string; | |
58d4951d | 170 | memcpy (saved->out_buf, out_buf, sizeof (out_buf)); |
6efd877d | 171 | saved->add_newlines = add_newlines; |
86038ada ILT |
172 | saved->saved_input = saved_input; |
173 | saved->saved_input_len = saved_input_len; | |
7c2d4011 SC |
174 | |
175 | /* do_scrub_begin() is not useful, just wastes time. */ | |
86038ada ILT |
176 | |
177 | state = 0; | |
178 | saved_input = NULL; | |
179 | ||
6efd877d | 180 | return (char *) saved; |
fecd2382 RP |
181 | } |
182 | ||
6efd877d KR |
183 | void |
184 | app_pop (arg) | |
185 | char *arg; | |
fecd2382 | 186 | { |
6efd877d KR |
187 | register struct app_save *saved = (struct app_save *) arg; |
188 | ||
189 | /* There is no do_scrub_end (). */ | |
190 | state = saved->state; | |
191 | old_state = saved->old_state; | |
192 | out_string = saved->out_string; | |
58d4951d | 193 | memcpy (out_buf, saved->out_buf, sizeof (out_buf)); |
6efd877d | 194 | add_newlines = saved->add_newlines; |
86038ada ILT |
195 | saved_input = saved->saved_input; |
196 | saved_input_len = saved->saved_input_len; | |
6efd877d KR |
197 | |
198 | free (arg); | |
199 | } /* app_pop() */ | |
200 | ||
6d331d71 KR |
/* Return the character denoted by the escape letter CH, i.e. the
   character which followed a backslash in a character constant.
   Only b, f, n, r and t are translated; quotes and every other value
   are returned unchanged.

   @@ This assumes that \n &c are the same on host and target.  This
   is not necessarily true.  */

static int
process_escape (ch)
     int ch;
{
  static const char letters[] = "bfnrt";
  static const char values[] = "\b\f\n\r\t";
  int i;

  for (i = 0; letters[i] != '\0'; i++)
    {
      if (ch == letters[i])
        return values[i];
    }

  /* A single quote, a double quote, or anything we do not recognize
     stands for itself.  */
  return ch;
}
86038ada ILT |
227 | |
228 | /* This function is called to process input characters. The GET | |
229 | parameter is used to retrieve more input characters. GET should | |
230 | set its parameter to point to a buffer, and return the length of | |
231 | the buffer; it should return 0 at end of file. The scrubbed output | |
232 | characters are put into the buffer starting at TOSTART; the TOSTART | |
233 | buffer is TOLEN bytes in length. The function returns the number | |
234 | of scrubbed characters put into TOSTART. This will be TOLEN unless | |
235 | end of file was seen. This function is arranged as a state | |
236 | machine, and saves its state so that it may return at any point. | |
237 | This is the way the old code used to work. */ | |
238 | ||
239 | int | |
240 | do_scrub_chars (get, tostart, tolen) | |
241 | int (*get) PARAMS ((char **)); | |
242 | char *tostart; | |
243 | int tolen; | |
fecd2382 | 244 | { |
86038ada ILT |
245 | char *to = tostart; |
246 | char *toend = tostart + tolen; | |
247 | char *from; | |
248 | char *fromend; | |
249 | int fromlen; | |
250 | register int ch, ch2 = 0; | |
251 | int not_cpp_line = 0; | |
252 | ||
6efd877d | 253 | /*State 0: beginning of normal line |
a39116f1 RP |
254 | 1: After first whitespace on line (flush more white) |
255 | 2: After first non-white (opcode) on line (keep 1white) | |
256 | 3: after second white on line (into operands) (flush white) | |
257 | 4: after putting out a .line, put out digits | |
258 | 5: parsing a string, then go to old-state | |
259 | 6: putting out \ escape in a "d string. | |
9a7d824a ILT |
260 | 7: After putting out a .appfile, put out string. |
261 | 8: After putting out a .appfile string, flush until newline. | |
f6a91cc0 | 262 | 9: After seeing symbol char in state 3 (keep 1white after symchar) |
9a7d824a | 263 | 10: After seeing whitespace in state 9 (keep white before symchar) |
a2a5a4fa KR |
264 | 11: After seeing a symbol character in state 0 (eg a label definition) |
265 | -1: output string in out_string and go to the state in old_state | |
266 | -2: flush text until a '*' '/' is seen, then go to state old_state | |
a39116f1 | 267 | */ |
6efd877d | 268 | |
9a7d824a ILT |
269 | /* I added states 9 and 10 because the MIPS ECOFF assembler uses |
270 | constructs like ``.loc 1 20''. This was turning into ``.loc | |
271 | 120''. States 9 and 10 ensure that a space is never dropped in | |
272 | between characters which could appear in a identifier. Ian | |
a2a5a4fa KR |
273 | Taylor, [email protected]. |
274 | ||
275 | I added state 11 so that something like "Lfoo add %r25,%r26,%r27" works | |
276 | correctly on the PA (and any other target where colons are optional). | |
277 | Jeff Law, [email protected]. */ | |
278 | ||
86038ada ILT |
279 | /* This macro gets the next input character. */ |
280 | ||
281 | #define GET() \ | |
282 | (from < fromend \ | |
283 | ? *from++ \ | |
284 | : ((saved_input != NULL \ | |
285 | ? (free (saved_input), \ | |
286 | saved_input = NULL, \ | |
287 | 0) \ | |
288 | : 0), \ | |
289 | fromlen = (*get) (&from), \ | |
290 | fromend = from + fromlen, \ | |
291 | (fromlen == 0 \ | |
292 | ? EOF \ | |
293 | : *from++))) | |
294 | ||
295 | /* This macro pushes a character back on the input stream. */ | |
296 | ||
297 | #define UNGET(uch) (*--from = (uch)) | |
298 | ||
299 | /* This macro puts a character into the output buffer. If this | |
300 | character fills the output buffer, this macro jumps to the label | |
301 | TOFULL. We use this rather ugly approach because we need to | |
302 | handle two different termination conditions: EOF on the input | |
303 | stream, and a full output buffer. It would be simpler if we | |
304 | always read in the entire input stream before processing it, but | |
305 | I don't want to make such a significant change to the assembler's | |
306 | memory usage. */ | |
307 | ||
308 | #define PUT(pch) \ | |
309 | do \ | |
310 | { \ | |
311 | *to++ = (pch); \ | |
312 | if (to >= toend) \ | |
313 | goto tofull; \ | |
314 | } \ | |
315 | while (0) | |
316 | ||
317 | if (saved_input != NULL) | |
6efd877d | 318 | { |
86038ada ILT |
319 | from = saved_input; |
320 | fromend = from + saved_input_len; | |
321 | } | |
322 | else | |
323 | { | |
324 | fromlen = (*get) (&from); | |
325 | if (fromlen == 0) | |
326 | return 0; | |
327 | fromend = from + fromlen; | |
328 | } | |
6efd877d | 329 | |
86038ada ILT |
330 | while (1) |
331 | { | |
332 | /* The cases in this switch end with continue, in order to | |
333 | branch back to the top of this while loop and generate the | |
334 | next output character in the appropriate state. */ | |
335 | switch (state) | |
6efd877d | 336 | { |
86038ada ILT |
337 | case -1: |
338 | ch = *out_string++; | |
339 | if (*out_string == '\0') | |
6efd877d | 340 | { |
86038ada ILT |
341 | state = old_state; |
342 | old_state = 3; | |
6efd877d | 343 | } |
86038ada ILT |
344 | PUT (ch); |
345 | continue; | |
6efd877d | 346 | |
86038ada ILT |
347 | case -2: |
348 | for (;;) | |
6efd877d | 349 | { |
86038ada ILT |
350 | do |
351 | { | |
352 | ch = GET (); | |
6efd877d | 353 | |
86038ada ILT |
354 | if (ch == EOF) |
355 | { | |
356 | as_warn ("end of file in comment"); | |
357 | goto fromeof; | |
358 | } | |
359 | ||
360 | if (ch == '\n') | |
361 | PUT ('\n'); | |
362 | } | |
363 | while (ch != '*'); | |
364 | ||
365 | while ((ch = GET ()) == '*') | |
366 | ; | |
367 | ||
368 | if (ch == EOF) | |
369 | { | |
370 | as_warn ("end of file in comment"); | |
371 | goto fromeof; | |
372 | } | |
373 | ||
374 | if (ch == '/') | |
375 | break; | |
376 | ||
377 | UNGET (ch); | |
6efd877d | 378 | } |
86038ada ILT |
379 | |
380 | state = old_state; | |
381 | PUT (' '); | |
382 | continue; | |
383 | ||
384 | case 4: | |
385 | ch = GET (); | |
386 | if (ch == EOF) | |
387 | goto fromeof; | |
388 | else if (ch >= '0' && ch <= '9') | |
389 | PUT (ch); | |
6efd877d KR |
390 | else |
391 | { | |
86038ada | 392 | while (ch != EOF && IS_WHITESPACE (ch)) |
a2a5a4fa | 393 | ch = GET (); |
86038ada ILT |
394 | if (ch == '"') |
395 | { | |
396 | UNGET (ch); | |
cef72a92 ILT |
397 | if (flag_m68k_mri) |
398 | out_string = "\n\tappfile "; | |
399 | else | |
400 | out_string = "\n\t.appfile "; | |
86038ada ILT |
401 | old_state = 7; |
402 | state = -1; | |
403 | PUT (*out_string++); | |
404 | } | |
405 | else | |
406 | { | |
407 | while (ch != EOF && ch != '\n') | |
408 | ch = GET (); | |
409 | state = 0; | |
410 | PUT (ch); | |
411 | } | |
6efd877d | 412 | } |
86038ada | 413 | continue; |
6efd877d | 414 | |
86038ada ILT |
415 | case 5: |
416 | /* We are going to copy everything up to a quote character, | |
417 | with special handling for a backslash. We try to | |
418 | optimize the copying in the simple case without using the | |
419 | GET and PUT macros. */ | |
420 | { | |
421 | char *s; | |
422 | int len; | |
423 | ||
424 | for (s = from; s < fromend; s++) | |
425 | { | |
426 | ch = *s; | |
427 | /* This condition must be changed if the type of any | |
428 | other character can be LEX_IS_STRINGQUOTE. */ | |
429 | if (ch == '\\' | |
430 | || ch == '"' | |
431 | || ch == '\'' | |
432 | || ch == '\n') | |
433 | break; | |
434 | } | |
435 | len = s - from; | |
436 | if (len > toend - to) | |
437 | len = toend - to; | |
438 | if (len > 0) | |
439 | { | |
440 | memcpy (to, from, len); | |
441 | to += len; | |
442 | from += len; | |
443 | } | |
444 | } | |
445 | ||
446 | ch = GET (); | |
447 | if (ch == EOF) | |
448 | { | |
449 | as_warn ("end of file in string: inserted '\"'"); | |
450 | state = old_state; | |
451 | UNGET ('\n'); | |
452 | PUT ('"'); | |
453 | } | |
454 | else if (lex[ch] == LEX_IS_STRINGQUOTE) | |
455 | { | |
456 | state = old_state; | |
457 | PUT (ch); | |
458 | } | |
a2a5a4fa | 459 | #ifndef NO_STRING_ESCAPES |
86038ada ILT |
460 | else if (ch == '\\') |
461 | { | |
462 | state = 6; | |
463 | PUT (ch); | |
464 | } | |
a2a5a4fa | 465 | #endif |
cef72a92 | 466 | else if (flag_m68k_mri && ch == '\n') |
86038ada ILT |
467 | { |
468 | /* Just quietly terminate the string. This permits lines like | |
469 | bne label loop if we haven't reach end yet | |
470 | */ | |
471 | state = old_state; | |
472 | UNGET (ch); | |
473 | PUT ('\''); | |
474 | } | |
475 | else | |
476 | { | |
477 | PUT (ch); | |
478 | } | |
479 | continue; | |
6efd877d | 480 | |
86038ada ILT |
481 | case 6: |
482 | state = 5; | |
483 | ch = GET (); | |
484 | switch (ch) | |
485 | { | |
486 | /* Handle strings broken across lines, by turning '\n' into | |
487 | '\\' and 'n'. */ | |
488 | case '\n': | |
489 | UNGET ('n'); | |
490 | add_newlines++; | |
491 | PUT ('\\'); | |
492 | continue; | |
493 | ||
494 | case '"': | |
495 | case '\\': | |
496 | case 'b': | |
497 | case 'f': | |
498 | case 'n': | |
499 | case 'r': | |
500 | case 't': | |
501 | case 'v': | |
502 | case 'x': | |
503 | case 'X': | |
504 | case '0': | |
505 | case '1': | |
506 | case '2': | |
507 | case '3': | |
508 | case '4': | |
509 | case '5': | |
510 | case '6': | |
511 | case '7': | |
512 | break; | |
7c2d4011 | 513 | #if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES) |
86038ada ILT |
514 | default: |
515 | as_warn ("Unknown escape '\\%c' in string: Ignored", ch); | |
516 | break; | |
517 | #else /* ONLY_STANDARD_ESCAPES */ | |
518 | default: | |
519 | /* Accept \x as x for any x */ | |
520 | break; | |
fecd2382 | 521 | #endif /* ONLY_STANDARD_ESCAPES */ |
7c2d4011 | 522 | |
86038ada ILT |
523 | case EOF: |
524 | as_warn ("End of file in string: '\"' inserted"); | |
525 | PUT ('"'); | |
526 | continue; | |
527 | } | |
528 | PUT (ch); | |
529 | continue; | |
6efd877d | 530 | |
86038ada ILT |
531 | case 7: |
532 | ch = GET (); | |
533 | state = 5; | |
534 | old_state = 8; | |
535 | if (ch == EOF) | |
536 | goto fromeof; | |
537 | PUT (ch); | |
538 | continue; | |
6efd877d | 539 | |
86038ada ILT |
540 | case 8: |
541 | do | |
542 | ch = GET (); | |
543 | while (ch != '\n' && ch != EOF); | |
544 | if (ch == EOF) | |
545 | goto fromeof; | |
a2a5a4fa | 546 | state = 0; |
86038ada ILT |
547 | PUT (ch); |
548 | continue; | |
a2a5a4fa | 549 | } |
6efd877d | 550 | |
86038ada | 551 | /* OK, we are somewhere in states 0 through 4 or 9 through 11 */ |
6efd877d | 552 | |
86038ada ILT |
553 | /* flushchar: */ |
554 | ch = GET (); | |
555 | recycle: | |
556 | if (ch == EOF) | |
6efd877d | 557 | { |
86038ada ILT |
558 | if (state != 0) |
559 | { | |
560 | as_warn ("end of file not at end of a line; newline inserted"); | |
561 | state = 0; | |
562 | PUT ('\n'); | |
563 | } | |
564 | goto fromeof; | |
fecd2382 | 565 | } |
6efd877d | 566 | |
86038ada | 567 | switch (lex[ch]) |
6efd877d | 568 | { |
86038ada | 569 | case LEX_IS_WHITESPACE: |
86038ada ILT |
570 | do |
571 | { | |
572 | ch = GET (); | |
573 | } | |
574 | while (ch != EOF && IS_WHITESPACE (ch)); | |
575 | if (ch == EOF) | |
576 | goto fromeof; | |
6efd877d | 577 | |
cef72a92 ILT |
578 | if (state == 0) |
579 | { | |
580 | /* Preserve a single whitespace character at the | |
581 | beginning of a line. */ | |
582 | state = 1; | |
583 | UNGET (ch); | |
584 | PUT (' '); | |
585 | break; | |
586 | } | |
587 | ||
86038ada | 588 | if (IS_COMMENT (ch) |
86038ada ILT |
589 | || ch == '/' |
590 | || IS_LINE_SEPARATOR (ch)) | |
591 | { | |
592 | /* cpp never outputs a leading space before the #, so | |
593 | try to avoid being confused. */ | |
594 | not_cpp_line = 1; | |
cef72a92 | 595 | if (flag_m68k_mri) |
92a25e12 ILT |
596 | { |
597 | /* In MRI mode, we keep these spaces. */ | |
598 | UNGET (ch); | |
599 | PUT (' '); | |
600 | break; | |
601 | } | |
86038ada ILT |
602 | goto recycle; |
603 | } | |
6efd877d | 604 | |
86038ada ILT |
605 | /* If we're in state 2 or 11, we've seen a non-white |
606 | character followed by whitespace. If the next character | |
607 | is ':', this is whitespace after a label name which we | |
608 | normally must ignore. In MRI mode, though, spaces are | |
609 | not permitted between the label and the colon. */ | |
610 | if ((state == 2 || state == 11) | |
611 | && lex[ch] == LEX_IS_COLON | |
cef72a92 | 612 | && ! flag_m68k_mri) |
6efd877d | 613 | { |
86038ada ILT |
614 | state = 1; |
615 | PUT (ch); | |
616 | break; | |
617 | } | |
618 | ||
619 | switch (state) | |
620 | { | |
621 | case 0: | |
622 | state++; | |
623 | goto recycle; /* Punted leading sp */ | |
624 | case 1: | |
625 | /* We can arrive here if we leave a leading whitespace | |
626 | character at the beginning of a line. */ | |
627 | goto recycle; | |
628 | case 2: | |
629 | state = 3; | |
630 | if (to + 1 < toend) | |
6efd877d | 631 | { |
86038ada ILT |
632 | /* Optimize common case by skipping UNGET/GET. */ |
633 | PUT (' '); /* Sp after opco */ | |
634 | goto recycle; | |
fecd2382 | 635 | } |
86038ada ILT |
636 | UNGET (ch); |
637 | PUT (' '); | |
638 | break; | |
639 | case 3: | |
cef72a92 | 640 | if (flag_m68k_mri) |
86038ada ILT |
641 | { |
642 | /* In MRI mode, we keep these spaces. */ | |
643 | UNGET (ch); | |
644 | PUT (' '); | |
645 | break; | |
646 | } | |
647 | goto recycle; /* Sp in operands */ | |
648 | case 9: | |
649 | case 10: | |
cef72a92 | 650 | if (flag_m68k_mri) |
86038ada ILT |
651 | { |
652 | /* In MRI mode, we keep these spaces. */ | |
653 | state = 3; | |
654 | UNGET (ch); | |
655 | PUT (' '); | |
656 | break; | |
657 | } | |
658 | state = 10; /* Sp after symbol char */ | |
659 | goto recycle; | |
660 | case 11: | |
661 | state = 1; | |
662 | UNGET (ch); | |
663 | PUT (' '); /* Sp after label definition. */ | |
664 | break; | |
665 | default: | |
666 | BAD_CASE (state); | |
667 | } | |
668 | break; | |
6efd877d | 669 | |
86038ada ILT |
670 | case LEX_IS_TWOCHAR_COMMENT_1ST: |
671 | ch2 = GET (); | |
672 | if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND) | |
673 | { | |
674 | for (;;) | |
6efd877d | 675 | { |
86038ada ILT |
676 | do |
677 | { | |
678 | ch2 = GET (); | |
679 | if (ch2 != EOF && IS_NEWLINE (ch2)) | |
680 | add_newlines++; | |
681 | } | |
682 | while (ch2 != EOF && | |
683 | (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND)); | |
684 | ||
685 | while (ch2 != EOF && | |
686 | (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)) | |
687 | { | |
688 | ch2 = GET (); | |
689 | } | |
690 | ||
691 | if (ch2 == EOF | |
692 | || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST) | |
693 | break; | |
694 | UNGET (ch); | |
fecd2382 | 695 | } |
86038ada ILT |
696 | if (ch2 == EOF) |
697 | as_warn ("end of file in multiline comment"); | |
6efd877d | 698 | |
86038ada ILT |
699 | ch = ' '; |
700 | goto recycle; | |
6efd877d | 701 | } |
86038ada ILT |
702 | else |
703 | { | |
704 | if (ch2 != EOF) | |
705 | UNGET (ch2); | |
706 | if (state == 9 || state == 10) | |
707 | state = 3; | |
708 | PUT (ch); | |
709 | } | |
710 | break; | |
6efd877d | 711 | |
86038ada ILT |
712 | case LEX_IS_STRINGQUOTE: |
713 | if (state == 10) | |
714 | { | |
715 | /* Preserve the whitespace in foo "bar" */ | |
716 | UNGET (ch); | |
717 | state = 3; | |
718 | PUT (' '); | |
719 | ||
720 | /* PUT didn't jump out. We could just break, but we | |
721 | know what will happen, so optimize a bit. */ | |
722 | ch = GET (); | |
723 | old_state = 3; | |
724 | } | |
725 | else if (state == 9) | |
726 | old_state = 3; | |
727 | else | |
728 | old_state = state; | |
729 | state = 5; | |
730 | PUT (ch); | |
731 | break; | |
6efd877d | 732 | |
a39116f1 | 733 | #ifndef IEEE_STYLE |
86038ada ILT |
734 | case LEX_IS_ONECHAR_QUOTE: |
735 | if (state == 10) | |
736 | { | |
737 | /* Preserve the whitespace in foo 'b' */ | |
738 | UNGET (ch); | |
739 | state = 3; | |
740 | PUT (' '); | |
741 | break; | |
742 | } | |
a2a5a4fa | 743 | ch = GET (); |
86038ada ILT |
744 | if (ch == EOF) |
745 | { | |
746 | as_warn ("end of file after a one-character quote; \\0 inserted"); | |
747 | ch = 0; | |
748 | } | |
749 | if (ch == '\\') | |
750 | { | |
751 | ch = GET (); | |
752 | if (ch == EOF) | |
753 | { | |
754 | as_warn ("end of file in escape character"); | |
755 | ch = '\\'; | |
756 | } | |
757 | else | |
758 | ch = process_escape (ch); | |
759 | } | |
760 | sprintf (out_buf, "%d", (int) (unsigned char) ch); | |
6efd877d | 761 | |
86038ada ILT |
762 | /* None of these 'x constants for us. We want 'x'. */ |
763 | if ((ch = GET ()) != '\'') | |
764 | { | |
fecd2382 | 765 | #ifdef REQUIRE_CHAR_CLOSE_QUOTE |
86038ada | 766 | as_warn ("Missing close quote: (assumed)"); |
fecd2382 | 767 | #else |
86038ada ILT |
768 | if (ch != EOF) |
769 | UNGET (ch); | |
fecd2382 | 770 | #endif |
86038ada ILT |
771 | } |
772 | if (strlen (out_buf) == 1) | |
773 | { | |
774 | PUT (out_buf[0]); | |
775 | break; | |
776 | } | |
777 | if (state == 9) | |
778 | old_state = 3; | |
779 | else | |
780 | old_state = state; | |
781 | state = -1; | |
782 | out_string = out_buf; | |
783 | PUT (*out_string++); | |
784 | break; | |
a39116f1 | 785 | #endif |
6efd877d | 786 | |
86038ada ILT |
787 | case LEX_IS_COLON: |
788 | if (state == 9 || state == 10) | |
789 | state = 3; | |
790 | else if (state != 3) | |
791 | state = 1; | |
792 | PUT (ch); | |
793 | break; | |
6efd877d | 794 | |
86038ada ILT |
795 | case LEX_IS_NEWLINE: |
796 | /* Roll out a bunch of newlines from inside comments, etc. */ | |
797 | if (add_newlines) | |
798 | { | |
799 | --add_newlines; | |
800 | UNGET (ch); | |
801 | } | |
802 | /* fall thru into... */ | |
6efd877d | 803 | |
86038ada ILT |
804 | case LEX_IS_LINE_SEPARATOR: |
805 | state = 0; | |
806 | PUT (ch); | |
807 | break; | |
808 | ||
809 | case LEX_IS_LINE_COMMENT_START: | |
810 | if (state == 0) /* Only comment at start of line. */ | |
811 | { | |
812 | /* FIXME-someday: The two character comment stuff was | |
813 | badly thought out. On i386, we want '/' as line | |
814 | comment start AND we want C style comments. hence | |
815 | this hack. The whole lexical process should be | |
816 | reworked. xoxorich. */ | |
817 | if (ch == '/') | |
818 | { | |
819 | ch2 = GET (); | |
820 | if (ch2 == '*') | |
821 | { | |
822 | state = -2; | |
823 | break; | |
824 | } | |
825 | else | |
826 | { | |
827 | UNGET (ch2); | |
828 | } | |
829 | } /* bad hack */ | |
830 | ||
831 | if (ch != '#') | |
832 | not_cpp_line = 1; | |
833 | ||
834 | do | |
9a7d824a | 835 | { |
86038ada | 836 | ch = GET (); |
9a7d824a | 837 | } |
86038ada ILT |
838 | while (ch != EOF && IS_WHITESPACE (ch)); |
839 | if (ch == EOF) | |
9a7d824a | 840 | { |
86038ada ILT |
841 | as_warn ("end of file in comment; newline inserted"); |
842 | PUT ('\n'); | |
843 | break; | |
9a7d824a | 844 | } |
86038ada ILT |
845 | if (ch < '0' || ch > '9' || not_cpp_line) |
846 | { | |
847 | /* Non-numerics: Eat whole comment line */ | |
848 | while (ch != EOF && !IS_NEWLINE (ch)) | |
849 | ch = GET (); | |
850 | if (ch == EOF) | |
851 | as_warn ("EOF in Comment: Newline inserted"); | |
852 | state = 0; | |
853 | PUT ('\n'); | |
854 | break; | |
855 | } | |
856 | /* Numerics begin comment. Perhaps CPP `# 123 "filename"' */ | |
857 | UNGET (ch); | |
858 | old_state = 4; | |
859 | state = -1; | |
cef72a92 ILT |
860 | if (flag_m68k_mri) |
861 | out_string = "\tappline "; | |
862 | else | |
863 | out_string = "\t.appline "; | |
86038ada ILT |
864 | PUT (*out_string++); |
865 | break; | |
866 | } | |
385ce433 | 867 | |
86038ada ILT |
868 | /* We have a line comment character which is not at the |
869 | start of a line. If this is also a normal comment | |
870 | character, fall through. Otherwise treat it as a default | |
871 | character. */ | |
872 | if (strchr (comment_chars, ch) == NULL | |
cef72a92 | 873 | && (! flag_m68k_mri |
86038ada ILT |
874 | || (ch != '!' && ch != '*'))) |
875 | goto de_fault; | |
cef72a92 | 876 | if (flag_m68k_mri |
15ed5f2c | 877 | && (ch == '!' || ch == '*' || ch == '#') |
86038ada ILT |
878 | && state != 1 |
879 | && state != 10) | |
880 | goto de_fault; | |
881 | /* Fall through. */ | |
882 | case LEX_IS_COMMENT_START: | |
9a7d824a | 883 | do |
9a7d824a | 884 | { |
86038ada | 885 | ch = GET (); |
9a7d824a | 886 | } |
86038ada ILT |
887 | while (ch != EOF && !IS_NEWLINE (ch)); |
888 | if (ch == EOF) | |
889 | as_warn ("end of file in comment; newline inserted"); | |
890 | state = 0; | |
891 | PUT ('\n'); | |
892 | break; | |
893 | ||
894 | case LEX_IS_SYMBOL_COMPONENT: | |
895 | if (state == 10) | |
9a7d824a | 896 | { |
86038ada ILT |
897 | /* This is a symbol character following another symbol |
898 | character, with whitespace in between. We skipped | |
899 | the whitespace earlier, so output it now. */ | |
900 | UNGET (ch); | |
901 | state = 3; | |
902 | PUT (' '); | |
903 | break; | |
9a7d824a | 904 | } |
6efd877d | 905 | |
86038ada ILT |
906 | if (state == 3) |
907 | state = 9; | |
6efd877d | 908 | |
86038ada ILT |
909 | /* This is a common case. Quickly copy CH and all the |
910 | following symbol component or normal characters. */ | |
911 | if (to + 1 < toend) | |
912 | { | |
913 | char *s; | |
914 | int len; | |
a2a5a4fa | 915 | |
86038ada ILT |
916 | for (s = from; s < fromend; s++) |
917 | { | |
918 | int type; | |
fecd2382 | 919 | |
86038ada ILT |
920 | ch2 = *s; |
921 | type = lex[ch2]; | |
922 | if (type != 0 | |
923 | && type != LEX_IS_SYMBOL_COMPONENT) | |
924 | break; | |
925 | } | |
926 | if (s > from) | |
927 | { | |
928 | /* Handle the last character normally, for | |
929 | simplicity. */ | |
930 | --s; | |
931 | } | |
932 | len = s - from; | |
933 | if (len > (toend - to) - 1) | |
934 | len = (toend - to) - 1; | |
935 | if (len > 0) | |
936 | { | |
937 | PUT (ch); | |
938 | if (len > 8) | |
939 | { | |
940 | memcpy (to, from, len); | |
941 | to += len; | |
942 | from += len; | |
943 | } | |
944 | else | |
945 | { | |
946 | switch (len) | |
947 | { | |
948 | case 8: *to++ = *from++; | |
949 | case 7: *to++ = *from++; | |
950 | case 6: *to++ = *from++; | |
951 | case 5: *to++ = *from++; | |
952 | case 4: *to++ = *from++; | |
953 | case 3: *to++ = *from++; | |
954 | case 2: *to++ = *from++; | |
955 | case 1: *to++ = *from++; | |
956 | } | |
957 | } | |
958 | ch = GET (); | |
959 | } | |
960 | } | |
fecd2382 | 961 | |
86038ada ILT |
962 | /* Fall through. */ |
963 | default: | |
964 | de_fault: | |
965 | /* Some relatively `normal' character. */ | |
966 | if (state == 0) | |
967 | { | |
968 | state = 11; /* Now seeing label definition */ | |
969 | } | |
970 | else if (state == 1) | |
971 | { | |
972 | state = 2; /* Ditto */ | |
973 | } | |
974 | else if (state == 9) | |
975 | { | |
976 | if (lex[ch] != LEX_IS_SYMBOL_COMPONENT) | |
977 | state = 3; | |
978 | } | |
979 | else if (state == 10) | |
980 | { | |
981 | state = 3; | |
982 | } | |
983 | PUT (ch); | |
984 | break; | |
985 | } | |
986 | } | |
fecd2382 | 987 | |
86038ada | 988 | /*NOTREACHED*/ |
6efd877d | 989 | |
86038ada ILT |
990 | fromeof: |
991 | /* We have reached the end of the input. */ | |
992 | return to - tostart; | |
fecd2382 | 993 | |
86038ada ILT |
994 | tofull: |
995 | /* The output buffer is full. Save any input we have not yet | |
996 | processed. */ | |
997 | if (fromend > from) | |
998 | { | |
999 | char *save; | |
1000 | ||
1001 | save = (char *) xmalloc (fromend - from); | |
1002 | memcpy (save, from, fromend - from); | |
1003 | if (saved_input != NULL) | |
1004 | free (saved_input); | |
1005 | saved_input = save; | |
1006 | saved_input_len = fromend - from; | |
1007 | } | |
1008 | else | |
1009 | { | |
1010 | if (saved_input != NULL) | |
1011 | { | |
1012 | free (saved_input); | |
1013 | saved_input = NULL; | |
1014 | } | |
1015 | } | |
1016 | return to - tostart; | |
fecd2382 | 1017 | } |
6efd877d | 1018 | |
fecd2382 | 1019 | /* end of app.c */ |