]>
Commit | Line | Data |
---|---|---|
fecd2382 | 1 | /* This is the Assembler Pre-Processor |
5a051773 | 2 | Copyright (C) 1987, 1990, 1991, 1992, 1994 Free Software Foundation, Inc. |
6efd877d | 3 | |
a39116f1 | 4 | This file is part of GAS, the GNU Assembler. |
6efd877d | 5 | |
a39116f1 RP |
6 | GAS is free software; you can redistribute it and/or modify |
7 | it under the terms of the GNU General Public License as published by | |
8 | the Free Software Foundation; either version 2, or (at your option) | |
9 | any later version. | |
6efd877d | 10 | |
a39116f1 RP |
11 | GAS is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | GNU General Public License for more details. | |
6efd877d | 15 | |
a39116f1 RP |
16 | You should have received a copy of the GNU General Public License |
17 | along with GAS; see the file COPYING. If not, write to | |
18 | the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ | |
fecd2382 | 19 | |
58d4951d | 20 | /* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */ |
fecd2382 RP |
/* App, the assembler pre-processor.  This pre-processor strips out excess
   spaces, turns single-quoted characters into a decimal constant, and turns
   # <number> <filename> <garbage> into a .appline <number>\n.appfile <filename>
   pair.  This needs better error-handling.
 */
fecd2382 RP |
26 | |
27 | #include <stdio.h> | |
6efd877d | 28 | #include "as.h" /* For BAD_CASE() only */ |
fecd2382 | 29 | |
5a051773 SS |
30 | #if (__STDC__ != 1) |
31 | #ifndef const | |
32 | #define const /* empty */ | |
33 | #endif | |
fecd2382 RP |
34 | #endif |
35 | ||
/* Character classification table, indexed by character code.  An entry
   of zero means the character has no special meaning to the scrubber;
   otherwise it holds one of the LEX_IS_* values below.  */
static char lex[256];

/* Characters that are marked LEX_IS_SYMBOL_COMPONENT.  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

#define LEX_IS_SYMBOL_COMPONENT		1
#define LEX_IS_WHITESPACE		2
#define LEX_IS_LINE_SEPARATOR		3
#define LEX_IS_COMMENT_START		4
#define LEX_IS_LINE_COMMENT_START	5
#define	LEX_IS_TWOCHAR_COMMENT_1ST	6
#define	LEX_IS_TWOCHAR_COMMENT_2ND	7
#define	LEX_IS_STRINGQUOTE		8
#define	LEX_IS_COLON			9
#define	LEX_IS_NEWLINE			10
#define	LEX_IS_ONECHAR_QUOTE		11

/* Classification predicates.  The argument is parenthesized and reduced
   to unsigned char before indexing, so a plain `char' holding a byte
   >= 0x80 (negative where char is signed) cannot index outside lex[].  */
#define IS_SYMBOL_COMPONENT(c) \
  (lex[(unsigned char) (c)] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c) \
  (lex[(unsigned char) (c)] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c) \
  (lex[(unsigned char) (c)] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c) \
  (lex[(unsigned char) (c)] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c) \
  (lex[(unsigned char) (c)] == LEX_IS_LINE_COMMENT_START)
#define	IS_NEWLINE(c) \
  (lex[(unsigned char) (c)] == LEX_IS_NEWLINE)
57 | ||
385ce433 JL |
58 | static int process_escape PARAMS ((int)); |
59 | ||
a39116f1 RP |
60 | /* FIXME-soon: The entire lexer/parser thingy should be |
61 | built statically at compile time rather than dynamically | |
62 | each and every time the assembler is run. xoxorich. */ | |
fecd2382 | 63 | |
6efd877d KR |
64 | void |
65 | do_scrub_begin () | |
66 | { | |
67 | const char *p; | |
68 | ||
69 | lex[' '] = LEX_IS_WHITESPACE; | |
70 | lex['\t'] = LEX_IS_WHITESPACE; | |
71 | lex['\n'] = LEX_IS_NEWLINE; | |
72 | lex[';'] = LEX_IS_LINE_SEPARATOR; | |
73 | lex['"'] = LEX_IS_STRINGQUOTE; | |
58d4951d | 74 | #ifndef TC_HPPA |
6efd877d | 75 | lex['\''] = LEX_IS_ONECHAR_QUOTE; |
58d4951d | 76 | #endif |
6efd877d | 77 | lex[':'] = LEX_IS_COLON; |
7c2d4011 | 78 | |
be06bdcd SC |
79 | |
80 | ||
81 | #ifdef SINGLE_QUOTE_STRINGS | |
82 | lex['\''] = LEX_IS_STRINGQUOTE; | |
7c2d4011 | 83 | #endif |
be06bdcd | 84 | |
6efd877d | 85 | /* Note that these override the previous defaults, e.g. if ';' |
be06bdcd | 86 | |
fecd2382 | 87 | is a comment char, then it isn't a line separator. */ |
6efd877d KR |
88 | for (p = symbol_chars; *p; ++p) |
89 | { | |
58d4951d | 90 | lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT; |
6efd877d KR |
91 | } /* declare symbol characters */ |
92 | ||
6efd877d KR |
93 | for (p = comment_chars; *p; p++) |
94 | { | |
58d4951d | 95 | lex[(unsigned char) *p] = LEX_IS_COMMENT_START; |
6efd877d KR |
96 | } /* declare comment chars */ |
97 | ||
9a7d824a ILT |
98 | for (p = line_comment_chars; *p; p++) |
99 | { | |
58d4951d | 100 | lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START; |
9a7d824a ILT |
101 | } /* declare line comment chars */ |
102 | ||
6efd877d KR |
103 | for (p = line_separator_chars; *p; p++) |
104 | { | |
58d4951d | 105 | lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR; |
6efd877d KR |
106 | } /* declare line separators */ |
107 | ||
108 | /* Only allow slash-star comments if slash is not in use */ | |
109 | if (lex['/'] == 0) | |
110 | { | |
111 | lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST; | |
112 | } | |
113 | /* FIXME-soon. This is a bad hack but otherwise, we | |
a39116f1 RP |
114 | can't do c-style comments when '/' is a line |
115 | comment char. xoxorich. */ | |
6efd877d KR |
116 | if (lex['*'] == 0) |
117 | { | |
118 | lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND; | |
119 | } | |
120 | } /* do_scrub_begin() */ | |
fecd2382 RP |
121 | |
/* Stream used by the file-based input hooks below.  */
FILE *scrub_file;

/* Input hook: return the next character read from scrub_file, or EOF
   as reported by getc.  */
int
scrub_from_file ()
{
  int next = getc (scrub_file);

  return next;
}

/* Pushback hook: hand CH back to scrub_file with ungetc.  The result of
   ungetc is not examined.  */
void
scrub_to_file (ch)
     int ch;
{
  (void) ungetc (ch, scrub_file);
}				/* scrub_to_file() */
fecd2382 RP |
136 | |
/* In-memory input for the string-based hooks below: scrub_string is the
   next character to deliver, scrub_last_string is one past the last.  */
char *scrub_string;
char *scrub_last_string;

/* Input hook: return the next character of the in-memory input, or EOF
   once scrub_string has reached scrub_last_string.

   The byte is returned as an unsigned char value, like getc does, so a
   byte >= 0x80 is never negative and 0xFF is not mistaken for EOF on
   hosts where plain char is signed.  */
int
scrub_from_string ()
{
  if (scrub_string == scrub_last_string)
    return EOF;

  return (unsigned char) *scrub_string++;
}				/* scrub_from_string() */

/* Pushback hook: step scrub_string back one position and store CH
   there.  Pushing back EOF does nothing, mirroring ungetc; storing it
   would overwrite a byte of the input with a junk character that would
   later be read back as data.  */
void
scrub_to_string (ch)
     int ch;
{
  if (ch == EOF)
    return;

  *--scrub_string = ch;
}				/* scrub_to_string() */
fecd2382 RP |
152 | |
/* Saved state of the scrubber.  */

/* Current state of the do_scrub_next_char state machine.  */
static int state;

/* State to switch to once the current sub-state (string, comment,
   pending output) is finished.  */
static int old_state;

/* Text still to be handed out, one character per call, in state -1.  */
static char *out_string;

/* Scratch buffer holding the decimal text of a one-character quote.  */
static char out_buf[20];

/* Number of newlines still owed to the output; see its uses in
   do_scrub_next_char.  */
static int add_newlines = 0;

/* Data structure for saving the state of app across #include's.  Note
   that app is called asynchronously to the parsing of the .include's,
   so our state at the time .include is interpreted is completely
   unrelated.  That's why we have to save it all.  */

struct app_save
{
  /* Copies of the file-scope scrubber variables of the same names.  */
  int state;
  int old_state;
  char *out_string;
  char out_buf[sizeof (out_buf)];
  int add_newlines;

  /* Copies of the input-source variables of the same names.  */
  char *scrub_string;
  char *scrub_last_string;
  FILE *scrub_file;
};
176 | ||
177 | char * | |
178 | app_push () | |
179 | { | |
7c2d4011 SC |
180 | register struct app_save *saved; |
181 | ||
6efd877d KR |
182 | saved = (struct app_save *) xmalloc (sizeof (*saved)); |
183 | saved->state = state; | |
184 | saved->old_state = old_state; | |
185 | saved->out_string = out_string; | |
58d4951d | 186 | memcpy (saved->out_buf, out_buf, sizeof (out_buf)); |
6efd877d KR |
187 | saved->add_newlines = add_newlines; |
188 | saved->scrub_string = scrub_string; | |
7c2d4011 | 189 | saved->scrub_last_string = scrub_last_string; |
6efd877d | 190 | saved->scrub_file = scrub_file; |
7c2d4011 SC |
191 | |
192 | /* do_scrub_begin() is not useful, just wastes time. */ | |
6efd877d | 193 | return (char *) saved; |
fecd2382 RP |
194 | } |
195 | ||
6efd877d KR |
196 | void |
197 | app_pop (arg) | |
198 | char *arg; | |
fecd2382 | 199 | { |
6efd877d KR |
200 | register struct app_save *saved = (struct app_save *) arg; |
201 | ||
202 | /* There is no do_scrub_end (). */ | |
203 | state = saved->state; | |
204 | old_state = saved->old_state; | |
205 | out_string = saved->out_string; | |
58d4951d | 206 | memcpy (out_buf, saved->out_buf, sizeof (out_buf)); |
6efd877d KR |
207 | add_newlines = saved->add_newlines; |
208 | scrub_string = saved->scrub_string; | |
209 | scrub_last_string = saved->scrub_last_string; | |
210 | scrub_file = saved->scrub_file; | |
211 | ||
212 | free (arg); | |
213 | } /* app_pop() */ | |
214 | ||
6d331d71 KR |
/* @@ This assumes that \n &c are the same on host and target.  This is not
   necessarily true.  */

/* Map CH, the character that followed a backslash, to the character the
   escape denotes.  Only b, f, n, r and t are translated; every other
   value, including the quote characters, is returned unchanged.  */
static int
process_escape (ch)
     int ch;
{
  if (ch == 'b')
    return '\b';
  if (ch == 'f')
    return '\f';
  if (ch == 'n')
    return '\n';
  if (ch == 'r')
    return '\r';
  if (ch == 't')
    return '\t';

  /* ' and " stand for themselves, as does anything else.  */
  return ch;
}
6efd877d KR |
241 | int |
242 | do_scrub_next_char (get, unget) | |
243 | int (*get) (); | |
244 | void (*unget) (); | |
fecd2382 | 245 | { |
6efd877d | 246 | /*State 0: beginning of normal line |
a39116f1 RP |
247 | 1: After first whitespace on line (flush more white) |
248 | 2: After first non-white (opcode) on line (keep 1white) | |
249 | 3: after second white on line (into operands) (flush white) | |
250 | 4: after putting out a .line, put out digits | |
251 | 5: parsing a string, then go to old-state | |
252 | 6: putting out \ escape in a "d string. | |
9a7d824a ILT |
253 | 7: After putting out a .appfile, put out string. |
254 | 8: After putting out a .appfile string, flush until newline. | |
f6a91cc0 | 255 | 9: After seeing symbol char in state 3 (keep 1white after symchar) |
9a7d824a | 256 | 10: After seeing whitespace in state 9 (keep white before symchar) |
a39116f1 RP |
257 | -1: output string in out_string and go to the state in old_state |
258 | -2: flush text until a '*' '/' is seen, then go to state old_state | |
259 | */ | |
6efd877d | 260 | |
9a7d824a ILT |
261 | /* I added states 9 and 10 because the MIPS ECOFF assembler uses |
262 | constructs like ``.loc 1 20''. This was turning into ``.loc | |
263 | 120''. States 9 and 10 ensure that a space is never dropped in | |
264 | between characters which could appear in a identifier. Ian | |
265 | Taylor, [email protected]. */ | |
f6a91cc0 | 266 | |
6efd877d | 267 | register int ch, ch2 = 0; |
385ce433 | 268 | int not_cpp_line = 0; |
6efd877d KR |
269 | |
270 | switch (state) | |
271 | { | |
272 | case -1: | |
273 | ch = *out_string++; | |
274 | if (*out_string == 0) | |
275 | { | |
276 | state = old_state; | |
277 | old_state = 3; | |
278 | } | |
279 | return ch; | |
280 | ||
281 | case -2: | |
282 | for (;;) | |
283 | { | |
284 | do | |
285 | { | |
286 | ch = (*get) (); | |
287 | } | |
288 | while (ch != EOF && ch != '\n' && ch != '*'); | |
289 | if (ch == '\n' || ch == EOF) | |
290 | return ch; | |
291 | ||
292 | /* At this point, ch must be a '*' */ | |
293 | while ((ch = (*get) ()) == '*') | |
294 | { | |
295 | ; | |
296 | } | |
297 | if (ch == EOF || ch == '/') | |
298 | break; | |
299 | (*unget) (ch); | |
300 | } | |
301 | state = old_state; | |
302 | return ' '; | |
303 | ||
304 | case 4: | |
305 | ch = (*get) (); | |
306 | if (ch == EOF || (ch >= '0' && ch <= '9')) | |
307 | return ch; | |
308 | else | |
309 | { | |
310 | while (ch != EOF && IS_WHITESPACE (ch)) | |
311 | ch = (*get) (); | |
312 | if (ch == '"') | |
313 | { | |
314 | (*unget) (ch); | |
001581c7 | 315 | out_string = "\n\t.appfile "; |
6efd877d KR |
316 | old_state = 7; |
317 | state = -1; | |
318 | return *out_string++; | |
319 | } | |
320 | else | |
321 | { | |
322 | while (ch != EOF && ch != '\n') | |
323 | ch = (*get) (); | |
58d4951d | 324 | state = 0; |
6efd877d KR |
325 | return ch; |
326 | } | |
327 | } | |
328 | ||
329 | case 5: | |
330 | ch = (*get) (); | |
331 | if (lex[ch] == LEX_IS_STRINGQUOTE) | |
332 | { | |
333 | state = old_state; | |
334 | return ch; | |
335 | } | |
336 | else if (ch == '\\') | |
337 | { | |
338 | state = 6; | |
339 | return ch; | |
340 | } | |
341 | else if (ch == EOF) | |
342 | { | |
343 | as_warn ("End of file in string: inserted '\"'"); | |
344 | state = old_state; | |
345 | (*unget) ('\n'); | |
346 | return '"'; | |
347 | } | |
348 | else | |
349 | { | |
350 | return ch; | |
351 | } | |
352 | ||
353 | case 6: | |
354 | state = 5; | |
355 | ch = (*get) (); | |
356 | switch (ch) | |
357 | { | |
6d331d71 KR |
358 | /* Handle strings broken across lines, by turning '\n' into |
359 | '\\' and 'n'. */ | |
6efd877d KR |
360 | case '\n': |
361 | (*unget) ('n'); | |
362 | add_newlines++; | |
363 | return '\\'; | |
364 | ||
365 | case '"': | |
366 | case '\\': | |
367 | case 'b': | |
368 | case 'f': | |
369 | case 'n': | |
370 | case 'r': | |
371 | case 't': | |
fecd2382 | 372 | #ifdef BACKSLASH_V |
6efd877d | 373 | case 'v': |
fecd2382 | 374 | #endif /* BACKSLASH_V */ |
385ce433 JL |
375 | case 'x': |
376 | case 'X': | |
6efd877d KR |
377 | case '0': |
378 | case '1': | |
379 | case '2': | |
380 | case '3': | |
381 | case '4': | |
382 | case '5': | |
383 | case '6': | |
384 | case '7': | |
385 | break; | |
7c2d4011 | 386 | #if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES) |
6efd877d KR |
387 | default: |
388 | as_warn ("Unknown escape '\\%c' in string: Ignored", ch); | |
389 | break; | |
fecd2382 | 390 | #else /* ONLY_STANDARD_ESCAPES */ |
6efd877d KR |
391 | default: |
392 | /* Accept \x as x for any x */ | |
393 | break; | |
fecd2382 | 394 | #endif /* ONLY_STANDARD_ESCAPES */ |
7c2d4011 | 395 | |
6efd877d KR |
396 | case EOF: |
397 | as_warn ("End of file in string: '\"' inserted"); | |
398 | return '"'; | |
399 | } | |
400 | return ch; | |
401 | ||
402 | case 7: | |
403 | ch = (*get) (); | |
404 | state = 5; | |
405 | old_state = 8; | |
406 | return ch; | |
407 | ||
408 | case 8: | |
409 | do | |
410 | ch = (*get) (); | |
411 | while (ch != '\n'); | |
412 | state = 0; | |
413 | return ch; | |
414 | } | |
415 | ||
9a7d824a | 416 | /* OK, we are somewhere in states 0 through 4 or 9 through 10 */ |
6efd877d KR |
417 | |
418 | /* flushchar: */ | |
419 | ch = (*get) (); | |
420 | recycle: | |
421 | if (ch == EOF) | |
422 | { | |
423 | if (state != 0) | |
424 | as_warn ("End of file not at end of a line: Newline inserted."); | |
425 | return ch; | |
426 | } | |
427 | ||
428 | switch (lex[ch]) | |
429 | { | |
430 | case LEX_IS_WHITESPACE: | |
431 | do | |
385ce433 JL |
432 | /* Preserve a single whitespace character at the beginning of |
433 | a line. */ | |
434 | if (state == 0) | |
435 | { | |
436 | state = 1; | |
437 | return ch; | |
438 | } | |
439 | else | |
440 | ch = (*get) (); | |
6efd877d KR |
441 | while (ch != EOF && IS_WHITESPACE (ch)); |
442 | if (ch == EOF) | |
443 | return ch; | |
444 | ||
445 | if (IS_COMMENT (ch) || (state == 0 && IS_LINE_COMMENT (ch)) || ch == '/' || IS_LINE_SEPARATOR (ch)) | |
446 | { | |
385ce433 JL |
447 | /* cpp never outputs a leading space before the #, so try to |
448 | avoid being confused. */ | |
449 | not_cpp_line = 1; | |
6efd877d | 450 | goto recycle; |
fecd2382 | 451 | } |
5a051773 SS |
452 | #ifdef MRI |
453 | (*unget) (ch); /* Put back */ | |
454 | return ' '; /* Always return one space at start of line */ | |
455 | #endif | |
6efd877d KR |
456 | |
457 | /* If we're in state 2, we've seen a non-white | |
6d331d71 KR |
458 | character followed by whitespace. If the next |
459 | character is ':', this is whitespace after a label | |
460 | name which we can ignore. */ | |
6efd877d KR |
461 | if (state == 2 && lex[ch] == LEX_IS_COLON) |
462 | { | |
463 | state = 0; | |
464 | return ch; | |
465 | } | |
466 | ||
467 | switch (state) | |
468 | { | |
469 | case 0: | |
470 | state++; | |
471 | goto recycle; /* Punted leading sp */ | |
472 | case 1: | |
385ce433 JL |
473 | /* We can arrive here if we leave a leading whitespace character |
474 | at the beginning of a line. */ | |
475 | goto recycle; | |
6efd877d | 476 | case 2: |
f6a91cc0 | 477 | state = 3; |
6efd877d KR |
478 | (*unget) (ch); |
479 | return ' '; /* Sp after opco */ | |
480 | case 3: | |
481 | goto recycle; /* Sp in operands */ | |
9a7d824a ILT |
482 | case 9: |
483 | case 10: | |
484 | state = 10; /* Sp after symbol char */ | |
485 | goto recycle; | |
6efd877d KR |
486 | default: |
487 | BAD_CASE (state); | |
488 | } | |
489 | break; | |
490 | ||
491 | case LEX_IS_TWOCHAR_COMMENT_1ST: | |
492 | ch2 = (*get) (); | |
493 | if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND) | |
494 | { | |
495 | for (;;) | |
496 | { | |
497 | do | |
498 | { | |
499 | ch2 = (*get) (); | |
500 | if (ch2 != EOF && IS_NEWLINE (ch2)) | |
501 | add_newlines++; | |
fecd2382 | 502 | } |
6efd877d KR |
503 | while (ch2 != EOF && |
504 | (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND)); | |
505 | ||
506 | while (ch2 != EOF && | |
507 | (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)) | |
508 | { | |
509 | ch2 = (*get) (); | |
fecd2382 | 510 | } |
6efd877d KR |
511 | |
512 | if (ch2 == EOF | |
513 | || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST) | |
fecd2382 | 514 | break; |
6efd877d KR |
515 | (*unget) (ch); |
516 | } | |
517 | if (ch2 == EOF) | |
518 | as_warn ("End of file in multiline comment"); | |
519 | ||
520 | ch = ' '; | |
521 | goto recycle; | |
522 | } | |
523 | else | |
524 | { | |
525 | if (ch2 != EOF) | |
526 | (*unget) (ch2); | |
9a7d824a ILT |
527 | if (state == 9 || state == 10) |
528 | state = 3; | |
6efd877d KR |
529 | return ch; |
530 | } | |
531 | break; | |
532 | ||
533 | case LEX_IS_STRINGQUOTE: | |
9a7d824a ILT |
534 | if (state == 9 || state == 10) |
535 | old_state = 3; | |
536 | else | |
537 | old_state = state; | |
6efd877d KR |
538 | state = 5; |
539 | return ch; | |
540 | #ifndef MRI | |
a39116f1 | 541 | #ifndef IEEE_STYLE |
6efd877d KR |
542 | case LEX_IS_ONECHAR_QUOTE: |
543 | ch = (*get) (); | |
544 | if (ch == EOF) | |
545 | { | |
546 | as_warn ("End-of-file after a one-character quote; \\000 inserted"); | |
547 | ch = 0; | |
548 | } | |
549 | if (ch == '\\') | |
550 | { | |
551 | ch = (*get) (); | |
552 | ch = process_escape (ch); | |
553 | } | |
554 | sprintf (out_buf, "%d", (int) (unsigned char) ch); | |
7c2d4011 | 555 | |
6efd877d | 556 | |
9a7d824a | 557 | /* None of these 'x constants for us. We want 'x'. */ |
6efd877d KR |
558 | if ((ch = (*get) ()) != '\'') |
559 | { | |
fecd2382 | 560 | #ifdef REQUIRE_CHAR_CLOSE_QUOTE |
6efd877d | 561 | as_warn ("Missing close quote: (assumed)"); |
fecd2382 | 562 | #else |
6efd877d | 563 | (*unget) (ch); |
fecd2382 | 564 | #endif |
6efd877d KR |
565 | } |
566 | if (strlen (out_buf) == 1) | |
567 | { | |
568 | return out_buf[0]; | |
569 | } | |
9a7d824a ILT |
570 | if (state == 9 || state == 10) |
571 | old_state = 3; | |
572 | else | |
573 | old_state = state; | |
6efd877d KR |
574 | state = -1; |
575 | out_string = out_buf; | |
576 | return *out_string++; | |
7c2d4011 | 577 | #endif |
a39116f1 | 578 | #endif |
6efd877d | 579 | case LEX_IS_COLON: |
9a7d824a ILT |
580 | if (state == 9 || state == 10) |
581 | state = 3; | |
582 | else if (state != 3) | |
6efd877d KR |
583 | state = 0; |
584 | return ch; | |
585 | ||
586 | case LEX_IS_NEWLINE: | |
587 | /* Roll out a bunch of newlines from inside comments, etc. */ | |
588 | if (add_newlines) | |
589 | { | |
590 | --add_newlines; | |
591 | (*unget) (ch); | |
592 | } | |
593 | /* fall thru into... */ | |
594 | ||
595 | case LEX_IS_LINE_SEPARATOR: | |
596 | state = 0; | |
597 | return ch; | |
598 | ||
599 | case LEX_IS_LINE_COMMENT_START: | |
9a7d824a | 600 | if (state == 0) /* Only comment at start of line. */ |
6efd877d | 601 | { |
9a7d824a ILT |
602 | /* FIXME-someday: The two character comment stuff was badly |
603 | thought out. On i386, we want '/' as line comment start | |
604 | AND we want C style comments. hence this hack. The | |
605 | whole lexical process should be reworked. xoxorich. */ | |
606 | if (ch == '/') | |
f6a91cc0 | 607 | { |
9a7d824a ILT |
608 | ch2 = (*get) (); |
609 | if (ch2 == '*') | |
610 | { | |
611 | state = -2; | |
612 | return (do_scrub_next_char (get, unget)); | |
613 | } | |
614 | else | |
615 | { | |
616 | (*unget) (ch2); | |
617 | } | |
618 | } /* bad hack */ | |
6efd877d | 619 | |
385ce433 JL |
620 | if (ch != '#') |
621 | not_cpp_line = 1; | |
622 | ||
9a7d824a | 623 | do |
6efd877d | 624 | ch = (*get) (); |
9a7d824a | 625 | while (ch != EOF && IS_WHITESPACE (ch)); |
6efd877d | 626 | if (ch == EOF) |
9a7d824a ILT |
627 | { |
628 | as_warn ("EOF in comment: Newline inserted"); | |
629 | return '\n'; | |
630 | } | |
385ce433 | 631 | if (ch < '0' || ch > '9' || not_cpp_line) |
9a7d824a ILT |
632 | { |
633 | /* Non-numerics: Eat whole comment line */ | |
634 | while (ch != EOF && !IS_NEWLINE (ch)) | |
635 | ch = (*get) (); | |
636 | if (ch == EOF) | |
637 | as_warn ("EOF in Comment: Newline inserted"); | |
638 | state = 0; | |
639 | return '\n'; | |
640 | } | |
641 | /* Numerics begin comment. Perhaps CPP `# 123 "filename"' */ | |
642 | (*unget) (ch); | |
643 | old_state = 4; | |
644 | state = -1; | |
001581c7 | 645 | out_string = "\t.appline "; |
9a7d824a | 646 | return *out_string++; |
6efd877d | 647 | } |
6efd877d | 648 | |
9a7d824a ILT |
649 | /* We have a line comment character which is not at the start of |
650 | a line. If this is also a normal comment character, fall | |
651 | through. Otherwise treat it as a default character. */ | |
652 | if (strchr (comment_chars, ch) == NULL) | |
653 | goto de_fault; | |
654 | /* Fall through. */ | |
6efd877d KR |
655 | case LEX_IS_COMMENT_START: |
656 | do | |
657 | ch = (*get) (); | |
658 | while (ch != EOF && !IS_NEWLINE (ch)); | |
659 | if (ch == EOF) | |
660 | as_warn ("EOF in comment: Newline inserted"); | |
661 | state = 0; | |
662 | return '\n'; | |
663 | ||
f6a91cc0 | 664 | case LEX_IS_SYMBOL_COMPONENT: |
9a7d824a ILT |
665 | if (state == 10) |
666 | { | |
667 | /* This is a symbol character following another symbol | |
668 | character, with whitespace in between. We skipped the | |
669 | whitespace earlier, so output it now. */ | |
670 | (*unget) (ch); | |
671 | state = 3; | |
672 | return ' '; | |
673 | } | |
f6a91cc0 ILT |
674 | if (state == 3) |
675 | state = 9; | |
676 | /* Fall through. */ | |
6efd877d KR |
677 | default: |
678 | de_fault: | |
679 | /* Some relatively `normal' character. */ | |
680 | if (state == 0) | |
681 | { | |
682 | state = 2; /* Now seeing opcode */ | |
683 | return ch; | |
fecd2382 | 684 | } |
6efd877d KR |
685 | else if (state == 1) |
686 | { | |
687 | state = 2; /* Ditto */ | |
688 | return ch; | |
689 | } | |
f6a91cc0 ILT |
690 | else if (state == 9) |
691 | { | |
692 | if (lex[ch] != LEX_IS_SYMBOL_COMPONENT) | |
693 | state = 3; | |
694 | return ch; | |
695 | } | |
9a7d824a ILT |
696 | else if (state == 10) |
697 | { | |
698 | state = 3; | |
699 | return ch; | |
700 | } | |
6efd877d KR |
701 | else |
702 | { | |
703 | return ch; /* Opcode or operands already */ | |
704 | } | |
705 | } | |
706 | return -1; | |
fecd2382 RP |
707 | } |
708 | ||
709 | #ifdef TEST | |
710 | ||
6efd877d KR |
711 | const char comment_chars[] = "|"; |
712 | const char line_comment_chars[] = "#"; | |
fecd2382 | 713 | |
6efd877d | 714 | main () |
fecd2382 | 715 | { |
6efd877d KR |
716 | int ch; |
717 | ||
718 | app_begin (); | |
719 | while ((ch = do_scrub_next_char (stdin)) != EOF) | |
720 | putc (ch, stdout); | |
fecd2382 RP |
721 | } |
722 | ||
6efd877d KR |
/* Test-build stand-in for the assembler's warning routine: write STR
   and a newline to stderr.  STR is printed literally; any further
   arguments passed by callers are ignored.  The return type is spelled
   out because implicit int is not valid in C99 and later.  */
int
as_warn (str)
     char *str;
{
  fputs (str, stderr);
  putc ('\n', stderr);
  return 0;
}
6efd877d | 729 | |
fecd2382 RP |
730 | #endif |
731 | ||
fecd2382 | 732 | /* end of app.c */ |