1 /* Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.
3 Modified by Allen Wirfs-Brock, Instantiations Inc 2/90
5 /* This is the Assembler Pre-Processor
6 Copyright (C) 1987 Free Software Foundation, Inc.
8 This file is part of GAS, the GNU Assembler.
10 GAS is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
15 GAS is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GAS; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
24 /* App, the assembler pre-processor. This pre-processor strips out excess
25 spaces, turns single-quoted characters into a decimal constant, and turns
26 # <number> <filename> <garbage> into a .line <number>\n.file <filename>
27 pair. This needs better error-handling.
31 #include "as.h" /* For BAD_CASE() only */
33 #if (__STDC__ != 1) && !defined(const)
34 #define const /* Nothing */
38 static const char symbol_chars[] =
39 "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
41 #define LEX_IS_SYMBOL_COMPONENT 1 /* Character classes stored in lex[].  This one: every char of symbol_chars. */
42 #define LEX_IS_WHITESPACE 2 /* ' ' and '\t'. */
43 #define LEX_IS_LINE_SEPARATOR 3 /* ';' by default, plus every char of line_separator_chars. */
44 #define LEX_IS_COMMENT_START 4 /* Every char of comment_chars. */
45 #define LEX_IS_LINE_COMMENT_START 5 /* Every char of line_comment_chars. */
46 #define LEX_IS_TWOCHAR_COMMENT_1ST 6 /* '/', first char of a slash-star comment (when enabled). */
47 #define LEX_IS_TWOCHAR_COMMENT_2ND 7 /* '*', second char of a slash-star comment (when enabled). */
48 #define LEX_IS_STRINGQUOTE 8 /* '"'; also '\'' when SINGLE_QUOTE_STRINGS is defined. */
49 #define LEX_IS_COLON 9 /* ':'. */
50 #define LEX_IS_NEWLINE 10 /* '\n'. */
51 #define LEX_IS_ONECHAR_QUOTE 11 /* '\'' (unless overridden by SINGLE_QUOTE_STRINGS). */
52 #define IS_SYMBOL_COMPONENT(c) (lex[c] == LEX_IS_SYMBOL_COMPONENT) /* The IS_* macros test the lex[] class of C.  C is used directly as an array index, so it must not be EOF or negative. */
53 #define IS_WHITESPACE(c) (lex[c] == LEX_IS_WHITESPACE)
54 #define IS_LINE_SEPARATOR(c) (lex[c] == LEX_IS_LINE_SEPARATOR)
55 #define IS_COMMENT(c) (lex[c] == LEX_IS_COMMENT_START)
56 #define IS_LINE_COMMENT(c) (lex[c] == LEX_IS_LINE_COMMENT_START)
57 #define IS_NEWLINE(c) (lex[c] == LEX_IS_NEWLINE)
59 /* FIXME-soon: The entire lexer/parser thingy should be
60 built statically at compile time rather than dynamically
61 each and every time the assembler is run. xoxorich. */
68 lex[' '] = LEX_IS_WHITESPACE;
69 lex['\t'] = LEX_IS_WHITESPACE;
70 lex['\n'] = LEX_IS_NEWLINE;
71 lex[';'] = LEX_IS_LINE_SEPARATOR;
72 lex['"'] = LEX_IS_STRINGQUOTE;
73 lex['\''] = LEX_IS_ONECHAR_QUOTE;
74 lex[':'] = LEX_IS_COLON;
78 #ifdef SINGLE_QUOTE_STRINGS
79 lex['\''] = LEX_IS_STRINGQUOTE;
82 /* Note that these override the previous defaults, e.g. if ';'
84 is a comment char, then it isn't a line separator. */
85 for (p = symbol_chars; *p; ++p)
87 lex[*p] = LEX_IS_SYMBOL_COMPONENT;
88 } /* declare symbol characters */
90 for (p = comment_chars; *p; p++)
92 lex[*p] = LEX_IS_COMMENT_START;
93 } /* declare comment chars */
95 for (p = line_comment_chars; *p; p++)
97 lex[*p] = LEX_IS_LINE_COMMENT_START;
98 } /* declare line comment chars */
100 for (p = line_separator_chars; *p; p++)
102 lex[*p] = LEX_IS_LINE_SEPARATOR;
103 } /* declare line separators */
105 /* Only allow slash-star comments if slash is not in use */
108 lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
110 /* FIXME-soon. This is a bad hack but otherwise, we
111 can't do c-style comments when '/' is a line
112 comment char. xoxorich. */
115 lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
117 } /* do_scrub_begin() */
124 return getc (scrub_file);
131 ungetc (ch, scrub_file);
132 } /* scrub_to_file() */
135 char *scrub_last_string;
140 return scrub_string == scrub_last_string ? EOF : (unsigned char) *scrub_string++; /* Return the next byte of the in-memory input, or EOF once scrub_last_string is reached.  The byte is converted through unsigned char (as getc does) so that a byte >= 0x80 can never come back negative: a negative value could be mistaken for EOF or used as a negative lex[] index. */
141 } /* scrub_from_string() */
147 *--scrub_string = ch;
148 } /* scrub_to_string() */
150 /* Saved state of the scrubber */
152 static int old_state;
153 static char *out_string;
154 static char out_buf[20];
155 static int add_newlines = 0;
157 /* Data structure for saving the state of app across #include's. Note that
158 app is called asynchronously to the parsing of the .include's, so our
159 state at the time .include is interpreted is completely unrelated.
160 That's why we have to save it all. */
167 char out_buf[sizeof (out_buf)];
170 char *scrub_last_string;
177 register struct app_save *saved; /* Snapshot of the scrubber state; returned to the caller as an opaque char *. */
179 saved = (struct app_save *) xmalloc (sizeof (*saved));
180 saved->state = state;
181 saved->old_state = old_state;
182 saved->out_string = out_string;
183 memcpy (saved->out_buf, out_buf, sizeof (out_buf)); /* memcpy is (dest, src, n): copy the live out_buf INTO the snapshot.  The arguments were previously reversed, which never saved the buffer and instead overwrote the live out_buf with the freshly allocated (presumably uninitialized) snapshot bytes. */
184 saved->add_newlines = add_newlines;
185 saved->scrub_string = scrub_string;
186 saved->scrub_last_string = scrub_last_string;
187 saved->scrub_file = scrub_file;
189 /* do_scrub_begin() is not useful, just wastes time. */
190 return (char *) saved;
197 register struct app_save *saved = (struct app_save *) arg; /* ARG is the opaque snapshot pointer; every saved field is copied back into the live scrubber state. */
199 /* There is no do_scrub_end (). */
200 state = saved->state;
201 old_state = saved->old_state;
202 out_string = saved->out_string;
203 memcpy (out_buf, saved->out_buf, sizeof (out_buf)); /* memcpy is (dest, src, n): copy the snapshot back INTO the live out_buf.  The arguments were previously reversed, so the live buffer was never restored.  out_string may point into out_buf, so the contents must be restored along with the pointer. */
204 add_newlines = saved->add_newlines;
205 scrub_string = saved->scrub_string;
206 scrub_last_string = saved->scrub_last_string;
207 scrub_file = saved->scrub_file;
212 /* @@ This assumes that \n &c are the same on host and target. This is not
239 do_scrub_next_char (get, unget)
243 /*State 0: beginning of normal line
244 1: After first whitespace on line (flush more white)
245 2: After first non-white (opcode) on line (keep 1white)
246 3: after second white on line (into operands) (flush white)
247 4: after putting out a .line, put out digits
248 5: parsing a string, then go to old-state
249 6: putting out \ escape in a double-quoted string.
250 7: After putting out a .appfile, put out string.
251 8: After putting out a .appfile string, flush until newline.
252 9: After seeing symbol char in state 3 (keep 1white after symchar)
253 10: After seeing whitespace in state 9 (keep white before symchar)
254 -1: output string in out_string and go to the state in old_state
255 -2: flush text until a '*' '/' is seen, then go to state old_state
258 /* I added states 9 and 10 because the MIPS ECOFF assembler uses
259 constructs like ``.loc 1 20''. This was turning into ``.loc
260 120''. States 9 and 10 ensure that a space is never dropped in
261 between characters which could appear in an identifier. Ian
264 register int ch, ch2 = 0;
270 if (*out_string == 0)
284 while (ch != EOF && ch != '\n' && ch != '*');
285 if (ch == '\n' || ch == EOF)
288 /* At this point, ch must be a '*' */
289 while ((ch = (*get) ()) == '*')
293 if (ch == EOF || ch == '/')
302 if (ch == EOF || (ch >= '0' && ch <= '9'))
306 while (ch != EOF && IS_WHITESPACE (ch))
311 out_string = "\n.appfile ";
314 return *out_string++;
318 while (ch != EOF && ch != '\n')
326 if (lex[ch] == LEX_IS_STRINGQUOTE)
338 as_warn ("End of file in string: inserted '\"'");
353 /* Handle strings broken across lines, by turning '\n' into
369 #endif /* BACKSLASH_V */
379 #if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
381 as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
383 #else /* ONLY_STANDARD_ESCAPES */
385 /* Accept \x as x for any x */
387 #endif /* ONLY_STANDARD_ESCAPES */
390 as_warn ("End of file in string: '\"' inserted");
409 /* OK, we are somewhere in states 0 through 4 or 9 through 10 */
417 as_warn ("End of file not at end of a line: Newline inserted.");
423 case LEX_IS_WHITESPACE:
426 while (ch != EOF && IS_WHITESPACE (ch));
430 if (IS_COMMENT (ch) || (state == 0 && IS_LINE_COMMENT (ch)) || ch == '/' || IS_LINE_SEPARATOR (ch))
435 (*unget) (ch); /* Put back */
436 return ' '; /* Always return one space at start of line */
439 /* If we're in state 2, we've seen a non-white
440 character followed by whitespace. If the next
441 character is ':', this is whitespace after a label
442 name which we can ignore. */
443 if (state == 2 && lex[ch] == LEX_IS_COLON)
453 goto recycle; /* Punted leading sp */
455 BAD_CASE (state); /* We can't get here */
459 return ' '; /* Sp after opco */
461 goto recycle; /* Sp in operands */
464 state = 10; /* Sp after symbol char */
471 case LEX_IS_TWOCHAR_COMMENT_1ST:
473 if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
480 if (ch2 != EOF && IS_NEWLINE (ch2))
484 (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
487 (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
493 || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
498 as_warn ("End of file in multiline comment");
507 if (state == 9 || state == 10)
513 case LEX_IS_STRINGQUOTE:
514 if (state == 9 || state == 10)
522 case LEX_IS_ONECHAR_QUOTE:
526 as_warn ("End-of-file after a one-character quote; \\000 inserted");
532 ch = process_escape (ch);
534 sprintf (out_buf, "%d", (int) (unsigned char) ch);
537 /* None of these 'x constants for us. We want 'x'. */
538 if ((ch = (*get) ()) != '\'')
540 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
541 as_warn ("Missing close quote: (assumed)");
546 if (strlen (out_buf) == 1)
550 if (state == 9 || state == 10)
555 out_string = out_buf;
556 return *out_string++;
560 if (state == 9 || state == 10)
567 /* Roll out a bunch of newlines from inside comments, etc. */
573 /* fall thru into... */
575 case LEX_IS_LINE_SEPARATOR:
579 case LEX_IS_LINE_COMMENT_START:
580 if (state == 0) /* Only comment at start of line. */
582 /* FIXME-someday: The two character comment stuff was badly
583 thought out. On i386, we want '/' as line comment start
584 AND we want C style comments. Hence this hack. The
585 whole lexical process should be reworked. xoxorich. */
592 return (do_scrub_next_char (get, unget));
602 while (ch != EOF && IS_WHITESPACE (ch));
605 as_warn ("EOF in comment: Newline inserted");
608 if (ch < '0' || ch > '9')
610 /* Non-numerics: Eat whole comment line */
611 while (ch != EOF && !IS_NEWLINE (ch))
614 as_warn ("EOF in Comment: Newline inserted");
618 /* Numerics begin comment. Perhaps CPP `# 123 "filename"' */
622 out_string = ".appline ";
623 return *out_string++;
626 /* We have a line comment character which is not at the start of
627 a line. If this is also a normal comment character, fall
628 through. Otherwise treat it as a default character. */
629 if (strchr (comment_chars, ch) == NULL)
632 case LEX_IS_COMMENT_START:
635 while (ch != EOF && !IS_NEWLINE (ch));
637 as_warn ("EOF in comment: Newline inserted");
641 case LEX_IS_SYMBOL_COMPONENT:
644 /* This is a symbol character following another symbol
645 character, with whitespace in between. We skipped the
646 whitespace earlier, so output it now. */
656 /* Some relatively `normal' character. */
659 state = 2; /* Now seeing opcode */
664 state = 2; /* Ditto */
669 if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
673 else if (state == 10)
680 return ch; /* Opcode or operands already */
688 const char comment_chars[] = "|";
689 const char line_comment_chars[] = "#";
696 while ((ch = do_scrub_next_char (stdin)) != EOF)