1 /* This is the Assembler Pre-Processor
2 Copyright (C) 1987, 1990, 1991, 1992, 1994 Free Software Foundation, Inc.
4 This file is part of GAS, the GNU Assembler.
6 GAS is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GAS is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GAS; see the file COPYING. If not, write to
18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
20 /* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
21 /* App, the assembler pre-processor. This pre-processor strips out excess
22 spaces, turns single-quoted characters into a decimal constant, and turns
23 # <number> <filename> <garbage> into a .line <number>\n.file <filename>
24 pair. This needs better error-handling.
28 #include "as.h" /* For BAD_CASE() only */
32 #define const /* empty */
37 static const char symbol_chars[] =
38 "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
40 #define LEX_IS_SYMBOL_COMPONENT 1
41 #define LEX_IS_WHITESPACE 2
42 #define LEX_IS_LINE_SEPARATOR 3
43 #define LEX_IS_COMMENT_START 4
44 #define LEX_IS_LINE_COMMENT_START 5
45 #define LEX_IS_TWOCHAR_COMMENT_1ST 6
46 #define LEX_IS_TWOCHAR_COMMENT_2ND 7
47 #define LEX_IS_STRINGQUOTE 8
48 #define LEX_IS_COLON 9
49 #define LEX_IS_NEWLINE 10
50 #define LEX_IS_ONECHAR_QUOTE 11
51 #define IS_SYMBOL_COMPONENT(c) (lex[c] == LEX_IS_SYMBOL_COMPONENT)
52 #define IS_WHITESPACE(c) (lex[c] == LEX_IS_WHITESPACE)
53 #define IS_LINE_SEPARATOR(c) (lex[c] == LEX_IS_LINE_SEPARATOR)
54 #define IS_COMMENT(c) (lex[c] == LEX_IS_COMMENT_START)
55 #define IS_LINE_COMMENT(c) (lex[c] == LEX_IS_LINE_COMMENT_START)
56 #define IS_NEWLINE(c) (lex[c] == LEX_IS_NEWLINE)
58 static int process_escape PARAMS ((int));
60 /* FIXME-soon: The entire lexer/parser thingy should be
61 built statically at compile time rather than dynamically
62 each and every time the assembler is run. xoxorich. */
69 lex[' '] = LEX_IS_WHITESPACE;
70 lex['\t'] = LEX_IS_WHITESPACE;
71 lex['\n'] = LEX_IS_NEWLINE;
72 lex[';'] = LEX_IS_LINE_SEPARATOR;
73 lex['"'] = LEX_IS_STRINGQUOTE;
75 lex['\''] = LEX_IS_ONECHAR_QUOTE;
77 lex[':'] = LEX_IS_COLON;
81 #ifdef SINGLE_QUOTE_STRINGS
82 lex['\''] = LEX_IS_STRINGQUOTE;
85 /* Note that these override the previous defaults, e.g. if ';'
87 is a comment char, then it isn't a line separator. */
88 for (p = symbol_chars; *p; ++p)
90 lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
91 } /* declare symbol characters */
93 for (p = comment_chars; *p; p++)
95 lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
96 } /* declare comment chars */
98 for (p = line_comment_chars; *p; p++)
100 lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
101 } /* declare line comment chars */
103 for (p = line_separator_chars; *p; p++)
105 lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
106 } /* declare line separators */
108 /* Only allow slash-star comments if slash is not in use */
111 lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
113 /* FIXME-soon. This is a bad hack but otherwise, we
114 can't do c-style comments when '/' is a line
115 comment char. xoxorich. */
118 lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
120 } /* do_scrub_begin() */
127 return getc (scrub_file);
134 ungetc (ch, scrub_file);
135 } /* scrub_to_file() */
138 char *scrub_last_string;
143 return scrub_string == scrub_last_string ? EOF : *scrub_string++;
144 } /* scrub_from_string() */
150 *--scrub_string = ch;
151 } /* scrub_to_string() */
153 /* Saved state of the scrubber */
155 static int old_state;
156 static char *out_string;
157 static char out_buf[20];
158 static int add_newlines = 0;
160 /* Data structure for saving the state of app across .include's. Note that
161 app is called asynchronously to the parsing of the .include's, so our
162 state at the time .include is interpreted is completely unrelated.
163 That's why we have to save it all. */
170 char out_buf[sizeof (out_buf)];
173 char *scrub_last_string;
180 register struct app_save *saved;
182 saved = (struct app_save *) xmalloc (sizeof (*saved));
183 saved->state = state;
184 saved->old_state = old_state;
185 saved->out_string = out_string;
186 memcpy (saved->out_buf, out_buf, sizeof (out_buf));
187 saved->add_newlines = add_newlines;
188 saved->scrub_string = scrub_string;
189 saved->scrub_last_string = scrub_last_string;
190 saved->scrub_file = scrub_file;
192 /* do_scrub_begin() is not useful, just wastes time. */
193 return (char *) saved;
200 register struct app_save *saved = (struct app_save *) arg;
202 /* There is no do_scrub_end (). */
203 state = saved->state;
204 old_state = saved->old_state;
205 out_string = saved->out_string;
206 memcpy (out_buf, saved->out_buf, sizeof (out_buf));
207 add_newlines = saved->add_newlines;
208 scrub_string = saved->scrub_string;
209 scrub_last_string = saved->scrub_last_string;
210 scrub_file = saved->scrub_file;
215 /* @@ This assumes that \n &c are the same on host and target. This is not
242 do_scrub_next_char (get, unget)
246 /*State 0: beginning of normal line
247 1: After first whitespace on line (flush more white)
248 2: After first non-white (opcode) on line (keep 1white)
249 3: after second white on line (into operands) (flush white)
250 4: after putting out a .line, put out digits
251 5: parsing a string, then go to old-state
252 6: putting out \ escape in a quoted string.
253 7: After putting out a .appfile, put out string.
254 8: After putting out a .appfile string, flush until newline.
255 9: After seeing symbol char in state 3 (keep 1white after symchar)
256 10: After seeing whitespace in state 9 (keep white before symchar)
257 -1: output string in out_string and go to the state in old_state
258 -2: flush text until a '*' '/' is seen, then go to state old_state
261 /* I added states 9 and 10 because the MIPS ECOFF assembler uses
262 constructs like ``.loc 1 20''. This was turning into ``.loc
263 120''. States 9 and 10 ensure that a space is never dropped in
264 between characters which could appear in an identifier. Ian
267 register int ch, ch2 = 0;
268 int not_cpp_line = 0;
274 if (*out_string == 0)
288 while (ch != EOF && ch != '\n' && ch != '*');
289 if (ch == '\n' || ch == EOF)
292 /* At this point, ch must be a '*' */
293 while ((ch = (*get) ()) == '*')
297 if (ch == EOF || ch == '/')
306 if (ch == EOF || (ch >= '0' && ch <= '9'))
310 while (ch != EOF && IS_WHITESPACE (ch))
315 out_string = "\n\t.appfile ";
318 return *out_string++;
322 while (ch != EOF && ch != '\n')
331 if (lex[ch] == LEX_IS_STRINGQUOTE)
343 as_warn ("End of file in string: inserted '\"'");
358 /* Handle strings broken across lines, by turning '\n' into
374 #endif /* BACKSLASH_V */
386 #if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
388 as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
390 #else /* ONLY_STANDARD_ESCAPES */
392 /* Accept \x as x for any x */
394 #endif /* ONLY_STANDARD_ESCAPES */
397 as_warn ("End of file in string: '\"' inserted");
416 /* OK, we are somewhere in states 0 through 4 or 9 through 10 */
424 as_warn ("End of file not at end of a line: Newline inserted.");
430 case LEX_IS_WHITESPACE:
432 /* Preserve a single whitespace character at the beginning of
441 while (ch != EOF && IS_WHITESPACE (ch));
445 if (IS_COMMENT (ch) || (state == 0 && IS_LINE_COMMENT (ch)) || ch == '/' || IS_LINE_SEPARATOR (ch))
447 /* cpp never outputs a leading space before the #, so try to
448 avoid being confused. */
453 (*unget) (ch); /* Put back */
454 return ' '; /* Always return one space at start of line */
457 /* If we're in state 2, we've seen a non-white
458 character followed by whitespace. If the next
459 character is ':', this is whitespace after a label
460 name which we can ignore. */
461 if (state == 2 && lex[ch] == LEX_IS_COLON)
471 goto recycle; /* Punted leading sp */
473 /* We can arrive here if we leave a leading whitespace character
474 at the beginning of a line. */
479 return ' '; /* Sp after opco */
481 goto recycle; /* Sp in operands */
484 state = 10; /* Sp after symbol char */
491 case LEX_IS_TWOCHAR_COMMENT_1ST:
493 if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
500 if (ch2 != EOF && IS_NEWLINE (ch2))
504 (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
507 (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
513 || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
518 as_warn ("End of file in multiline comment");
527 if (state == 9 || state == 10)
533 case LEX_IS_STRINGQUOTE:
534 if (state == 9 || state == 10)
542 case LEX_IS_ONECHAR_QUOTE:
546 as_warn ("End-of-file after a one-character quote; \\000 inserted");
552 ch = process_escape (ch);
554 sprintf (out_buf, "%d", (int) (unsigned char) ch);
557 /* None of these 'x constants for us. We want 'x'. */
558 if ((ch = (*get) ()) != '\'')
560 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
561 as_warn ("Missing close quote: (assumed)");
566 if (strlen (out_buf) == 1)
570 if (state == 9 || state == 10)
575 out_string = out_buf;
576 return *out_string++;
580 if (state == 9 || state == 10)
587 /* Roll out a bunch of newlines from inside comments, etc. */
593 /* fall thru into... */
595 case LEX_IS_LINE_SEPARATOR:
599 case LEX_IS_LINE_COMMENT_START:
600 if (state == 0) /* Only comment at start of line. */
602 /* FIXME-someday: The two character comment stuff was badly
603 thought out. On i386, we want '/' as line comment start
604 AND we want C style comments. Hence this hack. The
605 whole lexical process should be reworked. xoxorich. */
612 return (do_scrub_next_char (get, unget));
625 while (ch != EOF && IS_WHITESPACE (ch));
628 as_warn ("EOF in comment: Newline inserted");
631 if (ch < '0' || ch > '9' || not_cpp_line)
633 /* Non-numerics: Eat whole comment line */
634 while (ch != EOF && !IS_NEWLINE (ch))
637 as_warn ("EOF in Comment: Newline inserted");
641 /* Numerics begin comment. Perhaps CPP `# 123 "filename"' */
645 out_string = "\t.appline ";
646 return *out_string++;
649 /* We have a line comment character which is not at the start of
650 a line. If this is also a normal comment character, fall
651 through. Otherwise treat it as a default character. */
652 if (strchr (comment_chars, ch) == NULL)
655 case LEX_IS_COMMENT_START:
658 while (ch != EOF && !IS_NEWLINE (ch));
660 as_warn ("EOF in comment: Newline inserted");
664 case LEX_IS_SYMBOL_COMPONENT:
667 /* This is a symbol character following another symbol
668 character, with whitespace in between. We skipped the
669 whitespace earlier, so output it now. */
679 /* Some relatively `normal' character. */
682 state = 2; /* Now seeing opcode */
687 state = 2; /* Ditto */
692 if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
696 else if (state == 10)
703 return ch; /* Opcode or operands already */
711 const char comment_chars[] = "|";
712 const char line_comment_chars[] = "#";
719 while ((ch = do_scrub_next_char (stdin)) != EOF)