1 /* Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.
3 Modified by Allen Wirfs-Brock, Instantiations Inc 2/90
5 /* This is the Assembler Pre-Processor
6 Copyright (C) 1987 Free Software Foundation, Inc.
8 This file is part of GAS, the GNU Assembler.
10 GAS is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
15 GAS is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GAS; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
24 /* App, the assembler pre-processor. This pre-processor strips out excess
25 spaces, turns single-quoted characters into a decimal constant, and turns
26 # <number> <filename> <garbage> into a .line <number>\n.app-file <filename> pair.
27 This needs better error-handling.
31 #include "as.h" /* For BAD_CASE() only */
33 #if (__STDC__ != 1) && !defined(const)
34 #define const /* Nothing */
/* Character-class table for the scrubber, indexed by character value
   (0..255).  It has static storage, so every entry starts out as 0 (no
   class assigned); do_scrub_begin() stores the LEX_IS_* codes below.  */
static char lex [256];

/* Characters that are classified as symbol components.  */
static char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* These will go in BSS if not defined elsewhere, producing empty strings. */
extern const char comment_chars[];
extern const char line_comment_chars[];
extern const char line_separator_chars[];

/* Class codes stored in lex[].  */
#define LEX_IS_SYMBOL_COMPONENT		1
#define LEX_IS_WHITESPACE		2
#define LEX_IS_LINE_SEPARATOR		3
#define LEX_IS_COMMENT_START		4
#define LEX_IS_LINE_COMMENT_START	5
#define LEX_IS_TWOCHAR_COMMENT_1ST	6
#define LEX_IS_TWOCHAR_COMMENT_2ND	7
#define LEX_IS_STRINGQUOTE		8
#define LEX_IS_COLON			9
#define LEX_IS_NEWLINE			10
#define LEX_IS_ONECHAR_QUOTE		11

/* Class tests.  The argument is parenthesized and converted to unsigned
   char before it is used as an index, so a negative value (a sign-extended
   plain char, or EOF) selects an entry inside the table instead of reading
   in front of it.  Each macro evaluates its argument exactly once.  */
#define IS_SYMBOL_COMPONENT(c)	(lex[(unsigned char) (c)] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)	(lex[(unsigned char) (c)] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)	(lex[(unsigned char) (c)] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c)		(lex[(unsigned char) (c)] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)	(lex[(unsigned char) (c)] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c)		(lex[(unsigned char) (c)] == LEX_IS_NEWLINE)
64 /* FIXME-soon: The entire lexer/parser thingy should be
65 built statically at compile time rather than dynamically
66 each and every time the assembler is run. xoxorich. */
68 void do_scrub_begin() {
71 lex[' '] = LEX_IS_WHITESPACE;
72 lex['\t'] = LEX_IS_WHITESPACE;
73 lex['\n'] = LEX_IS_NEWLINE;
74 lex[';'] = LEX_IS_LINE_SEPARATOR;
75 lex['"'] = LEX_IS_STRINGQUOTE;
76 lex['\''] = LEX_IS_ONECHAR_QUOTE;
77 lex[':'] = LEX_IS_COLON;
79 /* Note that these override the previous defaults, e.g. if ';'
80 is a comment char, then it isn't a line separator. */
81 for (p = symbol_chars; *p; ++p) {
82 lex[*p] = LEX_IS_SYMBOL_COMPONENT;
83 } /* declare symbol characters */
85 for (p = line_comment_chars; *p; p++) {
86 lex[*p] = LEX_IS_LINE_COMMENT_START;
87 } /* declare line comment chars */
89 for (p = comment_chars; *p; p++) {
90 lex[*p] = LEX_IS_COMMENT_START;
91 } /* declare comment chars */
93 for (p = line_separator_chars; *p; p++) {
94 lex[*p] = LEX_IS_LINE_SEPARATOR;
95 } /* declare line separators */
97 /* Only allow slash-star comments if slash is not in use */
99 lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
101 /* FIXME-soon. This is a bad hack but otherwise, we
102 can't do c-style comments when '/' is a line
103 comment char. xoxorich. */
105 lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
107 } /* do_scrub_begin() */
111 int scrub_from_file() {
112 return getc(scrub_file);
115 void scrub_to_file(ch)
118 ungetc(ch,scrub_file);
119 } /* scrub_to_file() */
122 char *scrub_last_string;
124 int scrub_from_string() {
125 return scrub_string == scrub_last_string ? EOF : *scrub_string++;
126 } /* scrub_from_string() */
128 void scrub_to_string(ch)
132 } /* scrub_to_string() */
134 /* Saved state of the scrubber */
/* State to go to once an output-string or comment-flushing state is
   finished.  */
static int old_state;

/* Text waiting to be handed out; it is consumed one character at a time
   through *out_string++.  */
static char *out_string;

/* Scratch buffer for generated text, e.g. the decimal value that replaces
   a one-character quote.  */
static char out_buf[20];

/* NOTE(review): presumably the number of newlines seen inside comments that
   still have to be passed through -- confirm against the scrubber body.  */
static int add_newlines = 0;
141 /* Data structure for saving the state of app across #include's. Note that
142 app is called asynchronously to the parsing of the .include's, so our
143 state at the time .include is interpreted is completely unrelated.
144 That's why we have to save it all. */
150 char out_buf[sizeof (out_buf)];
153 char *scrub_last_string;
158 register struct app_save *saved;
160 saved = (struct app_save *) xmalloc(sizeof (*saved));
161 saved->state = state;
162 saved->old_state = old_state;
163 saved->out_string = out_string;
164 bcopy(saved->out_buf, out_buf, sizeof(out_buf));
165 saved->add_newlines = add_newlines;
166 saved->scrub_string = scrub_string;
167 saved->scrub_last_string = scrub_last_string;
168 saved->scrub_file = scrub_file;
170 /* do_scrub_begin() is not useful, just wastes time. */
171 return (char *)saved;
177 register struct app_save *saved = (struct app_save *)arg;
179 /* There is no do_scrub_end (). */
180 state = saved->state;
181 old_state = saved->old_state;
182 out_string = saved->out_string;
183 bcopy (out_buf, saved->out_buf, sizeof (out_buf));
184 add_newlines = saved->add_newlines;
185 scrub_string = saved->scrub_string;
186 scrub_last_string = saved->scrub_last_string;
187 scrub_file = saved->scrub_file;
192 int do_scrub_next_char(get,unget)
196 /*State 0: beginning of normal line
197 1: After first whitespace on line (flush more white)
198 2: After first non-white (opcode) on line (keep 1white)
199 3: after second white on line (into operands) (flush white)
200 4: after putting out a .line, put out digits
201 5: parsing a string, then go to old-state
202 6: putting out \ escape in a "d string.
203 7: After putting out a .app-file, put out string.
204 8: After putting out a .app-file string, flush until newline.
205 -1: output string in out_string and go to the state in old_state
206 -2: flush text until a '*' '/' is seen, then go to state old_state
209 register int ch, ch2 = 0;
224 } while(ch!=EOF && ch!='\n' && ch!='*');
225 if(ch=='\n' || ch==EOF)
228 /* At this point, ch must be a '*' */
229 while ( (ch=(*get)()) == '*' ){
232 if(ch==EOF || ch=='/')
241 if(ch==EOF || (ch>='0' && ch<='9'))
244 while(ch!=EOF && IS_WHITESPACE(ch))
248 out_string="\n.app-file ";
251 return *out_string++;
253 while(ch!=EOF && ch!='\n')
264 } else if(ch=='\\') {
268 as_warn("End of file in string: inserted '\"'");
280 /* This is neat. Turn "string
281 more string" into "string\n more string"
297 #endif /* BACKSLASH_V */
308 #ifdef ONLY_STANDARD_ESCAPES
310 as_warn("Unknown escape '\\%c' in string: Ignored",ch);
312 #else /* ONLY_STANDARD_ESCAPES */
314 /* Accept \x as x for any x */
316 #endif /* ONLY_STANDARD_ESCAPES */
319 as_warn("End of file in string: '\"' inserted");
337 /* OK, we are somewhere in states 0 through 4 */
344 as_warn("End of file not at end of a line: Newline inserted.");
349 case LEX_IS_WHITESPACE:
351 while(ch!=EOF && IS_WHITESPACE(ch));
354 if(IS_COMMENT(ch) || (state==0 && IS_LINE_COMMENT(ch)) || ch=='/' || IS_LINE_SEPARATOR(ch)) {
358 case 0: state++; goto recycle; /* Punted leading sp */
359 case 1: BAD_CASE(state); /* We can't get here */
360 case 2: state++; (*unget)(ch); return ' '; /* Sp after opco */
361 case 3: goto recycle; /* Sp in operands */
362 default: BAD_CASE(state);
366 case LEX_IS_TWOCHAR_COMMENT_1ST:
368 if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND) {
372 if(ch2 != EOF && IS_NEWLINE(ch2))
375 (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
378 (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)){
383 || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
388 as_warn("End of file in multiline comment");
399 case LEX_IS_STRINGQUOTE:
405 case LEX_IS_ONECHAR_QUOTE:
408 as_warn("End-of-file after a one-character quote; \000 inserted");
411 sprintf(out_buf,"%d", (int)(unsigned char)ch);
413 /* None of these 'x constants for us. We want 'x'.
415 if ( (ch=(*get)()) != '\'' ) {
416 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
417 as_warn("Missing close quote: (assumed)");
426 return *out_string++;
434 /* Roll out a bunch of newlines from inside comments, etc. */
439 /* fall thru into... */
441 case LEX_IS_LINE_SEPARATOR:
445 case LEX_IS_LINE_COMMENT_START:
446 if (state != 0) /* Not at start of line, act normal */
449 /* FIXME-someday: The two character comment stuff was badly
450 thought out. On i386, we want '/' as line comment start
451 AND we want C style comments. hence this hack. The
452 whole lexical process should be reworked. xoxorich. */
454 if (ch == '/' && (ch2 = (*get)()) == '*') {
456 return(do_scrub_next_char(get, unget));
462 while(ch!=EOF && IS_WHITESPACE(ch));
464 as_warn("EOF in comment: Newline inserted");
467 if(ch<'0' || ch>'9') {
468 /* Non-numerics: Eat whole comment line */
469 while(ch!=EOF && !IS_NEWLINE(ch))
472 as_warn("EOF in Comment: Newline inserted");
476 /* Numerics begin comment. Perhaps CPP `# 123 "filename"' */
481 return *out_string++;
483 case LEX_IS_COMMENT_START:
485 while(ch!=EOF && !IS_NEWLINE(ch));
487 as_warn("EOF in comment: Newline inserted");
493 /* Some relatively `normal' character. */
495 state=2; /* Now seeing opcode */
497 } else if(state==1) {
501 return ch; /* Opcode or operands already */
509 char comment_chars[] = "|";
510 char line_comment_chars[] = "#";
517 while((ch=do_scrub_next_char(stdin))!=EOF)