]>
Commit | Line | Data |
---|---|---|
3340f7e5 | 1 | /* Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc. |
a39116f1 RP |
2 | |
3 | Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 | |
4 | */ | |
fecd2382 RP |
5 | /* This is the Assembler Pre-Processor |
6 | Copyright (C) 1987 Free Software Foundation, Inc. | |
a39116f1 RP |
7 | |
8 | This file is part of GAS, the GNU Assembler. | |
9 | ||
10 | GAS is free software; you can redistribute it and/or modify | |
11 | it under the terms of the GNU General Public License as published by | |
12 | the Free Software Foundation; either version 2, or (at your option) | |
13 | any later version. | |
14 | ||
15 | GAS is distributed in the hope that it will be useful, | |
16 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 | GNU General Public License for more details. | |
19 | ||
20 | You should have received a copy of the GNU General Public License | |
21 | along with GAS; see the file COPYING. If not, write to | |
22 | the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ | |
fecd2382 RP |
23 | |
24 | /* App, the assembler pre-processor. This pre-processor strips out excess | |
25 | spaces, turns single-quoted characters into a decimal constant, and turns | |
26 | # <number> <filename> <garbage> into a .line <number>\n.app-file <filename> pair. | |
27 | This needs better error-handling. | |
a39116f1 | 28 | */ |
fecd2382 RP |
29 | |
30 | #include <stdio.h> | |
31 | #include "as.h" /* For BAD_CASE() only */ | |
32 | ||
/* Pre-ANSI compilers have no `const'; make it vanish for them. */
#if (__STDC__ != 1) && !defined(const)
#define const /* Nothing */
#endif

/* Character classification table: lex[c] holds one of the LEX_IS_*
   codes below, or 0 for a character with no special meaning.  It is
   filled in by do_scrub_begin().  */
static char lex [256];

/* Characters that may appear in a symbol name. */
static char symbol_chars[] =
  "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* These will go in BSS if not defined elsewhere, producing empty strings. */
extern const char comment_chars[];
extern const char line_comment_chars[];
extern const char line_separator_chars[];

/* Classification codes stored in lex[]. */
enum {
  LEX_IS_SYMBOL_COMPONENT    = 1,
  LEX_IS_WHITESPACE          = 2,
  LEX_IS_LINE_SEPARATOR      = 3,
  LEX_IS_COMMENT_START       = 4,
  LEX_IS_LINE_COMMENT_START  = 5,
  LEX_IS_TWOCHAR_COMMENT_1ST = 6,
  LEX_IS_TWOCHAR_COMMENT_2ND = 7,
  LEX_IS_STRINGQUOTE         = 8,
  LEX_IS_COLON               = 9,
  LEX_IS_NEWLINE             = 10,
  LEX_IS_ONECHAR_QUOTE       = 11
};

/* Predicates over lex[].  The argument is used directly as the array
   index, so it must not be EOF.  */
#define IS_SYMBOL_COMPONENT(c)	(lex[c] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)	(lex[c] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)	(lex[c] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c)		(lex[c] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)	(lex[c] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c)		(lex[c] == LEX_IS_NEWLINE)
63 | ||
64 | /* FIXME-soon: The entire lexer/parser thingy should be | |
65 | built statically at compile time rather than dynamically | |
66 | each and every time the assembler is run. xoxorich. */ | |
fecd2382 RP |
67 | |
68 | void do_scrub_begin() { | |
69 | const char *p; | |
a39116f1 RP |
70 | |
71 | lex[' '] = LEX_IS_WHITESPACE; | |
72 | lex['\t'] = LEX_IS_WHITESPACE; | |
73 | lex['\n'] = LEX_IS_NEWLINE; | |
74 | lex[';'] = LEX_IS_LINE_SEPARATOR; | |
75 | lex['"'] = LEX_IS_STRINGQUOTE; | |
76 | lex['\''] = LEX_IS_ONECHAR_QUOTE; | |
77 | lex[':'] = LEX_IS_COLON; | |
78 | ||
fecd2382 RP |
79 | /* Note that these override the previous defaults, e.g. if ';' |
80 | is a comment char, then it isn't a line separator. */ | |
a39116f1 | 81 | for (p = symbol_chars; *p; ++p) { |
fecd2382 | 82 | lex[*p] = LEX_IS_SYMBOL_COMPONENT; |
a39116f1 RP |
83 | } /* declare symbol characters */ |
84 | ||
85 | for (p = line_comment_chars; *p; p++) { | |
fecd2382 | 86 | lex[*p] = LEX_IS_LINE_COMMENT_START; |
a39116f1 RP |
87 | } /* declare line comment chars */ |
88 | ||
89 | for (p = comment_chars; *p; p++) { | |
90 | lex[*p] = LEX_IS_COMMENT_START; | |
91 | } /* declare comment chars */ | |
92 | ||
93 | for (p = line_separator_chars; *p; p++) { | |
fecd2382 | 94 | lex[*p] = LEX_IS_LINE_SEPARATOR; |
a39116f1 RP |
95 | } /* declare line separators */ |
96 | ||
fecd2382 RP |
97 | /* Only allow slash-star comments if slash is not in use */ |
98 | if (lex['/'] == 0) { | |
a39116f1 | 99 | lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST; |
fecd2382 | 100 | } |
a39116f1 RP |
101 | /* FIXME-soon. This is a bad hack but otherwise, we |
102 | can't do c-style comments when '/' is a line | |
103 | comment char. xoxorich. */ | |
104 | if (lex['*'] == 0) { | |
105 | lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND; | |
106 | } | |
107 | } /* do_scrub_begin() */ | |
fecd2382 RP |
108 | |
/* Stream the file-based scrubber callbacks read from. */
FILE *scrub_file;

/* `get' callback for file input: fetch the next character of
   scrub_file, or EOF.  */
int scrub_from_file() {
  int next_ch;

  next_ch = getc(scrub_file);
  return next_ch;
}
114 | ||
115 | void scrub_to_file(ch) | |
116 | int ch; | |
117 | { | |
118 | ungetc(ch,scrub_file); | |
119 | } /* scrub_to_file() */ | |
120 | ||
/* In-memory scrubber input: scrub_string is the read cursor and
   scrub_last_string marks one past the final byte.  */
char *scrub_string;
char *scrub_last_string;

/* `get' callback for in-memory input.  Returns the next byte as a value
   in 0..255, or EOF once the cursor reaches scrub_last_string.

   The byte is read through `unsigned char' so that, where plain `char'
   is signed, bytes >= 0x80 are not sign-extended: 0xFF must not be
   mistaken for EOF, and callers use the result as an index into the
   classification table.  */
int scrub_from_string() {
  if (scrub_string == scrub_last_string) {
    return EOF;
  }
  return (unsigned char) *scrub_string++;
} /* scrub_from_string() */
127 | ||
128 | void scrub_to_string(ch) | |
129 | int ch; | |
130 | { | |
131 | *--scrub_string=ch; | |
132 | } /* scrub_to_string() */ | |
133 | ||
/* State the scrubber carries from one do_scrub_next_char() call to
   the next.  */
static int state;		/* current scanner state */
static int old_state;		/* state to resume after a sub-state */
static char *out_string;	/* pending canned output (state -1) */
static char out_buf[20];	/* scratch for a formatted char constant */
static int add_newlines = 0;	/* newlines swallowed but still owed */

/* Everything app_push() must capture and app_pop() must restore.
   The scrubber runs asynchronously to the parsing of .include, so its
   state when the .include is interpreted has nothing to do with the
   directive itself; hence the whole lot is saved across #include's.  */

struct app_save {
  int state;
  int old_state;
  char *out_string;
  char out_buf[sizeof (out_buf)];
  int add_newlines;
  char *scrub_string;
  char *scrub_last_string;
  FILE *scrub_file;
};
156 | ||
157 | char *app_push() { | |
a39116f1 RP |
158 | register struct app_save *saved; |
159 | ||
160 | saved = (struct app_save *) xmalloc(sizeof (*saved)); | |
161 | saved->state = state; | |
162 | saved->old_state = old_state; | |
163 | saved->out_string = out_string; | |
164 | bcopy(saved->out_buf, out_buf, sizeof(out_buf)); | |
165 | saved->add_newlines = add_newlines; | |
166 | saved->scrub_string = scrub_string; | |
167 | saved->scrub_last_string = scrub_last_string; | |
168 | saved->scrub_file = scrub_file; | |
169 | ||
170 | /* do_scrub_begin() is not useful, just wastes time. */ | |
171 | return (char *)saved; | |
fecd2382 RP |
172 | } |
173 | ||
174 | void app_pop(arg) | |
175 | char *arg; | |
176 | { | |
a39116f1 RP |
177 | register struct app_save *saved = (struct app_save *)arg; |
178 | ||
179 | /* There is no do_scrub_end (). */ | |
180 | state = saved->state; | |
181 | old_state = saved->old_state; | |
182 | out_string = saved->out_string; | |
183 | bcopy (out_buf, saved->out_buf, sizeof (out_buf)); | |
184 | add_newlines = saved->add_newlines; | |
185 | scrub_string = saved->scrub_string; | |
186 | scrub_last_string = saved->scrub_last_string; | |
187 | scrub_file = saved->scrub_file; | |
188 | ||
189 | free (arg); | |
fecd2382 RP |
190 | } /* app_pop() */ |
191 | ||
192 | int do_scrub_next_char(get,unget) | |
193 | int (*get)(); | |
194 | void (*unget)(); | |
195 | { | |
196 | /*State 0: beginning of normal line | |
a39116f1 RP |
197 | 1: After first whitespace on line (flush more white) |
198 | 2: After first non-white (opcode) on line (keep 1white) | |
199 | 3: after second white on line (into operands) (flush white) | |
200 | 4: after putting out a .line, put out digits | |
201 | 5: parsing a string, then go to old-state | |
202 | 6: putting out \ escape in a "d string. | |
203 | 7: After putting out a .app-file, put out string. | |
204 | 8: After putting out a .app-file string, flush until newline. | |
205 | -1: output string in out_string and go to the state in old_state | |
206 | -2: flush text until a '*' '/' is seen, then go to state old_state | |
207 | */ | |
208 | ||
209 | register int ch, ch2 = 0; | |
210 | ||
fecd2382 RP |
211 | switch (state) { |
212 | case -1: | |
213 | ch= *out_string++; | |
214 | if(*out_string==0) { | |
215 | state=old_state; | |
216 | old_state=3; | |
217 | } | |
218 | return ch; | |
a39116f1 | 219 | |
fecd2382 RP |
220 | case -2: |
221 | for(;;) { | |
222 | do { | |
223 | ch=(*get)(); | |
224 | } while(ch!=EOF && ch!='\n' && ch!='*'); | |
225 | if(ch=='\n' || ch==EOF) | |
a39116f1 RP |
226 | return ch; |
227 | ||
fecd2382 RP |
228 | /* At this point, ch must be a '*' */ |
229 | while ( (ch=(*get)()) == '*' ){ | |
230 | ; | |
231 | } | |
232 | if(ch==EOF || ch=='/') | |
a39116f1 | 233 | break; |
fecd2382 RP |
234 | (*unget)(ch); |
235 | } | |
236 | state=old_state; | |
237 | return ' '; | |
a39116f1 | 238 | |
fecd2382 RP |
239 | case 4: |
240 | ch=(*get)(); | |
241 | if(ch==EOF || (ch>='0' && ch<='9')) | |
a39116f1 | 242 | return ch; |
fecd2382 RP |
243 | else { |
244 | while(ch!=EOF && IS_WHITESPACE(ch)) | |
a39116f1 | 245 | ch=(*get)(); |
fecd2382 RP |
246 | if(ch=='"') { |
247 | (*unget)(ch); | |
248 | out_string="\n.app-file "; | |
249 | old_state=7; | |
250 | state= -1; | |
251 | return *out_string++; | |
252 | } else { | |
253 | while(ch!=EOF && ch!='\n') | |
a39116f1 | 254 | ch=(*get)(); |
fecd2382 RP |
255 | return ch; |
256 | } | |
257 | } | |
a39116f1 | 258 | |
fecd2382 RP |
259 | case 5: |
260 | ch=(*get)(); | |
261 | if(ch=='"') { | |
262 | state=old_state; | |
263 | return '"'; | |
264 | } else if(ch=='\\') { | |
265 | state=6; | |
266 | return ch; | |
267 | } else if(ch==EOF) { | |
268 | as_warn("End of file in string: inserted '\"'"); | |
269 | state=old_state; | |
270 | (*unget)('\n'); | |
271 | return '"'; | |
272 | } else { | |
273 | return ch; | |
274 | } | |
a39116f1 | 275 | |
fecd2382 RP |
276 | case 6: |
277 | state=5; | |
278 | ch=(*get)(); | |
279 | switch(ch) { | |
280 | /* This is neet. Turn "string | |
281 | more string" into "string\n more string" | |
a39116f1 | 282 | */ |
fecd2382 RP |
283 | case '\n': |
284 | (*unget)('n'); | |
285 | add_newlines++; | |
286 | return '\\'; | |
a39116f1 | 287 | |
fecd2382 RP |
288 | case '"': |
289 | case '\\': | |
290 | case 'b': | |
291 | case 'f': | |
292 | case 'n': | |
293 | case 'r': | |
294 | case 't': | |
295 | #ifdef BACKSLASH_V | |
296 | case 'v': | |
297 | #endif /* BACKSLASH_V */ | |
298 | case '0': | |
299 | case '1': | |
300 | case '2': | |
301 | case '3': | |
302 | case '4': | |
303 | case '5': | |
304 | case '6': | |
305 | case '7': | |
306 | break; | |
a39116f1 | 307 | |
fecd2382 RP |
308 | #ifdef ONLY_STANDARD_ESCAPES |
309 | default: | |
310 | as_warn("Unknown escape '\\%c' in string: Ignored",ch); | |
311 | break; | |
312 | #else /* ONLY_STANDARD_ESCAPES */ | |
313 | default: | |
314 | /* Accept \x as x for any x */ | |
315 | break; | |
316 | #endif /* ONLY_STANDARD_ESCAPES */ | |
a39116f1 | 317 | |
fecd2382 RP |
318 | case EOF: |
319 | as_warn("End of file in string: '\"' inserted"); | |
320 | return '"'; | |
321 | } | |
322 | return ch; | |
a39116f1 | 323 | |
fecd2382 RP |
324 | case 7: |
325 | ch=(*get)(); | |
326 | state=5; | |
327 | old_state=8; | |
328 | return ch; | |
a39116f1 | 329 | |
fecd2382 RP |
330 | case 8: |
331 | do ch= (*get)(); | |
332 | while(ch!='\n'); | |
333 | state=0; | |
334 | return ch; | |
335 | } | |
a39116f1 | 336 | |
fecd2382 | 337 | /* OK, we are somewhere in states 0 through 4 */ |
a39116f1 RP |
338 | |
339 | /* flushchar: */ | |
fecd2382 RP |
340 | ch=(*get)(); |
341 | recycle: | |
342 | if (ch == EOF) { | |
343 | if (state != 0) | |
a39116f1 | 344 | as_warn("End of file not at end of a line: Newline inserted."); |
fecd2382 RP |
345 | return ch; |
346 | } | |
a39116f1 | 347 | |
fecd2382 RP |
348 | switch (lex[ch]) { |
349 | case LEX_IS_WHITESPACE: | |
350 | do ch=(*get)(); | |
351 | while(ch!=EOF && IS_WHITESPACE(ch)); | |
352 | if(ch==EOF) | |
a39116f1 | 353 | return ch; |
fecd2382 RP |
354 | if(IS_COMMENT(ch) || (state==0 && IS_LINE_COMMENT(ch)) || ch=='/' || IS_LINE_SEPARATOR(ch)) { |
355 | goto recycle; | |
356 | } | |
357 | switch (state) { | |
358 | case 0: state++; goto recycle; /* Punted leading sp */ | |
359 | case 1: BAD_CASE(state); /* We can't get here */ | |
360 | case 2: state++; (*unget)(ch); return ' '; /* Sp after opco */ | |
361 | case 3: goto recycle; /* Sp in operands */ | |
362 | default: BAD_CASE(state); | |
363 | } | |
364 | break; | |
a39116f1 | 365 | |
fecd2382 RP |
366 | case LEX_IS_TWOCHAR_COMMENT_1ST: |
367 | ch2=(*get)(); | |
368 | if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND) { | |
369 | for(;;) { | |
370 | do { | |
371 | ch2=(*get)(); | |
372 | if(ch2 != EOF && IS_NEWLINE(ch2)) | |
a39116f1 | 373 | add_newlines++; |
fecd2382 | 374 | } while(ch2!=EOF && |
a39116f1 RP |
375 | (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND)); |
376 | ||
fecd2382 | 377 | while (ch2!=EOF && |
a39116f1 | 378 | (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)){ |
fecd2382 RP |
379 | ch2=(*get)(); |
380 | } | |
a39116f1 | 381 | |
fecd2382 | 382 | if(ch2==EOF |
a39116f1 RP |
383 | || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST) |
384 | break; | |
fecd2382 RP |
385 | (*unget)(ch); |
386 | } | |
387 | if(ch2==EOF) | |
a39116f1 RP |
388 | as_warn("End of file in multiline comment"); |
389 | ||
fecd2382 RP |
390 | ch = ' '; |
391 | goto recycle; | |
392 | } else { | |
393 | if(ch2!=EOF) | |
a39116f1 | 394 | (*unget)(ch2); |
fecd2382 RP |
395 | return ch; |
396 | } | |
397 | break; | |
a39116f1 | 398 | |
fecd2382 RP |
399 | case LEX_IS_STRINGQUOTE: |
400 | old_state=state; | |
401 | state=5; | |
402 | return ch; | |
a39116f1 RP |
403 | |
404 | #ifndef IEEE_STYLE | |
fecd2382 RP |
405 | case LEX_IS_ONECHAR_QUOTE: |
406 | ch=(*get)(); | |
407 | if(ch==EOF) { | |
408 | as_warn("End-of-file after a one-character quote; \000 inserted"); | |
409 | ch=0; | |
410 | } | |
411 | sprintf(out_buf,"%d", (int)(unsigned char)ch); | |
a39116f1 | 412 | |
fecd2382 RP |
413 | /* None of these 'x constants for us. We want 'x'. |
414 | */ | |
415 | if ( (ch=(*get)()) != '\'' ) { | |
416 | #ifdef REQUIRE_CHAR_CLOSE_QUOTE | |
417 | as_warn("Missing close quote: (assumed)"); | |
418 | #else | |
419 | (*unget)(ch); | |
420 | #endif | |
421 | } | |
a39116f1 | 422 | |
fecd2382 RP |
423 | old_state=state; |
424 | state= -1; | |
425 | out_string=out_buf; | |
426 | return *out_string++; | |
a39116f1 | 427 | #endif |
fecd2382 RP |
428 | case LEX_IS_COLON: |
429 | if(state!=3) | |
a39116f1 | 430 | state=0; |
fecd2382 | 431 | return ch; |
a39116f1 | 432 | |
fecd2382 RP |
433 | case LEX_IS_NEWLINE: |
434 | /* Roll out a bunch of newlines from inside comments, etc. */ | |
435 | if(add_newlines) { | |
436 | --add_newlines; | |
437 | (*unget)(ch); | |
438 | } | |
439 | /* fall thru into... */ | |
a39116f1 | 440 | |
fecd2382 RP |
441 | case LEX_IS_LINE_SEPARATOR: |
442 | state=0; | |
443 | return ch; | |
a39116f1 | 444 | |
fecd2382 RP |
445 | case LEX_IS_LINE_COMMENT_START: |
446 | if (state != 0) /* Not at start of line, act normal */ | |
a39116f1 RP |
447 | goto de_fault; |
448 | ||
449 | /* FIXME-someday: The two character comment stuff was badly | |
450 | thought out. On i386, we want '/' as line comment start | |
451 | AND we want C style comments. hence this hack. The | |
452 | whole lexical process should be reworked. xoxorich. */ | |
453 | ||
454 | if (ch == '/' && (ch2 = (*get)()) == '*') { | |
455 | state = -2; | |
456 | return(do_scrub_next_char(get, unget)); | |
457 | } else { | |
458 | (*unget)(ch2); | |
459 | } /* bad hack */ | |
460 | ||
fecd2382 RP |
461 | do ch=(*get)(); |
462 | while(ch!=EOF && IS_WHITESPACE(ch)); | |
463 | if(ch==EOF) { | |
464 | as_warn("EOF in comment: Newline inserted"); | |
465 | return '\n'; | |
466 | } | |
467 | if(ch<'0' || ch>'9') { | |
468 | /* Non-numerics: Eat whole comment line */ | |
469 | while(ch!=EOF && !IS_NEWLINE(ch)) | |
a39116f1 | 470 | ch=(*get)(); |
fecd2382 | 471 | if(ch==EOF) |
a39116f1 | 472 | as_warn("EOF in Comment: Newline inserted"); |
fecd2382 RP |
473 | state=0; |
474 | return '\n'; | |
475 | } | |
476 | /* Numerics begin comment. Perhaps CPP `# 123 "filename"' */ | |
477 | (*unget)(ch); | |
478 | old_state=4; | |
479 | state= -1; | |
480 | out_string=".line "; | |
481 | return *out_string++; | |
a39116f1 | 482 | |
fecd2382 RP |
483 | case LEX_IS_COMMENT_START: |
484 | do ch=(*get)(); | |
485 | while(ch!=EOF && !IS_NEWLINE(ch)); | |
486 | if(ch==EOF) | |
a39116f1 | 487 | as_warn("EOF in comment: Newline inserted"); |
fecd2382 RP |
488 | state=0; |
489 | return '\n'; | |
a39116f1 | 490 | |
fecd2382 RP |
491 | default: |
492 | de_fault: | |
493 | /* Some relatively `normal' character. */ | |
494 | if(state==0) { | |
495 | state=2; /* Now seeing opcode */ | |
496 | return ch; | |
497 | } else if(state==1) { | |
498 | state=2; /* Ditto */ | |
499 | return ch; | |
500 | } else { | |
501 | return ch; /* Opcode or operands already */ | |
502 | } | |
503 | } | |
504 | return -1; | |
505 | } | |
506 | ||
#ifdef TEST

/* Stand-alone filter for exercising the scrubber: copies stdin to
   stdout through do_scrub_next_char().  The three tables below stand
   in for the ones a target configuration would normally supply; they
   are const-qualified to agree with the extern declarations near the
   top of the file.  */
const char comment_chars[] = "|";
const char line_comment_chars[] = "#";
const char line_separator_chars[] = "";

int main()
{
  int ch;

  /* Read through the file-based callbacks, which use scrub_file. */
  scrub_file = stdin;
  do_scrub_begin();
  while ((ch = do_scrub_next_char(scrub_from_file, scrub_to_file)) != EOF)
    putc(ch,stdout);
  return 0;
}

/* Minimal warning reporter: print STR and a newline on stderr.  Any
   further arguments a caller passes are ignored, so a `%c' in STR is
   printed literally.
   NOTE(review): as.h may already declare as_warn -- confirm this
   definition agrees with it when building with -DTEST.  */
as_warn(str)
char *str;
{
  fputs(str,stderr);
  putc('\n',stderr);
}
#endif
528 | ||
529 | /* | |
530 | * Local Variables: | |
531 | * comment-column: 0 | |
532 | * fill-column: 131 | |
533 | * End: | |
534 | */ | |
535 | ||
536 | /* end of app.c */ |