gas/app.c

   1 /* Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.
   2
   3    Modified by Allen Wirfs-Brock, Instantiations Inc 2/90
   4    */
   5 /* This is the Assembler Pre-Processor
   6    Copyright (C) 1987 Free Software Foundation, Inc.
   7
   8    This file is part of GAS, the GNU Assembler.
   9
  10    GAS is free software; you can redistribute it and/or modify
  11    it under the terms of the GNU General Public License as published by
  12    the Free Software Foundation; either version 2, or (at your option)
  13    any later version.
  14
  15    GAS is distributed in the hope that it will be useful,
  16    but WITHOUT ANY WARRANTY; without even the implied warranty of
  17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18    GNU General Public License for more details.
  19
  20    You should have received a copy of the GNU General Public License
  21    along with GAS; see the file COPYING.  If not, write to
  22    the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
  23
  24 /* App, the assembler pre-processor.  This pre-processor strips out excess
  25    spaces, turns single-quoted characters into a decimal constant, and turns
  26    # <number> <filename> <garbage> into a .line <number>\n.app-file <filename> pair.
  27    This needs better error-handling.
  28    */
  29
  30 #include <stdio.h>
  31 #include "as.h"         /* For BAD_CASE() only */
  32
  33 #if (__STDC__ != 1) && !defined(const)
  34 #define const /* Nothing */
  35 #endif
  36
  37 static char     lex [256];
  38 static char     symbol_chars[] =
  39     "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
  40
  41 /* These will go in BSS if not defined elsewhere, producing empty strings. */
  42 extern const char comment_chars[];
  43 extern const char line_comment_chars[];
  44 extern const char line_separator_chars[];
  45
  46 #define LEX_IS_SYMBOL_COMPONENT         1
  47 #define LEX_IS_WHITESPACE               2
  48 #define LEX_IS_LINE_SEPARATOR           3
  49 #define LEX_IS_COMMENT_START            4
  50 #define LEX_IS_LINE_COMMENT_START       5
  51 #define LEX_IS_TWOCHAR_COMMENT_1ST      6
  52 #define LEX_IS_TWOCHAR_COMMENT_2ND      7
  53 #define LEX_IS_STRINGQUOTE              8
  54 #define LEX_IS_COLON                    9
  55 #define LEX_IS_NEWLINE                  10
  56 #define LEX_IS_ONECHAR_QUOTE            11
  57 #define IS_SYMBOL_COMPONENT(c)          (lex[c] == LEX_IS_SYMBOL_COMPONENT)
  58 #define IS_WHITESPACE(c)                (lex[c] == LEX_IS_WHITESPACE)
  59 #define IS_LINE_SEPARATOR(c)            (lex[c] == LEX_IS_LINE_SEPARATOR)
  60 #define IS_COMMENT(c)                   (lex[c] == LEX_IS_COMMENT_START)
  61 #define IS_LINE_COMMENT(c)              (lex[c] == LEX_IS_LINE_COMMENT_START)
  62 #define IS_NEWLINE(c)                   (lex[c] == LEX_IS_NEWLINE)
  63
  64 /* FIXME-soon: The entire lexer/parser thingy should be
  65    built statically at compile time rather than dynamically
  66    each and every time the assembler is run.  xoxorich. */
  67
  68 void do_scrub_begin() {
  69         const char *p;
  70
  71         lex[' '] = LEX_IS_WHITESPACE;
  72         lex['\t'] = LEX_IS_WHITESPACE;
  73         lex['\n'] = LEX_IS_NEWLINE;
  74         lex[';'] = LEX_IS_LINE_SEPARATOR;
  75         lex['"'] = LEX_IS_STRINGQUOTE;
  76         lex['\''] = LEX_IS_ONECHAR_QUOTE;
  77         lex[':'] = LEX_IS_COLON;
  78
  79         /* Note that these override the previous defaults, e.g. if ';'
  80            is a comment char, then it isn't a line separator.  */
  81         for (p = symbol_chars; *p; ++p) {
  82                 lex[*p] = LEX_IS_SYMBOL_COMPONENT;
  83         } /* declare symbol characters */
  84
  85         for (p = line_comment_chars; *p; p++) {
  86                 lex[*p] = LEX_IS_LINE_COMMENT_START;
  87         } /* declare line comment chars */
  88
  89         for (p = comment_chars; *p; p++) {
  90                 lex[*p] = LEX_IS_COMMENT_START;
  91         } /* declare comment chars */
  92
  93         for (p = line_separator_chars; *p; p++) {
  94                 lex[*p] = LEX_IS_LINE_SEPARATOR;
  95         } /* declare line separators */
  96
  97         /* Only allow slash-star comments if slash is not in use */
  98         if (lex['/'] == 0) {
  99                 lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
 100         }
 101         /* FIXME-soon.  This is a bad hack but otherwise, we
 102            can't do c-style comments when '/' is a line
 103            comment char. xoxorich. */
 104         if (lex['*'] == 0) {
 105                 lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
 106         }
 107 } /* do_scrub_begin() */
 108
 109 FILE *scrub_file;
 110
 111 int scrub_from_file() {
 112         return getc(scrub_file);
 113 }
 114
 115 void scrub_to_file(ch)
 116 int ch;
 117 {
 118         ungetc(ch,scrub_file);
 119 } /* scrub_to_file() */
 120
 121 char *scrub_string;
 122 char *scrub_last_string;
 123
 124 int scrub_from_string() {
 125         return scrub_string == scrub_last_string ? EOF : *scrub_string++;
 126 } /* scrub_from_string() */
 127
 128 void scrub_to_string(ch)
 129 int ch;
 130 {
 131         *--scrub_string=ch;
 132 } /* scrub_to_string() */
 133
 134 /* Saved state of the scrubber */
 135 static int state;
 136 static int old_state;
 137 static char *out_string;
 138 static char out_buf[20];
 139 static int add_newlines = 0;
 140
 141 /* Data structure for saving the state of app across #include's.  Note that
 142    app is called asynchronously to the parsing of the .include's, so our
 143    state at the time .include is interpreted is completely unrelated.
 144    That's why we have to save it all.  */
 145
 146 struct app_save {
 147         int state;
 148         int old_state;
 149         char *out_string;
 150         char out_buf[sizeof (out_buf)];
 151         int add_newlines;
 152         char *scrub_string;
 153         char *scrub_last_string;
 154         FILE *scrub_file;
 155 };
 156
 157 char *app_push() {
 158         register struct app_save *saved;
 159
 160         saved = (struct app_save *) xmalloc(sizeof (*saved));
 161         saved->state            = state;
 162         saved->old_state        = old_state;
 163         saved->out_string       = out_string;
 164         bcopy(saved->out_buf, out_buf, sizeof(out_buf));
 165         saved->add_newlines     = add_newlines;
 166         saved->scrub_string     = scrub_string;
 167         saved->scrub_last_string = scrub_last_string;
 168         saved->scrub_file       = scrub_file;
 169
 170         /* do_scrub_begin() is not useful, just wastes time. */
 171         return (char *)saved;
 172 }
 173
 174 void app_pop(arg)
 175 char *arg;
 176 {
 177         register struct app_save *saved = (struct app_save *)arg;
 178
 179         /* There is no do_scrub_end (). */
 180         state           = saved->state;
 181         old_state       = saved->old_state;
 182         out_string      = saved->out_string;
 183         bcopy (out_buf,  saved->out_buf, sizeof (out_buf));
 184         add_newlines    = saved->add_newlines;
 185         scrub_string    = saved->scrub_string;
 186         scrub_last_string = saved->scrub_last_string;
 187         scrub_file      = saved->scrub_file;
 188
 189         free (arg);
 190 } /* app_pop() */
 191
 192 int do_scrub_next_char(get,unget)
 193 int (*get)();
 194 void (*unget)();
 195 {
 196         /*State 0: beginning of normal line
 197           1: After first whitespace on line (flush more white)
 198           2: After first non-white (opcode) on line (keep 1white)
 199           3: after second white on line (into operands) (flush white)
 200           4: after putting out a .line, put out digits
 201           5: parsing a string, then go to old-state
 202           6: putting out \ escape in a "d string.
 203           7: After putting out a .app-file, put out string.
 204           8: After putting out a .app-file string, flush until newline.
 205           -1: output string in out_string and go to the state in old_state
 206           -2: flush text until a '*' '/' is seen, then go to state old_state
 207           */
 208
 209         register int ch, ch2 = 0;
 210
 211         switch (state) {
 212         case -1:
 213                 ch= *out_string++;
 214                 if(*out_string==0) {
 215                         state=old_state;
 216                         old_state=3;
 217                 }
 218                 return ch;
 219
 220         case -2:
 221                 for(;;) {
 222                         do {
 223                                 ch=(*get)();
 224                         } while(ch!=EOF && ch!='\n' && ch!='*');
 225                         if(ch=='\n' || ch==EOF)
 226                             return ch;
 227
 228                         /* At this point, ch must be a '*' */
 229                         while ( (ch=(*get)()) == '*' ){
 230                                 ;
 231                         }
 232                         if(ch==EOF || ch=='/')
 233                             break;
 234                         (*unget)(ch);
 235                 }
 236                 state=old_state;
 237                 return ' ';
 238
 239         case 4:
 240                 ch=(*get)();
 241                 if(ch==EOF || (ch>='0' && ch<='9'))
 242                     return ch;
 243                 else {
 244                         while(ch!=EOF && IS_WHITESPACE(ch))
 245                             ch=(*get)();
 246                         if(ch=='"') {
 247                                 (*unget)(ch);
 248                                 out_string="\n.app-file ";
 249                                 old_state=7;
 250                                 state= -1;
 251                                 return *out_string++;
 252                         } else {
 253                                 while(ch!=EOF && ch!='\n')
 254                                     ch=(*get)();
 255                                 return ch;
 256                         }
 257                 }
 258
 259         case 5:
 260                 ch=(*get)();
 261                 if(ch=='"') {
 262                         state=old_state;
 263                         return '"';
 264                 } else if(ch=='\\') {
 265                         state=6;
 266                         return ch;
 267                 } else if(ch==EOF) {
 268                         as_warn("End of file in string: inserted '\"'");
 269                         state=old_state;
 270                         (*unget)('\n');
 271                         return '"';
 272                 } else {
 273                         return ch;
 274                 }
 275
 276         case 6:
 277                 state=5;
 278                 ch=(*get)();
 279                 switch(ch) {
 280                         /* This is neet.  Turn "string
 281                            more string" into "string\n  more string"
 282                            */
 283                 case '\n':
 284                         (*unget)('n');
 285                         add_newlines++;
 286                         return '\\';
 287
 288                 case '"':
 289                 case '\\':
 290                 case 'b':
 291                 case 'f':
 292                 case 'n':
 293                 case 'r':
 294                 case 't':
 295 #ifdef BACKSLASH_V
 296                 case 'v':
 297 #endif /* BACKSLASH_V */
 298                 case '0':
 299                 case '1':
 300                 case '2':
 301                 case '3':
 302                 case '4':
 303                 case '5':
 304                 case '6':
 305                 case '7':
 306                         break;
 307
 308 #ifdef ONLY_STANDARD_ESCAPES
 309                 default:
 310                         as_warn("Unknown escape '\\%c' in string: Ignored",ch);
 311                         break;
 312 #else /* ONLY_STANDARD_ESCAPES */
 313                 default:
 314                         /* Accept \x as x for any x */
 315                         break;
 316 #endif /* ONLY_STANDARD_ESCAPES */
 317
 318                 case EOF:
 319                         as_warn("End of file in string: '\"' inserted");
 320                         return '"';
 321                 }
 322                 return ch;
 323
 324         case 7:
 325                 ch=(*get)();
 326                 state=5;
 327                 old_state=8;
 328                 return ch;
 329
 330         case 8:
 331                 do ch= (*get)();
 332                 while(ch!='\n');
 333                 state=0;
 334                 return ch;
 335         }
 336
 337         /* OK, we are somewhere in states 0 through 4 */
 338
 339         /* flushchar: */
 340         ch=(*get)();
 341  recycle:
 342         if (ch == EOF) {
 343                 if (state != 0)
 344                     as_warn("End of file not at end of a line: Newline inserted.");
 345                 return ch;
 346         }
 347
 348         switch (lex[ch]) {
 349         case LEX_IS_WHITESPACE:
 350                 do ch=(*get)();
 351                 while(ch!=EOF && IS_WHITESPACE(ch));
 352                 if(ch==EOF)
 353                     return ch;
 354                 if(IS_COMMENT(ch) || (state==0 && IS_LINE_COMMENT(ch)) || ch=='/' || IS_LINE_SEPARATOR(ch)) {
 355                         goto recycle;
 356                 }
 357                 switch (state) {
 358                 case 0: state++; goto recycle;  /* Punted leading sp */
 359                 case 1:          BAD_CASE(state); /* We can't get here */
 360                 case 2: state++; (*unget)(ch); return ' ';  /* Sp after opco */
 361                 case 3:          goto recycle;  /* Sp in operands */
 362                 default:        BAD_CASE(state);
 363                 }
 364                 break;
 365
 366         case LEX_IS_TWOCHAR_COMMENT_1ST:
 367                 ch2=(*get)();
 368                 if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND) {
 369                         for(;;) {
 370                                 do {
 371                                         ch2=(*get)();
 372                                         if(ch2 != EOF && IS_NEWLINE(ch2))
 373                                             add_newlines++;
 374                                 } while(ch2!=EOF &&
 375                                         (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
 376
 377                                 while (ch2!=EOF &&
 378                                        (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)){
 379                                         ch2=(*get)();
 380                                 }
 381
 382                                 if(ch2==EOF
 383                                    || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
 384                                     break;
 385                                 (*unget)(ch);
 386                         }
 387                         if(ch2==EOF)
 388                             as_warn("End of file in multiline comment");
 389
 390                         ch = ' ';
 391                         goto recycle;
 392                 } else {
 393                         if(ch2!=EOF)
 394                             (*unget)(ch2);
 395                         return ch;
 396                 }
 397                 break;
 398
 399         case LEX_IS_STRINGQUOTE:
 400                 old_state=state;
 401                 state=5;
 402                 return ch;
 403
 404 #ifndef IEEE_STYLE
 405         case LEX_IS_ONECHAR_QUOTE:
 406                 ch=(*get)();
 407                 if(ch==EOF) {
 408                         as_warn("End-of-file after a one-character quote; \000 inserted");
 409                         ch=0;
 410                 }
 411                 sprintf(out_buf,"%d", (int)(unsigned char)ch);
 412
 413                 /* None of these 'x constants for us.  We want 'x'.
 414                  */
 415                 if ( (ch=(*get)()) != '\'' ) {
 416 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
 417                         as_warn("Missing close quote: (assumed)");
 418 #else
 419                         (*unget)(ch);
 420 #endif
 421                 }
 422
 423                 old_state=state;
 424                 state= -1;
 425                 out_string=out_buf;
 426                 return *out_string++;
 427 #endif
 428         case LEX_IS_COLON:
 429                 if(state!=3)
 430                     state=0;
 431                 return ch;
 432
 433         case LEX_IS_NEWLINE:
 434                 /* Roll out a bunch of newlines from inside comments, etc.  */
 435                 if(add_newlines) {
 436                         --add_newlines;
 437                         (*unget)(ch);
 438                 }
 439                 /* fall thru into... */
 440
 441         case LEX_IS_LINE_SEPARATOR:
 442                 state=0;
 443                 return ch;
 444
 445         case LEX_IS_LINE_COMMENT_START:
 446                 if (state != 0)         /* Not at start of line, act normal */
 447                     goto de_fault;
 448
 449                 /* FIXME-someday: The two character comment stuff was badly
 450                    thought out.  On i386, we want '/' as line comment start
 451                    AND we want C style comments.  hence this hack.  The
 452                    whole lexical process should be reworked.  xoxorich.  */
 453
 454                 if (ch == '/' && (ch2 = (*get)()) == '*') {
 455                         state = -2;
 456                         return(do_scrub_next_char(get, unget));
 457                 } else {
 458                         (*unget)(ch2);
 459                 } /* bad hack */
 460
 461                 do ch=(*get)();
 462                 while(ch!=EOF && IS_WHITESPACE(ch));
 463                 if(ch==EOF) {
 464                         as_warn("EOF in comment:  Newline inserted");
 465                         return '\n';
 466                 }
 467                 if(ch<'0' || ch>'9') {
 468                         /* Non-numerics:  Eat whole comment line */
 469                         while(ch!=EOF && !IS_NEWLINE(ch))
 470                             ch=(*get)();
 471                         if(ch==EOF)
 472                             as_warn("EOF in Comment: Newline inserted");
 473                         state=0;
 474                         return '\n';
 475                 }
 476                 /* Numerics begin comment.  Perhaps CPP `# 123 "filename"' */
 477                 (*unget)(ch);
 478                 old_state=4;
 479                 state= -1;
 480                 out_string=".line ";
 481                 return *out_string++;
 482
 483         case LEX_IS_COMMENT_START:
 484                 do ch=(*get)();
 485                 while(ch!=EOF && !IS_NEWLINE(ch));
 486                 if(ch==EOF)
 487                     as_warn("EOF in comment:  Newline inserted");
 488                 state=0;
 489                 return '\n';
 490
 491         default:
 492         de_fault:
 493                 /* Some relatively `normal' character.  */
 494                 if(state==0) {
 495                         state=2;        /* Now seeing opcode */
 496                         return ch;
 497                 } else if(state==1) {
 498                         state=2;        /* Ditto */
 499                         return ch;
 500                 } else {
 501                         return ch;      /* Opcode or operands already */
 502                 }
 503         }
 504         return -1;
 505 }
 506
 507 #ifdef TEST
 508
 509 char comment_chars[] = "|";
 510 char line_comment_chars[] = "#";
 511
 512 main()
 513 {
 514         int     ch;
 515
 516         app_begin();
 517         while((ch=do_scrub_next_char(stdin))!=EOF)
 518             putc(ch,stdout);
 519 }
 520
 521 as_warn(str)
 522 char *str;
 523 {
 524         fputs(str,stderr);
 525         putc('\n',stderr);
 526 }
 527 #endif
 528
 529 /*
 530  * Local Variables:
 531  * comment-column: 0
 532  * fill-column: 131
 533  * End:
 534  */
 535
 536 /* end of app.c */