gas/app.c

   1 /* Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.
   2
   3    Modified by Allen Wirfs-Brock, Instantiations Inc 2/90
   4    */
   5 /* This is the Assembler Pre-Processor
   6    Copyright (C) 1987 Free Software Foundation, Inc.
   7
   8    This file is part of GAS, the GNU Assembler.
   9
  10    GAS is free software; you can redistribute it and/or modify
  11    it under the terms of the GNU General Public License as published by
  12    the Free Software Foundation; either version 2, or (at your option)
  13    any later version.
  14
  15    GAS is distributed in the hope that it will be useful,
  16    but WITHOUT ANY WARRANTY; without even the implied warranty of
  17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18    GNU General Public License for more details.
  19
  20    You should have received a copy of the GNU General Public License
  21    along with GAS; see the file COPYING.  If not, write to
  22    the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
  23
  24 /* App, the assembler pre-processor.  This pre-processor strips out excess
  25    spaces, turns single-quoted characters into a decimal constant, and turns
  26    # <number> <filename> <garbage> into a .appline <number>\n.appfile <filename>
  27    pair.  This needs better error-handling.
  28    */
  29
  30 #include <stdio.h>
  31 #include "as.h"                 /* For BAD_CASE() only */
  32
  33 #if (__STDC__ != 1) && !defined(const)
  34 #define const                   /* Nothing */
  35 #endif
  36
/* Lexical class of each character value, indexed by the character itself.
   The table is filled in by do_scrub_begin (); an entry of zero means the
   character has no special class.  */
static char lex[256];
/* Characters that do_scrub_begin () classifies as symbol components.  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Values stored in lex[].  */
#define LEX_IS_SYMBOL_COMPONENT         1       /* member of symbol_chars */
#define LEX_IS_WHITESPACE               2       /* space or tab */
#define LEX_IS_LINE_SEPARATOR           3       /* ';' or a line_separator_chars member */
#define LEX_IS_COMMENT_START            4       /* member of comment_chars */
#define LEX_IS_LINE_COMMENT_START       5       /* member of line_comment_chars */
#define LEX_IS_TWOCHAR_COMMENT_1ST      6       /* '/', when it has no other class */
#define LEX_IS_TWOCHAR_COMMENT_2ND      7       /* '*', when it has no other class */
#define LEX_IS_STRINGQUOTE              8       /* '"' (and '\'' if SINGLE_QUOTE_STRINGS) */
#define LEX_IS_COLON                    9       /* ':' */
#define LEX_IS_NEWLINE                  10      /* '\n' */
#define LEX_IS_ONECHAR_QUOTE            11      /* '\'' */
/* Class predicates.  C is used directly as an index into lex[], so it
   must not be EOF or any other negative value.  */
#define IS_SYMBOL_COMPONENT(c)          (lex[c] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)                (lex[c] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)            (lex[c] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c)                   (lex[c] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)              (lex[c] == LEX_IS_LINE_COMMENT_START)
#define IS_NEWLINE(c)                   (lex[c] == LEX_IS_NEWLINE)
  58
  59 /* FIXME-soon: The entire lexer/parser thingy should be
  60    built statically at compile time rather than dynamically
  61    each and every time the assembler is run.  xoxorich. */
  62
  63 void
  64 do_scrub_begin ()
  65 {
  66   const char *p;
  67
  68   lex[' '] = LEX_IS_WHITESPACE;
  69   lex['\t'] = LEX_IS_WHITESPACE;
  70   lex['\n'] = LEX_IS_NEWLINE;
  71   lex[';'] = LEX_IS_LINE_SEPARATOR;
  72   lex['"'] = LEX_IS_STRINGQUOTE;
  73   lex['\''] = LEX_IS_ONECHAR_QUOTE;
  74   lex[':'] = LEX_IS_COLON;
  75
  76
  77
  78 #ifdef SINGLE_QUOTE_STRINGS
  79         lex['\''] = LEX_IS_STRINGQUOTE;
  80 #endif
  81
  82   /* Note that these override the previous defaults, e.g. if ';'
  83
  84            is a comment char, then it isn't a line separator.  */
  85   for (p = symbol_chars; *p; ++p)
  86     {
  87       lex[*p] = LEX_IS_SYMBOL_COMPONENT;
  88     }                           /* declare symbol characters */
  89
  90   for (p = comment_chars; *p; p++)
  91     {
  92       lex[*p] = LEX_IS_COMMENT_START;
  93     }                           /* declare comment chars */
  94
  95   for (p = line_comment_chars; *p; p++)
  96     {
  97       lex[*p] = LEX_IS_LINE_COMMENT_START;
  98     }                           /* declare line comment chars */
  99
 100   for (p = line_separator_chars; *p; p++)
 101     {
 102       lex[*p] = LEX_IS_LINE_SEPARATOR;
 103     }                           /* declare line separators */
 104
 105   /* Only allow slash-star comments if slash is not in use */
 106   if (lex['/'] == 0)
 107     {
 108       lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
 109     }
 110   /* FIXME-soon.  This is a bad hack but otherwise, we
 111            can't do c-style comments when '/' is a line
 112            comment char. xoxorich. */
 113   if (lex['*'] == 0)
 114     {
 115       lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
 116     }
 117 }                               /* do_scrub_begin() */
 118
 119 FILE *scrub_file;
 120
 121 int
 122 scrub_from_file ()
 123 {
 124   return getc (scrub_file);
 125 }
 126
 127 void
 128 scrub_to_file (ch)
 129      int ch;
 130 {
 131   ungetc (ch, scrub_file);
 132 }                               /* scrub_to_file() */
 133
 134 char *scrub_string;
 135 char *scrub_last_string;
 136
 137 int
 138 scrub_from_string ()
 139 {
 140   return scrub_string == scrub_last_string ? EOF : *scrub_string++;
 141 }                               /* scrub_from_string() */
 142
 143 void
 144 scrub_to_string (ch)
 145      int ch;
 146 {
 147   *--scrub_string = ch;
 148 }                               /* scrub_to_string() */
 149
/* Saved state of the scrubber */
/* Current state of the state machine in do_scrub_next_char ().  */
static int state;
/* State to resume once a temporary state (-1, -2 or 5) is finished.  */
static int old_state;
/* Next character to emit while in state -1.  */
static char *out_string;
/* Holds the decimal text generated for a single-quoted character
   constant.  */
static char out_buf[20];
/* Counter incremented for newlines consumed inside comments and
   continued strings, and decremented as newlines are put back into
   the input.  */
static int add_newlines = 0;

/* Data structure for saving the state of app across #include's.  Note that
   app is called asynchronously to the parsing of the .include's, so our
   state at the time .include is interpreted is completely unrelated.
   That's why we have to save it all.  */

/* Each member holds a copy of the file-level variable of the same
   name.  */
struct app_save
  {
    int state;
    int old_state;
    char *out_string;
    char out_buf[sizeof (out_buf)];
    int add_newlines;
    char *scrub_string;
    char *scrub_last_string;
    FILE *scrub_file;
  };
 173
 174 char *
 175 app_push ()
 176 {
 177   register struct app_save *saved;
 178
 179   saved = (struct app_save *) xmalloc (sizeof (*saved));
 180   saved->state = state;
 181   saved->old_state = old_state;
 182   saved->out_string = out_string;
 183   memcpy (out_buf, saved->out_buf, sizeof (out_buf));
 184   saved->add_newlines = add_newlines;
 185   saved->scrub_string = scrub_string;
 186   saved->scrub_last_string = scrub_last_string;
 187   saved->scrub_file = scrub_file;
 188
 189   /* do_scrub_begin() is not useful, just wastes time. */
 190   return (char *) saved;
 191 }
 192
 193 void
 194 app_pop (arg)
 195      char *arg;
 196 {
 197   register struct app_save *saved = (struct app_save *) arg;
 198
 199   /* There is no do_scrub_end (). */
 200   state = saved->state;
 201   old_state = saved->old_state;
 202   out_string = saved->out_string;
 203   memcpy (saved->out_buf, out_buf, sizeof (out_buf));
 204   add_newlines = saved->add_newlines;
 205   scrub_string = saved->scrub_string;
 206   scrub_last_string = saved->scrub_last_string;
 207   scrub_file = saved->scrub_file;
 208
 209   free (arg);
 210 }                               /* app_pop() */
 211
 212 /* @@ This assumes that \n &c are the same on host and target.  This is not
 213    necessarily true.  */
 214 int
 215 process_escape (ch)
 216      char ch;
 217 {
 218   switch (ch)
 219     {
 220     case 'b':
 221       return '\b';
 222     case 'f':
 223       return '\f';
 224     case 'n':
 225       return '\n';
 226     case 'r':
 227       return '\r';
 228     case 't':
 229       return '\t';
 230     case '\'':
 231       return '\'';
 232     case '"':
 233       return '\"';
 234     default:
 235       return ch;
 236     }
 237 }
 238 int
 239 do_scrub_next_char (get, unget)
 240      int (*get) ();
 241      void (*unget) ();
 242 {
 243   /*State 0: beginning of normal line
 244           1: After first whitespace on line (flush more white)
 245           2: After first non-white (opcode) on line (keep 1white)
 246           3: after second white on line (into operands) (flush white)
 247           4: after putting out a .line, put out digits
 248           5: parsing a string, then go to old-state
 249           6: putting out \ escape in a "d string.
 250           7: After putting out a .appfile, put out string.
 251           8: After putting out a .appfile string, flush until newline.
 252           9: After seeing symbol char in state 3 (keep 1white after symchar)
 253          10: After seeing whitespace in state 9 (keep white before symchar)
 254           -1: output string in out_string and go to the state in old_state
 255           -2: flush text until a '*' '/' is seen, then go to state old_state
 256           */
 257
 258   /* I added states 9 and 10 because the MIPS ECOFF assembler uses
 259      constructs like ``.loc 1 20''.  This was turning into ``.loc
 260      120''.  States 9 and 10 ensure that a space is never dropped in
 261      between characters which could appear in a identifier.  Ian
 262      Taylor, [email protected].  */
 263
 264   register int ch, ch2 = 0;
 265
 266   switch (state)
 267     {
 268     case -1:
 269       ch = *out_string++;
 270       if (*out_string == 0)
 271         {
 272           state = old_state;
 273           old_state = 3;
 274         }
 275       return ch;
 276
 277     case -2:
 278       for (;;)
 279         {
 280           do
 281             {
 282               ch = (*get) ();
 283             }
 284           while (ch != EOF && ch != '\n' && ch != '*');
 285           if (ch == '\n' || ch == EOF)
 286             return ch;
 287
 288           /* At this point, ch must be a '*' */
 289           while ((ch = (*get) ()) == '*')
 290             {
 291               ;
 292             }
 293           if (ch == EOF || ch == '/')
 294             break;
 295           (*unget) (ch);
 296         }
 297       state = old_state;
 298       return ' ';
 299
 300     case 4:
 301       ch = (*get) ();
 302       if (ch == EOF || (ch >= '0' && ch <= '9'))
 303         return ch;
 304       else
 305         {
 306           while (ch != EOF && IS_WHITESPACE (ch))
 307             ch = (*get) ();
 308           if (ch == '"')
 309             {
 310               (*unget) (ch);
 311               out_string = "\n.appfile ";
 312               old_state = 7;
 313               state = -1;
 314               return *out_string++;
 315             }
 316           else
 317             {
 318               while (ch != EOF && ch != '\n')
 319                 ch = (*get) ();
 320               return ch;
 321             }
 322         }
 323
 324     case 5:
 325       ch = (*get) ();
 326       if (lex[ch] == LEX_IS_STRINGQUOTE)
 327         {
 328           state = old_state;
 329           return ch;
 330         }
 331       else if (ch == '\\')
 332         {
 333           state = 6;
 334           return ch;
 335         }
 336       else if (ch == EOF)
 337         {
 338           as_warn ("End of file in string: inserted '\"'");
 339           state = old_state;
 340           (*unget) ('\n');
 341           return '"';
 342         }
 343       else
 344         {
 345           return ch;
 346         }
 347
 348     case 6:
 349       state = 5;
 350       ch = (*get) ();
 351       switch (ch)
 352         {
 353           /* Handle strings broken across lines, by turning '\n' into
 354              '\\' and 'n'.  */
 355         case '\n':
 356           (*unget) ('n');
 357           add_newlines++;
 358           return '\\';
 359
 360         case '"':
 361         case '\\':
 362         case 'b':
 363         case 'f':
 364         case 'n':
 365         case 'r':
 366         case 't':
 367 #ifdef BACKSLASH_V
 368         case 'v':
 369 #endif /* BACKSLASH_V */
 370         case '0':
 371         case '1':
 372         case '2':
 373         case '3':
 374         case '4':
 375         case '5':
 376         case '6':
 377         case '7':
 378           break;
 379 #if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
 380         default:
 381           as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
 382           break;
 383 #else /* ONLY_STANDARD_ESCAPES */
 384         default:
 385           /* Accept \x as x for any x */
 386           break;
 387 #endif /* ONLY_STANDARD_ESCAPES */
 388
 389         case EOF:
 390           as_warn ("End of file in string: '\"' inserted");
 391           return '"';
 392         }
 393       return ch;
 394
 395     case 7:
 396       ch = (*get) ();
 397       state = 5;
 398       old_state = 8;
 399       return ch;
 400
 401     case 8:
 402       do
 403         ch = (*get) ();
 404       while (ch != '\n');
 405       state = 0;
 406       return ch;
 407     }
 408
 409   /* OK, we are somewhere in states 0 through 4 or 9 through 10 */
 410
 411   /* flushchar: */
 412   ch = (*get) ();
 413 recycle:
 414   if (ch == EOF)
 415     {
 416       if (state != 0)
 417         as_warn ("End of file not at end of a line: Newline inserted.");
 418       return ch;
 419     }
 420
 421   switch (lex[ch])
 422     {
 423     case LEX_IS_WHITESPACE:
 424       do
 425         ch = (*get) ();
 426       while (ch != EOF && IS_WHITESPACE (ch));
 427       if (ch == EOF)
 428         return ch;
 429
 430       if (IS_COMMENT (ch) || (state == 0 && IS_LINE_COMMENT (ch)) || ch == '/' || IS_LINE_SEPARATOR (ch))
 431         {
 432           goto recycle;
 433         }
 434 #ifdef MRI
 435       (*unget) (ch);            /* Put back */
 436       return ' ';               /* Always return one space at start of line */
 437 #endif
 438
 439       /* If we're in state 2, we've seen a non-white
 440          character followed by whitespace.  If the next
 441          character is ':', this is whitespace after a label
 442          name which we can ignore.  */
 443       if (state == 2 && lex[ch] == LEX_IS_COLON)
 444         {
 445           state = 0;
 446           return ch;
 447         }
 448
 449       switch (state)
 450         {
 451         case 0:
 452           state++;
 453           goto recycle;         /* Punted leading sp */
 454         case 1:
 455           BAD_CASE (state);     /* We can't get here */
 456         case 2:
 457           state = 3;
 458           (*unget) (ch);
 459           return ' ';           /* Sp after opco */
 460         case 3:
 461           goto recycle;         /* Sp in operands */
 462         case 9:
 463         case 10:
 464           state = 10;           /* Sp after symbol char */
 465           goto recycle;
 466         default:
 467           BAD_CASE (state);
 468         }
 469       break;
 470
 471     case LEX_IS_TWOCHAR_COMMENT_1ST:
 472       ch2 = (*get) ();
 473       if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
 474         {
 475           for (;;)
 476             {
 477               do
 478                 {
 479                   ch2 = (*get) ();
 480                   if (ch2 != EOF && IS_NEWLINE (ch2))
 481                     add_newlines++;
 482                 }
 483               while (ch2 != EOF &&
 484                      (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
 485
 486               while (ch2 != EOF &&
 487                      (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
 488                 {
 489                   ch2 = (*get) ();
 490                 }
 491
 492               if (ch2 == EOF
 493                   || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
 494                 break;
 495               (*unget) (ch);
 496             }
 497           if (ch2 == EOF)
 498             as_warn ("End of file in multiline comment");
 499
 500           ch = ' ';
 501           goto recycle;
 502         }
 503       else
 504         {
 505           if (ch2 != EOF)
 506             (*unget) (ch2);
 507           if (state == 9 || state == 10)
 508             state = 3;
 509           return ch;
 510         }
 511       break;
 512
 513     case LEX_IS_STRINGQUOTE:
 514       if (state == 9 || state == 10)
 515         old_state = 3;
 516       else
 517         old_state = state;
 518       state = 5;
 519       return ch;
 520 #ifndef MRI
 521 #ifndef IEEE_STYLE
 522     case LEX_IS_ONECHAR_QUOTE:
 523       ch = (*get) ();
 524       if (ch == EOF)
 525         {
 526           as_warn ("End-of-file after a one-character quote; \\000 inserted");
 527           ch = 0;
 528         }
 529       if (ch == '\\')
 530         {
 531           ch = (*get) ();
 532           ch = process_escape (ch);
 533         }
 534       sprintf (out_buf, "%d", (int) (unsigned char) ch);
 535
 536
 537       /* None of these 'x constants for us.  We want 'x'.  */
 538       if ((ch = (*get) ()) != '\'')
 539         {
 540 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
 541           as_warn ("Missing close quote: (assumed)");
 542 #else
 543           (*unget) (ch);
 544 #endif
 545         }
 546       if (strlen (out_buf) == 1)
 547         {
 548           return out_buf[0];
 549         }
 550       if (state == 9 || state == 10)
 551         old_state = 3;
 552       else
 553         old_state = state;
 554       state = -1;
 555       out_string = out_buf;
 556       return *out_string++;
 557 #endif
 558 #endif
 559     case LEX_IS_COLON:
 560       if (state == 9 || state == 10)
 561         state = 3;
 562       else if (state != 3)
 563         state = 0;
 564       return ch;
 565
 566     case LEX_IS_NEWLINE:
 567       /* Roll out a bunch of newlines from inside comments, etc.  */
 568       if (add_newlines)
 569         {
 570           --add_newlines;
 571           (*unget) (ch);
 572         }
 573       /* fall thru into... */
 574
 575     case LEX_IS_LINE_SEPARATOR:
 576       state = 0;
 577       return ch;
 578
 579     case LEX_IS_LINE_COMMENT_START:
 580       if (state == 0)           /* Only comment at start of line.  */
 581         {
 582           /* FIXME-someday: The two character comment stuff was badly
 583              thought out.  On i386, we want '/' as line comment start
 584              AND we want C style comments.  hence this hack.  The
 585              whole lexical process should be reworked.  xoxorich.  */
 586           if (ch == '/')
 587             {
 588               ch2 = (*get) ();
 589               if (ch2 == '*')
 590                 {
 591                   state = -2;
 592                   return (do_scrub_next_char (get, unget));
 593                 }
 594               else
 595                 {
 596                   (*unget) (ch2);
 597                 }
 598             }                   /* bad hack */
 599
 600           do
 601             ch = (*get) ();
 602           while (ch != EOF && IS_WHITESPACE (ch));
 603           if (ch == EOF)
 604             {
 605               as_warn ("EOF in comment:  Newline inserted");
 606               return '\n';
 607             }
 608           if (ch < '0' || ch > '9')
 609             {
 610               /* Non-numerics:  Eat whole comment line */
 611               while (ch != EOF && !IS_NEWLINE (ch))
 612                 ch = (*get) ();
 613               if (ch == EOF)
 614                 as_warn ("EOF in Comment: Newline inserted");
 615               state = 0;
 616               return '\n';
 617             }
 618           /* Numerics begin comment.  Perhaps CPP `# 123 "filename"' */
 619           (*unget) (ch);
 620           old_state = 4;
 621           state = -1;
 622           out_string = ".appline ";
 623           return *out_string++;
 624         }
 625
 626       /* We have a line comment character which is not at the start of
 627          a line.  If this is also a normal comment character, fall
 628          through.  Otherwise treat it as a default character.  */
 629       if (strchr (comment_chars, ch) == NULL)
 630         goto de_fault;
 631       /* Fall through.  */
 632     case LEX_IS_COMMENT_START:
 633       do
 634         ch = (*get) ();
 635       while (ch != EOF && !IS_NEWLINE (ch));
 636       if (ch == EOF)
 637         as_warn ("EOF in comment:  Newline inserted");
 638       state = 0;
 639       return '\n';
 640
 641     case LEX_IS_SYMBOL_COMPONENT:
 642       if (state == 10)
 643         {
 644           /* This is a symbol character following another symbol
 645              character, with whitespace in between.  We skipped the
 646              whitespace earlier, so output it now.  */
 647           (*unget) (ch);
 648           state = 3;
 649           return ' ';
 650         }
 651       if (state == 3)
 652         state = 9;
 653       /* Fall through.  */
 654     default:
 655     de_fault:
 656       /* Some relatively `normal' character.  */
 657       if (state == 0)
 658         {
 659           state = 2;            /* Now seeing opcode */
 660           return ch;
 661         }
 662       else if (state == 1)
 663         {
 664           state = 2;            /* Ditto */
 665           return ch;
 666         }
 667       else if (state == 9)
 668         {
 669           if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
 670             state = 3;
 671           return ch;
 672         }
 673       else if (state == 10)
 674         {
 675           state = 3;
 676           return ch;
 677         }
 678       else
 679         {
 680           return ch;            /* Opcode or operands already */
 681         }
 682     }
 683   return -1;
 684 }
 685
 686 #ifdef TEST
 687
 688 const char comment_chars[] = "|";
 689 const char line_comment_chars[] = "#";
 690
 691 main ()
 692 {
 693   int ch;
 694
 695   app_begin ();
 696   while ((ch = do_scrub_next_char (stdin)) != EOF)
 697     putc (ch, stdout);
 698 }
 699
 700 as_warn (str)
 701      char *str;
 702 {
 703   fputs (str, stderr);
 704   putc ('\n', stderr);
 705 }
 706
 707 #endif
 708
 709 /* end of app.c */