gas/app.c

   1 /* This is the Assembler Pre-Processor
   2    Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.
   3
   4    This file is part of GAS, the GNU Assembler.
   5
   6    GAS is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 2, or (at your option)
   9    any later version.
  10
  11    GAS is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with GAS; see the file COPYING.  If not, write to
  18    the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
  19
  20 /* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
  21 /* App, the assembler pre-processor.  This pre-processor strips out excess
  22    spaces, turns single-quoted characters into a decimal constant, and turns
  23    # <number> <filename> <garbage> into a .appline <number>\n.appfile <filename>
  24    pair.  This needs better error-handling.
  25    */
  26
  27 #include <stdio.h>
  28 #include "as.h"                 /* For BAD_CASE() only */
  29
  30 #if (__STDC__ != 1) && !defined(const)
  31 #define const                   /* Nothing */
  32 #endif
  33
  34 static char lex[256];
  35 static const char symbol_chars[] =
  36 "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
  37
  38 #define LEX_IS_SYMBOL_COMPONENT         1
  39 #define LEX_IS_WHITESPACE               2
  40 #define LEX_IS_LINE_SEPARATOR           3
  41 #define LEX_IS_COMMENT_START            4
  42 #define LEX_IS_LINE_COMMENT_START       5
  43 #define LEX_IS_TWOCHAR_COMMENT_1ST      6
  44 #define LEX_IS_TWOCHAR_COMMENT_2ND      7
  45 #define LEX_IS_STRINGQUOTE              8
  46 #define LEX_IS_COLON                    9
  47 #define LEX_IS_NEWLINE                  10
  48 #define LEX_IS_ONECHAR_QUOTE            11
  49 #define IS_SYMBOL_COMPONENT(c)          (lex[c] == LEX_IS_SYMBOL_COMPONENT)
  50 #define IS_WHITESPACE(c)                (lex[c] == LEX_IS_WHITESPACE)
  51 #define IS_LINE_SEPARATOR(c)            (lex[c] == LEX_IS_LINE_SEPARATOR)
  52 #define IS_COMMENT(c)                   (lex[c] == LEX_IS_COMMENT_START)
  53 #define IS_LINE_COMMENT(c)              (lex[c] == LEX_IS_LINE_COMMENT_START)
  54 #define IS_NEWLINE(c)                   (lex[c] == LEX_IS_NEWLINE)
  55
  56 static int process_escape PARAMS ((int));
  57
  58 /* FIXME-soon: The entire lexer/parser thingy should be
  59    built statically at compile time rather than dynamically
  60    each and every time the assembler is run.  xoxorich. */
  61
  62 void
  63 do_scrub_begin ()
  64 {
  65   const char *p;
  66
  67   lex[' '] = LEX_IS_WHITESPACE;
  68   lex['\t'] = LEX_IS_WHITESPACE;
  69   lex['\n'] = LEX_IS_NEWLINE;
  70   lex[';'] = LEX_IS_LINE_SEPARATOR;
  71   lex['"'] = LEX_IS_STRINGQUOTE;
  72 #ifndef TC_HPPA
  73   lex['\''] = LEX_IS_ONECHAR_QUOTE;
  74 #endif
  75   lex[':'] = LEX_IS_COLON;
  76
  77
  78
  79 #ifdef SINGLE_QUOTE_STRINGS
  80         lex['\''] = LEX_IS_STRINGQUOTE;
  81 #endif
  82
  83   /* Note that these override the previous defaults, e.g. if ';'
  84
  85            is a comment char, then it isn't a line separator.  */
  86   for (p = symbol_chars; *p; ++p)
  87     {
  88       lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
  89     }                           /* declare symbol characters */
  90
  91   for (p = comment_chars; *p; p++)
  92     {
  93       lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
  94     }                           /* declare comment chars */
  95
  96   for (p = line_comment_chars; *p; p++)
  97     {
  98       lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
  99     }                           /* declare line comment chars */
 100
 101   for (p = line_separator_chars; *p; p++)
 102     {
 103       lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
 104     }                           /* declare line separators */
 105
 106   /* Only allow slash-star comments if slash is not in use */
 107   if (lex['/'] == 0)
 108     {
 109       lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
 110     }
 111   /* FIXME-soon.  This is a bad hack but otherwise, we
 112            can't do c-style comments when '/' is a line
 113            comment char. xoxorich. */
 114   if (lex['*'] == 0)
 115     {
 116       lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
 117     }
 118 }                               /* do_scrub_begin() */
 119
 120 FILE *scrub_file;
 121
 122 int
 123 scrub_from_file ()
 124 {
 125   return getc (scrub_file);
 126 }
 127
 128 void
 129 scrub_to_file (ch)
 130      int ch;
 131 {
 132   ungetc (ch, scrub_file);
 133 }                               /* scrub_to_file() */
 134
 135 char *scrub_string;
 136 char *scrub_last_string;
 137
 138 int
 139 scrub_from_string ()
 140 {
 141   return scrub_string == scrub_last_string ? EOF : *scrub_string++;
 142 }                               /* scrub_from_string() */
 143
 144 void
 145 scrub_to_string (ch)
 146      int ch;
 147 {
 148   *--scrub_string = ch;
 149 }                               /* scrub_to_string() */
 150
 151 /* Saved state of the scrubber */
 152 static int state;
 153 static int old_state;
 154 static char *out_string;
 155 static char out_buf[20];
 156 static int add_newlines = 0;
 157
 158 /* Data structure for saving the state of app across #include's.  Note that
 159    app is called asynchronously to the parsing of the .include's, so our
 160    state at the time .include is interpreted is completely unrelated.
 161    That's why we have to save it all.  */
 162
 163 struct app_save
 164   {
 165     int state;
 166     int old_state;
 167     char *out_string;
 168     char out_buf[sizeof (out_buf)];
 169     int add_newlines;
 170     char *scrub_string;
 171     char *scrub_last_string;
 172     FILE *scrub_file;
 173   };
 174
 175 char *
 176 app_push ()
 177 {
 178   register struct app_save *saved;
 179
 180   saved = (struct app_save *) xmalloc (sizeof (*saved));
 181   saved->state = state;
 182   saved->old_state = old_state;
 183   saved->out_string = out_string;
 184   memcpy (saved->out_buf, out_buf, sizeof (out_buf));
 185   saved->add_newlines = add_newlines;
 186   saved->scrub_string = scrub_string;
 187   saved->scrub_last_string = scrub_last_string;
 188   saved->scrub_file = scrub_file;
 189
 190   /* do_scrub_begin() is not useful, just wastes time. */
 191   return (char *) saved;
 192 }
 193
 194 void
 195 app_pop (arg)
 196      char *arg;
 197 {
 198   register struct app_save *saved = (struct app_save *) arg;
 199
 200   /* There is no do_scrub_end (). */
 201   state = saved->state;
 202   old_state = saved->old_state;
 203   out_string = saved->out_string;
 204   memcpy (out_buf, saved->out_buf, sizeof (out_buf));
 205   add_newlines = saved->add_newlines;
 206   scrub_string = saved->scrub_string;
 207   scrub_last_string = saved->scrub_last_string;
 208   scrub_file = saved->scrub_file;
 209
 210   free (arg);
 211 }                               /* app_pop() */
 212
 213 /* @@ This assumes that \n &c are the same on host and target.  This is not
 214    necessarily true.  */
 215 static int
 216 process_escape (ch)
 217      int ch;
 218 {
 219   switch (ch)
 220     {
 221     case 'b':
 222       return '\b';
 223     case 'f':
 224       return '\f';
 225     case 'n':
 226       return '\n';
 227     case 'r':
 228       return '\r';
 229     case 't':
 230       return '\t';
 231     case '\'':
 232       return '\'';
 233     case '"':
 234       return '\"';
 235     default:
 236       return ch;
 237     }
 238 }
 239 int
 240 do_scrub_next_char (get, unget)
 241      int (*get) ();
 242      void (*unget) ();
 243 {
 244   /*State 0: beginning of normal line
 245           1: After first whitespace on line (flush more white)
 246           2: After first non-white (opcode) on line (keep 1white)
 247           3: after second white on line (into operands) (flush white)
 248           4: after putting out a .line, put out digits
 249           5: parsing a string, then go to old-state
 250           6: putting out \ escape in a "d string.
 251           7: After putting out a .appfile, put out string.
 252           8: After putting out a .appfile string, flush until newline.
 253           9: After seeing symbol char in state 3 (keep 1white after symchar)
 254          10: After seeing whitespace in state 9 (keep white before symchar)
 255           -1: output string in out_string and go to the state in old_state
 256           -2: flush text until a '*' '/' is seen, then go to state old_state
 257           */
 258
 259   /* I added states 9 and 10 because the MIPS ECOFF assembler uses
 260      constructs like ``.loc 1 20''.  This was turning into ``.loc
 261      120''.  States 9 and 10 ensure that a space is never dropped in
 262      between characters which could appear in a identifier.  Ian
 263      Taylor, [email protected].  */
 264
 265   register int ch, ch2 = 0;
 266   int not_cpp_line = 0;
 267
 268   switch (state)
 269     {
 270     case -1:
 271       ch = *out_string++;
 272       if (*out_string == 0)
 273         {
 274           state = old_state;
 275           old_state = 3;
 276         }
 277       return ch;
 278
 279     case -2:
 280       for (;;)
 281         {
 282           do
 283             {
 284               ch = (*get) ();
 285             }
 286           while (ch != EOF && ch != '\n' && ch != '*');
 287           if (ch == '\n' || ch == EOF)
 288             return ch;
 289
 290           /* At this point, ch must be a '*' */
 291           while ((ch = (*get) ()) == '*')
 292             {
 293               ;
 294             }
 295           if (ch == EOF || ch == '/')
 296             break;
 297           (*unget) (ch);
 298         }
 299       state = old_state;
 300       return ' ';
 301
 302     case 4:
 303       ch = (*get) ();
 304       if (ch == EOF || (ch >= '0' && ch <= '9'))
 305         return ch;
 306       else
 307         {
 308           while (ch != EOF && IS_WHITESPACE (ch))
 309             ch = (*get) ();
 310           if (ch == '"')
 311             {
 312               (*unget) (ch);
 313               out_string = "\n.appfile ";
 314               old_state = 7;
 315               state = -1;
 316               return *out_string++;
 317             }
 318           else
 319             {
 320               while (ch != EOF && ch != '\n')
 321                 ch = (*get) ();
 322               state = 0;
 323               return ch;
 324             }
 325         }
 326
 327     case 5:
 328       ch = (*get) ();
 329       if (lex[ch] == LEX_IS_STRINGQUOTE)
 330         {
 331           state = old_state;
 332           return ch;
 333         }
 334       else if (ch == '\\')
 335         {
 336           state = 6;
 337           return ch;
 338         }
 339       else if (ch == EOF)
 340         {
 341           as_warn ("End of file in string: inserted '\"'");
 342           state = old_state;
 343           (*unget) ('\n');
 344           return '"';
 345         }
 346       else
 347         {
 348           return ch;
 349         }
 350
 351     case 6:
 352       state = 5;
 353       ch = (*get) ();
 354       switch (ch)
 355         {
 356           /* Handle strings broken across lines, by turning '\n' into
 357              '\\' and 'n'.  */
 358         case '\n':
 359           (*unget) ('n');
 360           add_newlines++;
 361           return '\\';
 362
 363         case '"':
 364         case '\\':
 365         case 'b':
 366         case 'f':
 367         case 'n':
 368         case 'r':
 369         case 't':
 370 #ifdef BACKSLASH_V
 371         case 'v':
 372 #endif /* BACKSLASH_V */
 373         case 'x':
 374         case 'X':
 375         case '0':
 376         case '1':
 377         case '2':
 378         case '3':
 379         case '4':
 380         case '5':
 381         case '6':
 382         case '7':
 383           break;
 384 #if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
 385         default:
 386           as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
 387           break;
 388 #else /* ONLY_STANDARD_ESCAPES */
 389         default:
 390           /* Accept \x as x for any x */
 391           break;
 392 #endif /* ONLY_STANDARD_ESCAPES */
 393
 394         case EOF:
 395           as_warn ("End of file in string: '\"' inserted");
 396           return '"';
 397         }
 398       return ch;
 399
 400     case 7:
 401       ch = (*get) ();
 402       state = 5;
 403       old_state = 8;
 404       return ch;
 405
 406     case 8:
 407       do
 408         ch = (*get) ();
 409       while (ch != '\n');
 410       state = 0;
 411       return ch;
 412     }
 413
 414   /* OK, we are somewhere in states 0 through 4 or 9 through 10 */
 415
 416   /* flushchar: */
 417   ch = (*get) ();
 418 recycle:
 419   if (ch == EOF)
 420     {
 421       if (state != 0)
 422         as_warn ("End of file not at end of a line: Newline inserted.");
 423       return ch;
 424     }
 425
 426   switch (lex[ch])
 427     {
 428     case LEX_IS_WHITESPACE:
 429       do
 430         /* Preserve a single whitespace character at the beginning of
 431            a line.  */
 432         if (state == 0)
 433           {
 434             state = 1;
 435             return ch;
 436           }
 437         else
 438           ch = (*get) ();
 439       while (ch != EOF && IS_WHITESPACE (ch));
 440       if (ch == EOF)
 441         return ch;
 442
 443       if (IS_COMMENT (ch) || (state == 0 && IS_LINE_COMMENT (ch)) || ch == '/' || IS_LINE_SEPARATOR (ch))
 444         {
 445           /* cpp never outputs a leading space before the #, so try to
 446              avoid being confused.  */
 447           not_cpp_line = 1;
 448           goto recycle;
 449         }
 450 #ifdef MRI
 451       (*unget) (ch);            /* Put back */
 452       return ' ';               /* Always return one space at start of line */
 453 #endif
 454
 455       /* If we're in state 2, we've seen a non-white
 456          character followed by whitespace.  If the next
 457          character is ':', this is whitespace after a label
 458          name which we can ignore.  */
 459       if (state == 2 && lex[ch] == LEX_IS_COLON)
 460         {
 461           state = 0;
 462           return ch;
 463         }
 464
 465       switch (state)
 466         {
 467         case 0:
 468           state++;
 469           goto recycle;         /* Punted leading sp */
 470         case 1:
 471           /* We can arrive here if we leave a leading whitespace character
 472              at the beginning of a line.  */
 473           goto recycle;
 474         case 2:
 475           state = 3;
 476           (*unget) (ch);
 477           return ' ';           /* Sp after opco */
 478         case 3:
 479           goto recycle;         /* Sp in operands */
 480         case 9:
 481         case 10:
 482           state = 10;           /* Sp after symbol char */
 483           goto recycle;
 484         default:
 485           BAD_CASE (state);
 486         }
 487       break;
 488
 489     case LEX_IS_TWOCHAR_COMMENT_1ST:
 490       ch2 = (*get) ();
 491       if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
 492         {
 493           for (;;)
 494             {
 495               do
 496                 {
 497                   ch2 = (*get) ();
 498                   if (ch2 != EOF && IS_NEWLINE (ch2))
 499                     add_newlines++;
 500                 }
 501               while (ch2 != EOF &&
 502                      (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
 503
 504               while (ch2 != EOF &&
 505                      (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
 506                 {
 507                   ch2 = (*get) ();
 508                 }
 509
 510               if (ch2 == EOF
 511                   || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
 512                 break;
 513               (*unget) (ch);
 514             }
 515           if (ch2 == EOF)
 516             as_warn ("End of file in multiline comment");
 517
 518           ch = ' ';
 519           goto recycle;
 520         }
 521       else
 522         {
 523           if (ch2 != EOF)
 524             (*unget) (ch2);
 525           if (state == 9 || state == 10)
 526             state = 3;
 527           return ch;
 528         }
 529       break;
 530
 531     case LEX_IS_STRINGQUOTE:
 532       if (state == 9 || state == 10)
 533         old_state = 3;
 534       else
 535         old_state = state;
 536       state = 5;
 537       return ch;
 538 #ifndef MRI
 539 #ifndef IEEE_STYLE
 540     case LEX_IS_ONECHAR_QUOTE:
 541       ch = (*get) ();
 542       if (ch == EOF)
 543         {
 544           as_warn ("End-of-file after a one-character quote; \\000 inserted");
 545           ch = 0;
 546         }
 547       if (ch == '\\')
 548         {
 549           ch = (*get) ();
 550           ch = process_escape (ch);
 551         }
 552       sprintf (out_buf, "%d", (int) (unsigned char) ch);
 553
 554
 555       /* None of these 'x constants for us.  We want 'x'.  */
 556       if ((ch = (*get) ()) != '\'')
 557         {
 558 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
 559           as_warn ("Missing close quote: (assumed)");
 560 #else
 561           (*unget) (ch);
 562 #endif
 563         }
 564       if (strlen (out_buf) == 1)
 565         {
 566           return out_buf[0];
 567         }
 568       if (state == 9 || state == 10)
 569         old_state = 3;
 570       else
 571         old_state = state;
 572       state = -1;
 573       out_string = out_buf;
 574       return *out_string++;
 575 #endif
 576 #endif
 577     case LEX_IS_COLON:
 578       if (state == 9 || state == 10)
 579         state = 3;
 580       else if (state != 3)
 581         state = 0;
 582       return ch;
 583
 584     case LEX_IS_NEWLINE:
 585       /* Roll out a bunch of newlines from inside comments, etc.  */
 586       if (add_newlines)
 587         {
 588           --add_newlines;
 589           (*unget) (ch);
 590         }
 591       /* fall thru into... */
 592
 593     case LEX_IS_LINE_SEPARATOR:
 594       state = 0;
 595       return ch;
 596
 597     case LEX_IS_LINE_COMMENT_START:
 598       if (state == 0)           /* Only comment at start of line.  */
 599         {
 600           /* FIXME-someday: The two character comment stuff was badly
 601              thought out.  On i386, we want '/' as line comment start
 602              AND we want C style comments.  hence this hack.  The
 603              whole lexical process should be reworked.  xoxorich.  */
 604           if (ch == '/')
 605             {
 606               ch2 = (*get) ();
 607               if (ch2 == '*')
 608                 {
 609                   state = -2;
 610                   return (do_scrub_next_char (get, unget));
 611                 }
 612               else
 613                 {
 614                   (*unget) (ch2);
 615                 }
 616             }                   /* bad hack */
 617
 618           if (ch != '#')
 619             not_cpp_line = 1;
 620
 621           do
 622             ch = (*get) ();
 623           while (ch != EOF && IS_WHITESPACE (ch));
 624           if (ch == EOF)
 625             {
 626               as_warn ("EOF in comment:  Newline inserted");
 627               return '\n';
 628             }
 629           if (ch < '0' || ch > '9' || not_cpp_line)
 630             {
 631               /* Non-numerics:  Eat whole comment line */
 632               while (ch != EOF && !IS_NEWLINE (ch))
 633                 ch = (*get) ();
 634               if (ch == EOF)
 635                 as_warn ("EOF in Comment: Newline inserted");
 636               state = 0;
 637               return '\n';
 638             }
 639           /* Numerics begin comment.  Perhaps CPP `# 123 "filename"' */
 640           (*unget) (ch);
 641           old_state = 4;
 642           state = -1;
 643           out_string = ".appline ";
 644           return *out_string++;
 645         }
 646
 647       /* We have a line comment character which is not at the start of
 648          a line.  If this is also a normal comment character, fall
 649          through.  Otherwise treat it as a default character.  */
 650       if (strchr (comment_chars, ch) == NULL)
 651         goto de_fault;
 652       /* Fall through.  */
 653     case LEX_IS_COMMENT_START:
 654       do
 655         ch = (*get) ();
 656       while (ch != EOF && !IS_NEWLINE (ch));
 657       if (ch == EOF)
 658         as_warn ("EOF in comment:  Newline inserted");
 659       state = 0;
 660       return '\n';
 661
 662     case LEX_IS_SYMBOL_COMPONENT:
 663       if (state == 10)
 664         {
 665           /* This is a symbol character following another symbol
 666              character, with whitespace in between.  We skipped the
 667              whitespace earlier, so output it now.  */
 668           (*unget) (ch);
 669           state = 3;
 670           return ' ';
 671         }
 672       if (state == 3)
 673         state = 9;
 674       /* Fall through.  */
 675     default:
 676     de_fault:
 677       /* Some relatively `normal' character.  */
 678       if (state == 0)
 679         {
 680           state = 2;            /* Now seeing opcode */
 681           return ch;
 682         }
 683       else if (state == 1)
 684         {
 685           state = 2;            /* Ditto */
 686           return ch;
 687         }
 688       else if (state == 9)
 689         {
 690           if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
 691             state = 3;
 692           return ch;
 693         }
 694       else if (state == 10)
 695         {
 696           state = 3;
 697           return ch;
 698         }
 699       else
 700         {
 701           return ch;            /* Opcode or operands already */
 702         }
 703     }
 704   return -1;
 705 }
 706
 707 #ifdef TEST
 708
 709 const char comment_chars[] = "|";
 710 const char line_comment_chars[] = "#";
 711
 712 main ()
 713 {
 714   int ch;
 715
 716   app_begin ();
 717   while ((ch = do_scrub_next_char (stdin)) != EOF)
 718     putc (ch, stdout);
 719 }
 720
 721 as_warn (str)
 722      char *str;
 723 {
 724   fputs (str, stderr);
 725   putc ('\n', stderr);
 726 }
 727
 728 #endif
 729
 730 /* end of app.c */