gas/app.c

   1 /* This is the Assembler Pre-Processor
   2    Copyright (C) 1987, 1990, 1991, 1992, 1994 Free Software Foundation, Inc.
   3
   4    This file is part of GAS, the GNU Assembler.
   5
   6    GAS is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 2, or (at your option)
   9    any later version.
  10
  11    GAS is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with GAS; see the file COPYING.  If not, write to
  18    the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
  19
  20 /* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
  21 /* App, the assembler pre-processor.  This pre-processor strips out excess
  22    spaces, turns single-quoted characters into a decimal constant, and turns
  23    # <number> <filename> <garbage> into a .appline <number>\n.appfile <filename>
  24    pair.  This needs better error-handling.
  25    */
  26
  27 #include <stdio.h>
  28 #include "as.h"                 /* For BAD_CASE() only */
  29
  30 #if (__STDC__ != 1)
  31 #ifndef const
  32 #define const  /* empty */
  33 #endif
  34 #endif
  35
  36 static char lex[256];
  37 static const char symbol_chars[] =
  38 "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
  39
  40 #define LEX_IS_SYMBOL_COMPONENT         1
  41 #define LEX_IS_WHITESPACE               2
  42 #define LEX_IS_LINE_SEPARATOR           3
  43 #define LEX_IS_COMMENT_START            4
  44 #define LEX_IS_LINE_COMMENT_START       5
  45 #define LEX_IS_TWOCHAR_COMMENT_1ST      6
  46 #define LEX_IS_TWOCHAR_COMMENT_2ND      7
  47 #define LEX_IS_STRINGQUOTE              8
  48 #define LEX_IS_COLON                    9
  49 #define LEX_IS_NEWLINE                  10
  50 #define LEX_IS_ONECHAR_QUOTE            11
  51 #define IS_SYMBOL_COMPONENT(c)          (lex[c] == LEX_IS_SYMBOL_COMPONENT)
  52 #define IS_WHITESPACE(c)                (lex[c] == LEX_IS_WHITESPACE)
  53 #define IS_LINE_SEPARATOR(c)            (lex[c] == LEX_IS_LINE_SEPARATOR)
  54 #define IS_COMMENT(c)                   (lex[c] == LEX_IS_COMMENT_START)
  55 #define IS_LINE_COMMENT(c)              (lex[c] == LEX_IS_LINE_COMMENT_START)
  56 #define IS_NEWLINE(c)                   (lex[c] == LEX_IS_NEWLINE)
  57
  58 static int process_escape PARAMS ((int));
  59
  60 /* FIXME-soon: The entire lexer/parser thingy should be
  61    built statically at compile time rather than dynamically
  62    each and every time the assembler is run.  xoxorich. */
  63
  64 void
  65 do_scrub_begin ()
  66 {
  67   const char *p;
  68
  69   lex[' '] = LEX_IS_WHITESPACE;
  70   lex['\t'] = LEX_IS_WHITESPACE;
  71   lex['\n'] = LEX_IS_NEWLINE;
  72   lex[';'] = LEX_IS_LINE_SEPARATOR;
  73   lex['"'] = LEX_IS_STRINGQUOTE;
  74 #ifndef TC_HPPA
  75   lex['\''] = LEX_IS_ONECHAR_QUOTE;
  76 #endif
  77   lex[':'] = LEX_IS_COLON;
  78
  79
  80
  81 #ifdef SINGLE_QUOTE_STRINGS
  82         lex['\''] = LEX_IS_STRINGQUOTE;
  83 #endif
  84
  85   /* Note that these override the previous defaults, e.g. if ';'
  86
  87            is a comment char, then it isn't a line separator.  */
  88   for (p = symbol_chars; *p; ++p)
  89     {
  90       lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
  91     }                           /* declare symbol characters */
  92
  93   for (p = comment_chars; *p; p++)
  94     {
  95       lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
  96     }                           /* declare comment chars */
  97
  98   for (p = line_comment_chars; *p; p++)
  99     {
 100       lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
 101     }                           /* declare line comment chars */
 102
 103   for (p = line_separator_chars; *p; p++)
 104     {
 105       lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
 106     }                           /* declare line separators */
 107
 108   /* Only allow slash-star comments if slash is not in use */
 109   if (lex['/'] == 0)
 110     {
 111       lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
 112     }
 113   /* FIXME-soon.  This is a bad hack but otherwise, we
 114            can't do c-style comments when '/' is a line
 115            comment char. xoxorich. */
 116   if (lex['*'] == 0)
 117     {
 118       lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
 119     }
 120 }                               /* do_scrub_begin() */
 121
 122 FILE *scrub_file;
 123
 124 int
 125 scrub_from_file ()
 126 {
 127   return getc (scrub_file);
 128 }
 129
 130 void
 131 scrub_to_file (ch)
 132      int ch;
 133 {
 134   ungetc (ch, scrub_file);
 135 }                               /* scrub_to_file() */
 136
 137 char *scrub_string;
 138 char *scrub_last_string;
 139
 140 int
 141 scrub_from_string ()
 142 {
 143   return scrub_string == scrub_last_string ? EOF : *scrub_string++;
 144 }                               /* scrub_from_string() */
 145
 146 void
 147 scrub_to_string (ch)
 148      int ch;
 149 {
 150   *--scrub_string = ch;
 151 }                               /* scrub_to_string() */
 152
 153 /* Saved state of the scrubber */
 154 static int state;
 155 static int old_state;
 156 static char *out_string;
 157 static char out_buf[20];
 158 static int add_newlines = 0;
 159
 160 /* Data structure for saving the state of app across #include's.  Note that
 161    app is called asynchronously to the parsing of the .include's, so our
 162    state at the time .include is interpreted is completely unrelated.
 163    That's why we have to save it all.  */
 164
 165 struct app_save
 166   {
 167     int state;
 168     int old_state;
 169     char *out_string;
 170     char out_buf[sizeof (out_buf)];
 171     int add_newlines;
 172     char *scrub_string;
 173     char *scrub_last_string;
 174     FILE *scrub_file;
 175   };
 176
 177 char *
 178 app_push ()
 179 {
 180   register struct app_save *saved;
 181
 182   saved = (struct app_save *) xmalloc (sizeof (*saved));
 183   saved->state = state;
 184   saved->old_state = old_state;
 185   saved->out_string = out_string;
 186   memcpy (saved->out_buf, out_buf, sizeof (out_buf));
 187   saved->add_newlines = add_newlines;
 188   saved->scrub_string = scrub_string;
 189   saved->scrub_last_string = scrub_last_string;
 190   saved->scrub_file = scrub_file;
 191
 192   /* do_scrub_begin() is not useful, just wastes time. */
 193   return (char *) saved;
 194 }
 195
 196 void
 197 app_pop (arg)
 198      char *arg;
 199 {
 200   register struct app_save *saved = (struct app_save *) arg;
 201
 202   /* There is no do_scrub_end (). */
 203   state = saved->state;
 204   old_state = saved->old_state;
 205   out_string = saved->out_string;
 206   memcpy (out_buf, saved->out_buf, sizeof (out_buf));
 207   add_newlines = saved->add_newlines;
 208   scrub_string = saved->scrub_string;
 209   scrub_last_string = saved->scrub_last_string;
 210   scrub_file = saved->scrub_file;
 211
 212   free (arg);
 213 }                               /* app_pop() */
 214
 215 /* @@ This assumes that \n &c are the same on host and target.  This is not
 216    necessarily true.  */
 217 static int
 218 process_escape (ch)
 219      int ch;
 220 {
 221   switch (ch)
 222     {
 223     case 'b':
 224       return '\b';
 225     case 'f':
 226       return '\f';
 227     case 'n':
 228       return '\n';
 229     case 'r':
 230       return '\r';
 231     case 't':
 232       return '\t';
 233     case '\'':
 234       return '\'';
 235     case '"':
 236       return '\"';
 237     default:
 238       return ch;
 239     }
 240 }
 241 int
 242 do_scrub_next_char (get, unget)
 243      int (*get) ();
 244      void (*unget) ();
 245 {
 246   /*State 0: beginning of normal line
 247           1: After first whitespace on line (flush more white)
 248           2: After first non-white (opcode) on line (keep 1white)
 249           3: after second white on line (into operands) (flush white)
 250           4: after putting out a .line, put out digits
 251           5: parsing a string, then go to old-state
 252           6: putting out \ escape in a "d string.
 253           7: After putting out a .appfile, put out string.
 254           8: After putting out a .appfile string, flush until newline.
 255           9: After seeing symbol char in state 3 (keep 1white after symchar)
 256          10: After seeing whitespace in state 9 (keep white before symchar)
 257           -1: output string in out_string and go to the state in old_state
 258           -2: flush text until a '*' '/' is seen, then go to state old_state
 259           */
 260
 261   /* I added states 9 and 10 because the MIPS ECOFF assembler uses
 262      constructs like ``.loc 1 20''.  This was turning into ``.loc
 263      120''.  States 9 and 10 ensure that a space is never dropped in
 264      between characters which could appear in a identifier.  Ian
 265      Taylor, [email protected].  */
 266
 267   register int ch, ch2 = 0;
 268   int not_cpp_line = 0;
 269
 270   switch (state)
 271     {
 272     case -1:
 273       ch = *out_string++;
 274       if (*out_string == 0)
 275         {
 276           state = old_state;
 277           old_state = 3;
 278         }
 279       return ch;
 280
 281     case -2:
 282       for (;;)
 283         {
 284           do
 285             {
 286               ch = (*get) ();
 287             }
 288           while (ch != EOF && ch != '\n' && ch != '*');
 289           if (ch == '\n' || ch == EOF)
 290             return ch;
 291
 292           /* At this point, ch must be a '*' */
 293           while ((ch = (*get) ()) == '*')
 294             {
 295               ;
 296             }
 297           if (ch == EOF || ch == '/')
 298             break;
 299           (*unget) (ch);
 300         }
 301       state = old_state;
 302       return ' ';
 303
 304     case 4:
 305       ch = (*get) ();
 306       if (ch == EOF || (ch >= '0' && ch <= '9'))
 307         return ch;
 308       else
 309         {
 310           while (ch != EOF && IS_WHITESPACE (ch))
 311             ch = (*get) ();
 312           if (ch == '"')
 313             {
 314               (*unget) (ch);
 315               out_string = "\n\t.appfile ";
 316               old_state = 7;
 317               state = -1;
 318               return *out_string++;
 319             }
 320           else
 321             {
 322               while (ch != EOF && ch != '\n')
 323                 ch = (*get) ();
 324               state = 0;
 325               return ch;
 326             }
 327         }
 328
 329     case 5:
 330       ch = (*get) ();
 331       if (lex[ch] == LEX_IS_STRINGQUOTE)
 332         {
 333           state = old_state;
 334           return ch;
 335         }
 336       else if (ch == '\\')
 337         {
 338           state = 6;
 339           return ch;
 340         }
 341       else if (ch == EOF)
 342         {
 343           as_warn ("End of file in string: inserted '\"'");
 344           state = old_state;
 345           (*unget) ('\n');
 346           return '"';
 347         }
 348       else
 349         {
 350           return ch;
 351         }
 352
 353     case 6:
 354       state = 5;
 355       ch = (*get) ();
 356       switch (ch)
 357         {
 358           /* Handle strings broken across lines, by turning '\n' into
 359              '\\' and 'n'.  */
 360         case '\n':
 361           (*unget) ('n');
 362           add_newlines++;
 363           return '\\';
 364
 365         case '"':
 366         case '\\':
 367         case 'b':
 368         case 'f':
 369         case 'n':
 370         case 'r':
 371         case 't':
 372 #ifdef BACKSLASH_V
 373         case 'v':
 374 #endif /* BACKSLASH_V */
 375         case 'x':
 376         case 'X':
 377         case '0':
 378         case '1':
 379         case '2':
 380         case '3':
 381         case '4':
 382         case '5':
 383         case '6':
 384         case '7':
 385           break;
 386 #if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
 387         default:
 388           as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
 389           break;
 390 #else /* ONLY_STANDARD_ESCAPES */
 391         default:
 392           /* Accept \x as x for any x */
 393           break;
 394 #endif /* ONLY_STANDARD_ESCAPES */
 395
 396         case EOF:
 397           as_warn ("End of file in string: '\"' inserted");
 398           return '"';
 399         }
 400       return ch;
 401
 402     case 7:
 403       ch = (*get) ();
 404       state = 5;
 405       old_state = 8;
 406       return ch;
 407
 408     case 8:
 409       do
 410         ch = (*get) ();
 411       while (ch != '\n');
 412       state = 0;
 413       return ch;
 414     }
 415
 416   /* OK, we are somewhere in states 0 through 4 or 9 through 10 */
 417
 418   /* flushchar: */
 419   ch = (*get) ();
 420 recycle:
 421   if (ch == EOF)
 422     {
 423       if (state != 0)
 424         as_warn ("End of file not at end of a line: Newline inserted.");
 425       return ch;
 426     }
 427
 428   switch (lex[ch])
 429     {
 430     case LEX_IS_WHITESPACE:
 431       do
 432         /* Preserve a single whitespace character at the beginning of
 433            a line.  */
 434         if (state == 0)
 435           {
 436             state = 1;
 437             return ch;
 438           }
 439         else
 440           ch = (*get) ();
 441       while (ch != EOF && IS_WHITESPACE (ch));
 442       if (ch == EOF)
 443         return ch;
 444
 445       if (IS_COMMENT (ch) || (state == 0 && IS_LINE_COMMENT (ch)) || ch == '/' || IS_LINE_SEPARATOR (ch))
 446         {
 447           /* cpp never outputs a leading space before the #, so try to
 448              avoid being confused.  */
 449           not_cpp_line = 1;
 450           goto recycle;
 451         }
 452 #ifdef MRI
 453       (*unget) (ch);            /* Put back */
 454       return ' ';               /* Always return one space at start of line */
 455 #endif
 456
 457       /* If we're in state 2, we've seen a non-white
 458          character followed by whitespace.  If the next
 459          character is ':', this is whitespace after a label
 460          name which we can ignore.  */
 461       if (state == 2 && lex[ch] == LEX_IS_COLON)
 462         {
 463           state = 0;
 464           return ch;
 465         }
 466
 467       switch (state)
 468         {
 469         case 0:
 470           state++;
 471           goto recycle;         /* Punted leading sp */
 472         case 1:
 473           /* We can arrive here if we leave a leading whitespace character
 474              at the beginning of a line.  */
 475           goto recycle;
 476         case 2:
 477           state = 3;
 478           (*unget) (ch);
 479           return ' ';           /* Sp after opco */
 480         case 3:
 481           goto recycle;         /* Sp in operands */
 482         case 9:
 483         case 10:
 484           state = 10;           /* Sp after symbol char */
 485           goto recycle;
 486         default:
 487           BAD_CASE (state);
 488         }
 489       break;
 490
 491     case LEX_IS_TWOCHAR_COMMENT_1ST:
 492       ch2 = (*get) ();
 493       if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
 494         {
 495           for (;;)
 496             {
 497               do
 498                 {
 499                   ch2 = (*get) ();
 500                   if (ch2 != EOF && IS_NEWLINE (ch2))
 501                     add_newlines++;
 502                 }
 503               while (ch2 != EOF &&
 504                      (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
 505
 506               while (ch2 != EOF &&
 507                      (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
 508                 {
 509                   ch2 = (*get) ();
 510                 }
 511
 512               if (ch2 == EOF
 513                   || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
 514                 break;
 515               (*unget) (ch);
 516             }
 517           if (ch2 == EOF)
 518             as_warn ("End of file in multiline comment");
 519
 520           ch = ' ';
 521           goto recycle;
 522         }
 523       else
 524         {
 525           if (ch2 != EOF)
 526             (*unget) (ch2);
 527           if (state == 9 || state == 10)
 528             state = 3;
 529           return ch;
 530         }
 531       break;
 532
 533     case LEX_IS_STRINGQUOTE:
 534       if (state == 9 || state == 10)
 535         old_state = 3;
 536       else
 537         old_state = state;
 538       state = 5;
 539       return ch;
 540 #ifndef MRI
 541 #ifndef IEEE_STYLE
 542     case LEX_IS_ONECHAR_QUOTE:
 543       ch = (*get) ();
 544       if (ch == EOF)
 545         {
 546           as_warn ("End-of-file after a one-character quote; \\000 inserted");
 547           ch = 0;
 548         }
 549       if (ch == '\\')
 550         {
 551           ch = (*get) ();
 552           ch = process_escape (ch);
 553         }
 554       sprintf (out_buf, "%d", (int) (unsigned char) ch);
 555
 556
 557       /* None of these 'x constants for us.  We want 'x'.  */
 558       if ((ch = (*get) ()) != '\'')
 559         {
 560 #ifdef REQUIRE_CHAR_CLOSE_QUOTE
 561           as_warn ("Missing close quote: (assumed)");
 562 #else
 563           (*unget) (ch);
 564 #endif
 565         }
 566       if (strlen (out_buf) == 1)
 567         {
 568           return out_buf[0];
 569         }
 570       if (state == 9 || state == 10)
 571         old_state = 3;
 572       else
 573         old_state = state;
 574       state = -1;
 575       out_string = out_buf;
 576       return *out_string++;
 577 #endif
 578 #endif
 579     case LEX_IS_COLON:
 580       if (state == 9 || state == 10)
 581         state = 3;
 582       else if (state != 3)
 583         state = 0;
 584       return ch;
 585
 586     case LEX_IS_NEWLINE:
 587       /* Roll out a bunch of newlines from inside comments, etc.  */
 588       if (add_newlines)
 589         {
 590           --add_newlines;
 591           (*unget) (ch);
 592         }
 593       /* fall thru into... */
 594
 595     case LEX_IS_LINE_SEPARATOR:
 596       state = 0;
 597       return ch;
 598
 599     case LEX_IS_LINE_COMMENT_START:
 600       if (state == 0)           /* Only comment at start of line.  */
 601         {
 602           /* FIXME-someday: The two character comment stuff was badly
 603              thought out.  On i386, we want '/' as line comment start
 604              AND we want C style comments.  hence this hack.  The
 605              whole lexical process should be reworked.  xoxorich.  */
 606           if (ch == '/')
 607             {
 608               ch2 = (*get) ();
 609               if (ch2 == '*')
 610                 {
 611                   state = -2;
 612                   return (do_scrub_next_char (get, unget));
 613                 }
 614               else
 615                 {
 616                   (*unget) (ch2);
 617                 }
 618             }                   /* bad hack */
 619
 620           if (ch != '#')
 621             not_cpp_line = 1;
 622
 623           do
 624             ch = (*get) ();
 625           while (ch != EOF && IS_WHITESPACE (ch));
 626           if (ch == EOF)
 627             {
 628               as_warn ("EOF in comment:  Newline inserted");
 629               return '\n';
 630             }
 631           if (ch < '0' || ch > '9' || not_cpp_line)
 632             {
 633               /* Non-numerics:  Eat whole comment line */
 634               while (ch != EOF && !IS_NEWLINE (ch))
 635                 ch = (*get) ();
 636               if (ch == EOF)
 637                 as_warn ("EOF in Comment: Newline inserted");
 638               state = 0;
 639               return '\n';
 640             }
 641           /* Numerics begin comment.  Perhaps CPP `# 123 "filename"' */
 642           (*unget) (ch);
 643           old_state = 4;
 644           state = -1;
 645           out_string = "\t.appline ";
 646           return *out_string++;
 647         }
 648
 649       /* We have a line comment character which is not at the start of
 650          a line.  If this is also a normal comment character, fall
 651          through.  Otherwise treat it as a default character.  */
 652       if (strchr (comment_chars, ch) == NULL)
 653         goto de_fault;
 654       /* Fall through.  */
 655     case LEX_IS_COMMENT_START:
 656       do
 657         ch = (*get) ();
 658       while (ch != EOF && !IS_NEWLINE (ch));
 659       if (ch == EOF)
 660         as_warn ("EOF in comment:  Newline inserted");
 661       state = 0;
 662       return '\n';
 663
 664     case LEX_IS_SYMBOL_COMPONENT:
 665       if (state == 10)
 666         {
 667           /* This is a symbol character following another symbol
 668              character, with whitespace in between.  We skipped the
 669              whitespace earlier, so output it now.  */
 670           (*unget) (ch);
 671           state = 3;
 672           return ' ';
 673         }
 674       if (state == 3)
 675         state = 9;
 676       /* Fall through.  */
 677     default:
 678     de_fault:
 679       /* Some relatively `normal' character.  */
 680       if (state == 0)
 681         {
 682           state = 2;            /* Now seeing opcode */
 683           return ch;
 684         }
 685       else if (state == 1)
 686         {
 687           state = 2;            /* Ditto */
 688           return ch;
 689         }
 690       else if (state == 9)
 691         {
 692           if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
 693             state = 3;
 694           return ch;
 695         }
 696       else if (state == 10)
 697         {
 698           state = 3;
 699           return ch;
 700         }
 701       else
 702         {
 703           return ch;            /* Opcode or operands already */
 704         }
 705     }
 706   return -1;
 707 }
 708
 709 #ifdef TEST
 710
 711 const char comment_chars[] = "|";
 712 const char line_comment_chars[] = "#";
 713
 714 main ()
 715 {
 716   int ch;
 717
 718   app_begin ();
 719   while ((ch = do_scrub_next_char (stdin)) != EOF)
 720     putc (ch, stdout);
 721 }
 722
 723 as_warn (str)
 724      char *str;
 725 {
 726   fputs (str, stderr);
 727   putc ('\n', stderr);
 728 }
 729
 730 #endif
 731
 732 /* end of app.c */