[binutils.git] / gas / app.c

/* This is the Assembler Pre-Processor
   Copyright (C) 1987, 1990, 1991, 1992, 1994 Free Software Foundation, Inc.

   This file is part of GAS, the GNU Assembler.

   GAS is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   GAS is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GAS; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */

/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
/* App, the assembler pre-processor.  This pre-processor strips out excess
   spaces, turns single-quoted characters into a decimal constant, and turns
   # <number> <filename> <garbage> into a .appline <number>\n.appfile <filename>
   pair.  This needs better error-handling.
   */

#include <stdio.h>
#include "as.h"			/* For BAD_CASE() only */

#if (__STDC__ != 1)
#ifndef const
#define const  /* empty */
#endif
#endif

/* Character-class table: lex[c] holds one of the LEX_IS_* codes below for
   character code c, or 0 when c has not been given a class.  It is filled
   in by do_scrub_begin ().  */
static char lex[256];
/* The characters that do_scrub_begin () marks as LEX_IS_SYMBOL_COMPONENT.  */
static const char symbol_chars[] =
"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* Class codes stored in lex[].  */
#define LEX_IS_SYMBOL_COMPONENT		1
#define LEX_IS_WHITESPACE		2
#define LEX_IS_LINE_SEPARATOR		3
#define LEX_IS_COMMENT_START		4
#define LEX_IS_LINE_COMMENT_START	5
#define	LEX_IS_TWOCHAR_COMMENT_1ST	6
#define	LEX_IS_TWOCHAR_COMMENT_2ND	7
#define	LEX_IS_STRINGQUOTE		8
#define	LEX_IS_COLON			9
#define	LEX_IS_NEWLINE			10
#define	LEX_IS_ONECHAR_QUOTE		11
/* Class predicates.  The argument is used directly as an index into
   lex[], so it must be a valid index (0..255); in particular EOF has to be
   tested for before any of these is applied.  */
#define IS_SYMBOL_COMPONENT(c)		(lex[c] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)		(lex[c] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)		(lex[c] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c)			(lex[c] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)		(lex[c] == LEX_IS_LINE_COMMENT_START)
#define	IS_NEWLINE(c)			(lex[c] == LEX_IS_NEWLINE)

/* Maps the character after a backslash in a one-character quote to the
   character it denotes; defined further down.  */
static int process_escape PARAMS ((int));

/* FIXME-soon: The entire lexer/parser thingy should be
   built statically at compile time rather than dynamically
   each and every time the assembler is run.  xoxorich. */

/* Fill in the lex[] character-class table.

   Fixed defaults are installed first, then the symbol characters, then
   the comment_chars, line_comment_chars and line_separator_chars tables
   (all defined outside this file).  Later assignments overwrite earlier
   ones, so for example a ';' listed as a comment character stops being a
   line separator.  */
void 
do_scrub_begin ()
{
  const char *cp;

  /* Built-in defaults.  */
  lex['\n'] = LEX_IS_NEWLINE;
  lex['\t'] = LEX_IS_WHITESPACE;
  lex[' '] = LEX_IS_WHITESPACE;
  lex[':'] = LEX_IS_COLON;
  lex[';'] = LEX_IS_LINE_SEPARATOR;
  lex['"'] = LEX_IS_STRINGQUOTE;

  /* A single quote normally introduces a one-character constant; with
     SINGLE_QUOTE_STRINGS it delimits strings instead.  */
#ifndef TC_HPPA
  lex['\''] = LEX_IS_ONECHAR_QUOTE;
#endif
#ifdef SINGLE_QUOTE_STRINGS
  lex['\''] = LEX_IS_STRINGQUOTE;
#endif

  /* Symbol characters.  */
  cp = symbol_chars;
  while (*cp != '\0')
    lex[(unsigned char) *cp++] = LEX_IS_SYMBOL_COMPONENT;

  /* Comment characters.  */
  cp = comment_chars;
  while (*cp != '\0')
    lex[(unsigned char) *cp++] = LEX_IS_COMMENT_START;

  /* Line comment characters.  */
  cp = line_comment_chars;
  while (*cp != '\0')
    lex[(unsigned char) *cp++] = LEX_IS_LINE_COMMENT_START;

  /* Line separators.  */
  cp = line_separator_chars;
  while (*cp != '\0')
    lex[(unsigned char) *cp++] = LEX_IS_LINE_SEPARATOR;

  /* Slash-star comments are recognized through the table only when '/'
     and '*' have no other class yet.  FIXME-soon: this is a bad hack,
     but otherwise we can't do c-style comments when '/' is a line
     comment char.  xoxorich.  */
  if (!lex['/'])
    lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
  if (!lex['*'])
    lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
}				/* do_scrub_begin() */

/* Stream the file-based scrubber callbacks read from and push back to.  */
FILE *scrub_file;

/* Input callback for do_scrub_next_char (): fetch the next character from
   scrub_file, or EOF when getc () reports end of input.  */
int 
scrub_from_file ()
{
  int next_ch;

  next_ch = getc (scrub_file);
  return next_ch;
}

/* Push-back callback for do_scrub_next_char (): hand CH back to
   scrub_file with ungetc ().  */
void 
scrub_to_file (ch)
     int ch;
{
  FILE *stream = scrub_file;

  ungetc (ch, stream);
}				/* scrub_to_file() */

/* Current read position and end (one past the last character) of the
   in-memory text the string-based scrubber callbacks operate on.  */
char *scrub_string;
char *scrub_last_string;

/* Input callback for do_scrub_next_char (): return the next character of
   the string, or EOF once scrub_string has reached scrub_last_string.

   The character is returned as an unsigned char value, the way getc ()
   does it.  Returning the plain char would turn byte 0xff into -1 on hosts
   where char is signed, which is indistinguishable from EOF, and would
   turn other high bytes into negative indices for the lex[] table.  */
int 
scrub_from_string ()
{
  if (scrub_string == scrub_last_string)
    return EOF;
  return (unsigned char) *scrub_string++;
}				/* scrub_from_string() */

/* Push-back callback for do_scrub_next_char (): store CH just in front of
   the current read position so that it is the next character read.

   Like ungetc (), pushing back EOF does nothing; storing it would put a
   stray byte in front of the text.  */
void 
scrub_to_string (ch)
     int ch;
{
  if (ch == EOF)
    return;
  *--scrub_string = ch;
}				/* scrub_to_string() */

/* Saved state of the scrubber.  These persist between calls to
   do_scrub_next_char (), which is how it resumes where it left off.  */
/* Current state; the values are listed at the top of
   do_scrub_next_char ().  */
static int state;
/* State to go back to once a string, canned output string or comment has
   been dealt with.  */
static int old_state;
/* Next character to emit while in state -1.  */
static char *out_string;
/* Holds the decimal text of a one-character constant while it is being
   emitted through out_string.  */
static char out_buf[20];
/* Number of newlines that were swallowed (inside comments, or after a
   backslash in a string) and still have to be emitted.  */
static int add_newlines = 0;

/* Data structure for saving the state of app across #include's.  Note that
   app is called asynchronously to the parsing of the .include's, so our
   state at the time .include is interpreted is completely unrelated.
   That's why we have to save it all.  */

/* Snapshot of every file-level scrubber variable; filled in by app_push ()
   and copied back by app_pop ().  Each member mirrors the variable of the
   same name.  */
struct app_save
  {
    int state;
    int old_state;
    char *out_string;
    char out_buf[sizeof (out_buf)];	/* same size as the global buffer */
    int add_newlines;
    char *scrub_string;
    char *scrub_last_string;
    FILE *scrub_file;
  };

/* Take a snapshot of the scrubber's file-level variables in a freshly
   xmalloc'ed struct app_save and return it as an opaque char * handle.
   The scrubber variables themselves are left as they are.  Hand the
   handle to app_pop () to restore the snapshot and release the memory.  */
char *
app_push ()
{
  register struct app_save *snapshot;

  snapshot = (struct app_save *) xmalloc (sizeof (struct app_save));

  /* Input source.  */
  snapshot->scrub_file = scrub_file;
  snapshot->scrub_string = scrub_string;
  snapshot->scrub_last_string = scrub_last_string;

  /* State machine.  */
  snapshot->state = state;
  snapshot->old_state = old_state;
  snapshot->add_newlines = add_newlines;

  /* Pending output.  */
  snapshot->out_string = out_string;
  memcpy (snapshot->out_buf, out_buf, sizeof (out_buf));

  /* do_scrub_begin() is not useful, just wastes time. */
  return (char *) snapshot;
}

/* Restore the scrubber's file-level variables from ARG, a handle that
   app_push () returned, and then free the snapshot.  ARG must not be used
   again afterwards.  */
void 
app_pop (arg)
     char *arg;
{
  register struct app_save *snapshot;

  snapshot = (struct app_save *) arg;

  /* There is no do_scrub_end (). */

  /* Input source.  */
  scrub_file = snapshot->scrub_file;
  scrub_string = snapshot->scrub_string;
  scrub_last_string = snapshot->scrub_last_string;

  /* State machine.  */
  state = snapshot->state;
  old_state = snapshot->old_state;
  add_newlines = snapshot->add_newlines;

  /* Pending output.  */
  out_string = snapshot->out_string;
  memcpy (out_buf, snapshot->out_buf, sizeof (out_buf));

  free (arg);
}				/* app_pop() */

/* Translate CH, the character that followed a backslash, into the
   character the escape stands for.  The letters b, f, n, r and t give the
   corresponding control character; every other value, the two quote
   characters included, is returned unchanged.

   @@ This assumes that \n &c are the same on host and target.  This is not
   necessarily true.  */
static int 
process_escape (ch)
     int ch;
{
  /* escape_letters[i] maps to escape_values[i].  */
  static const char escape_letters[] = "bfnrt";
  static const char escape_values[] = "\b\f\n\r\t";
  int i;

  for (i = 0; escape_letters[i] != '\0'; i++)
    {
      if (ch == escape_letters[i])
	return escape_values[i];
    }

  /* Not a letter escape: the character stands for itself.  */
  return ch;
}
/* Return the next character of the scrubbed input, or EOF.

   GET is called with no arguments to fetch the next raw input character
   (EOF at end of input); UNGET is called with one character to push it
   back so that the next GET returns it.  The position in the state
   machine is kept in the file-level variables state, old_state,
   out_string, out_buf and add_newlines, so each call carries on from
   where the previous one stopped.  */
int 
do_scrub_next_char (get, unget)
     int (*get) ();
     void (*unget) ();
{
  /*State 0: beginning of normal line
	  1: After first whitespace on line (flush more white)
	  2: After first non-white (opcode) on line (keep 1white)
	  3: after second white on line (into operands) (flush white)
	  4: after putting out a .line, put out digits
	  5: parsing a string, then go to old-state
	  6: putting out \ escape in a "d string.
	  7: After putting out a .appfile, put out string.
	  8: After putting out a .appfile string, flush until newline.
	  9: After seeing symbol char in state 3 (keep 1white after symchar)
	 10: After seeing whitespace in state 9 (keep white before symchar)
	  -1: output string in out_string and go to the state in old_state
	  -2: flush text until a '*' '/' is seen, then go to state old_state
	  */

  /* I added states 9 and 10 because the MIPS ECOFF assembler uses
     constructs like ``.loc 1 20''.  This was turning into ``.loc
     120''.  States 9 and 10 ensure that a space is never dropped in
     between characters which could appear in a identifier.  Ian
     Taylor, ian@cygnus.com.  */

  register int ch, ch2 = 0;
  int not_cpp_line = 0;

  switch (state)
    {
    case -1:
      ch = *out_string++;
      if (*out_string == 0)
	{
	  state = old_state;
	  old_state = 3;
	}
      return ch;

    case -2:
      for (;;)
	{
	  do
	    {
	      ch = (*get) ();
	    }
	  while (ch != EOF && ch != '\n' && ch != '*');
	  if (ch == '\n' || ch == EOF)
	    return ch;

	  /* At this point, ch must be a '*' */
	  while ((ch = (*get) ()) == '*')
	    {
	      ;
	    }
	  if (ch == EOF || ch == '/')
	    break;
	  (*unget) (ch);
	}
      state = old_state;
      return ' ';

    case 4:
      ch = (*get) ();
      if (ch == EOF || (ch >= '0' && ch <= '9'))
	return ch;
      else
	{
	  while (ch != EOF && IS_WHITESPACE (ch))
	    ch = (*get) ();
	  if (ch == '"')
	    {
	      (*unget) (ch);
	      out_string = "\n\t.appfile ";
	      old_state = 7;
	      state = -1;
	      return *out_string++;
	    }
	  else
	    {
	      while (ch != EOF && ch != '\n')
		ch = (*get) ();
	      state = 0;
	      return ch;
	    }
	}

    case 5:
      ch = (*get) ();
      /* EOF has to be tested first: it is not a valid index for lex[].  */
      if (ch == EOF)
	{
	  as_warn ("End of file in string: inserted '\"'");
	  state = old_state;
	  (*unget) ('\n');
	  return '"';
	}
      else if (lex[ch] == LEX_IS_STRINGQUOTE)
	{
	  state = old_state;
	  return ch;
	}
      else if (ch == '\\')
	{
	  state = 6;
	  return ch;
	}
      else
	{
	  return ch;
	}

    case 6:
      state = 5;
      ch = (*get) ();
      switch (ch)
	{
	  /* Handle strings broken across lines, by turning '\n' into
	     '\\' and 'n'.  */
	case '\n':
	  (*unget) ('n');
	  add_newlines++;
	  return '\\';

	case '"':
	case '\\':
	case 'b':
	case 'f':
	case 'n':
	case 'r':
	case 't':
#ifdef BACKSLASH_V
	case 'v':
#endif /* BACKSLASH_V */
	case 'x':
	case 'X':
	case '0':
	case '1':
	case '2':
	case '3':
	case '4':
	case '5':
	case '6':
	case '7':
	  break;
#if defined(IGNORE_NONSTANDARD_ESCAPES) | defined(ONLY_STANDARD_ESCAPES)
	default:
	  as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
	  break;
#else /* ONLY_STANDARD_ESCAPES */
	default:
	  /* Accept \x as x for any x */
	  break;
#endif /* ONLY_STANDARD_ESCAPES */

	case EOF:
	  as_warn ("End of file in string: '\"' inserted");
	  return '"';
	}
      return ch;

    case 7:
      ch = (*get) ();
      state = 5;
      old_state = 8;
      return ch;

    case 8:
      /* Skip the rest of the line.  Stop at EOF as well, otherwise a
	 final line without a newline would make this loop forever.  */
      do
	ch = (*get) ();
      while (ch != EOF && ch != '\n');
      state = 0;
      return ch;
    }

  /* OK, we are somewhere in states 0 through 4 or 9 through 10 */

  /* flushchar: */
  ch = (*get) ();
recycle:
  if (ch == EOF)
    {
      if (state != 0)
	as_warn ("End of file not at end of a line: Newline inserted.");
      return ch;
    }

  switch (lex[ch])
    {
    case LEX_IS_WHITESPACE:
      do
	/* Preserve a single whitespace character at the beginning of
	   a line.  */
	if (state == 0)
	  {
	    state = 1;
	    return ch;
	  }
	else
	  ch = (*get) ();
      while (ch != EOF && IS_WHITESPACE (ch));
      if (ch == EOF)
	return ch;

      if (IS_COMMENT (ch) || (state == 0 && IS_LINE_COMMENT (ch)) || ch == '/' || IS_LINE_SEPARATOR (ch))
	{
	  /* cpp never outputs a leading space before the #, so try to
	     avoid being confused.  */
	  not_cpp_line = 1;
	  goto recycle;
	}
#ifdef MRI
      (*unget) (ch);		/* Put back */
      return ' ';		/* Always return one space at start of line */
#endif

      /* If we're in state 2, we've seen a non-white
	 character followed by whitespace.  If the next
	 character is ':', this is whitespace after a label
	 name which we can ignore.  */
      if (state == 2 && lex[ch] == LEX_IS_COLON)
	{
	  state = 0;
	  return ch;
	}

      switch (state)
	{
	case 0:
	  state++;
	  goto recycle;		/* Punted leading sp */
	case 1:
	  /* We can arrive here if we leave a leading whitespace character
	     at the beginning of a line.  */
	  goto recycle;
	case 2:
	  state = 3;
	  (*unget) (ch);
	  return ' ';		/* Sp after opco */
	case 3:
	  goto recycle;		/* Sp in operands */
	case 9:
	case 10:
	  state = 10;		/* Sp after symbol char */
	  goto recycle;
	default:
	  BAD_CASE (state);
	}
      break;

    case LEX_IS_TWOCHAR_COMMENT_1ST:
      ch2 = (*get) ();
      if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
	{
	  for (;;)
	    {
	      do
		{
		  ch2 = (*get) ();
		  if (ch2 != EOF && IS_NEWLINE (ch2))
		    add_newlines++;
		}
	      while (ch2 != EOF &&
		     (lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));

	      while (ch2 != EOF &&
		     (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
		{
		  ch2 = (*get) ();
		}

	      if (ch2 == EOF
		  || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
		break;
	      /* Push back the character we just read (not the opening
		 '/'), so that a newline right after a '*' is seen again
		 by the loop above and gets counted.  */
	      (*unget) (ch2);
	    }
	  if (ch2 == EOF)
	    as_warn ("End of file in multiline comment");

	  /* The whole comment counts as a single blank.  */
	  ch = ' ';
	  goto recycle;
	}
      else
	{
	  if (ch2 != EOF)
	    (*unget) (ch2);
	  if (state == 9 || state == 10)
	    state = 3;
	  return ch;
	}
      break;

    case LEX_IS_STRINGQUOTE:
      if (state == 9 || state == 10)
	old_state = 3;
      else
	old_state = state;
      state = 5;
      return ch;
#ifndef MRI
#ifndef IEEE_STYLE
    case LEX_IS_ONECHAR_QUOTE:
      ch = (*get) ();
      if (ch == EOF)
	{
	  as_warn ("End-of-file after a one-character quote; \\000 inserted");
	  ch = 0;
	}
      if (ch == '\\')
	{
	  ch = (*get) ();
	  ch = process_escape (ch);
	}
      sprintf (out_buf, "%d", (int) (unsigned char) ch);


      /* None of these 'x constants for us.  We want 'x'.  */
      if ((ch = (*get) ()) != '\'')
	{
#ifdef REQUIRE_CHAR_CLOSE_QUOTE
	  as_warn ("Missing close quote: (assumed)");
#else
	  /* Never hand EOF to the push-back routine.  */
	  if (ch != EOF)
	    (*unget) (ch);
#endif
	}
      if (strlen (out_buf) == 1)
	{
	  return out_buf[0];
	}
      if (state == 9 || state == 10)
	old_state = 3;
      else
	old_state = state;
      state = -1;
      out_string = out_buf;
      return *out_string++;
#endif
#endif
    case LEX_IS_COLON:
      if (state == 9 || state == 10)
	state = 3;
      else if (state != 3)
	state = 0;
      return ch;

    case LEX_IS_NEWLINE:
      /* Roll out a bunch of newlines from inside comments, etc.  */
      if (add_newlines)
	{
	  --add_newlines;
	  (*unget) (ch);
	}
      /* fall thru into... */

    case LEX_IS_LINE_SEPARATOR:
      state = 0;
      return ch;

    case LEX_IS_LINE_COMMENT_START:
      if (state == 0)		/* Only comment at start of line.  */
	{
	  /* FIXME-someday: The two character comment stuff was badly
	     thought out.  On i386, we want '/' as line comment start
	     AND we want C style comments.  hence this hack.  The
	     whole lexical process should be reworked.  xoxorich.  */
	  if (ch == '/')
	    {
	      ch2 = (*get) ();
	      if (ch2 == '*')
		{
		  /* NOTE(review): old_state is not assigned here, so
		     state -2 will resume in whatever old_state last
		     held -- confirm that this is intended.  */
		  state = -2;
		  return (do_scrub_next_char (get, unget));
		}
	      else if (ch2 != EOF)
		{
		  /* Never hand EOF to the push-back routine.  */
		  (*unget) (ch2);
		}
	    }			/* bad hack */

	  if (ch != '#')
	    not_cpp_line = 1;

	  do
	    ch = (*get) ();
	  while (ch != EOF && IS_WHITESPACE (ch));
	  if (ch == EOF)
	    {
	      as_warn ("EOF in comment:  Newline inserted");
	      return '\n';
	    }
	  if (ch < '0' || ch > '9' || not_cpp_line)
	    {
	      /* Non-numerics:  Eat whole comment line */
	      while (ch != EOF && !IS_NEWLINE (ch))
		ch = (*get) ();
	      if (ch == EOF)
		as_warn ("EOF in Comment: Newline inserted");
	      state = 0;
	      return '\n';
	    }
	  /* Numerics begin comment.  Perhaps CPP `# 123 "filename"' */
	  (*unget) (ch);
	  old_state = 4;
	  state = -1;
	  out_string = "\t.appline ";
	  return *out_string++;
	}

      /* We have a line comment character which is not at the start of
	 a line.  If this is also a normal comment character, fall
	 through.  Otherwise treat it as a default character.  */
      if (strchr (comment_chars, ch) == NULL)
	goto de_fault;
      /* Fall through.  */
    case LEX_IS_COMMENT_START:
      do
	ch = (*get) ();
      while (ch != EOF && !IS_NEWLINE (ch));
      if (ch == EOF)
	as_warn ("EOF in comment:  Newline inserted");
      state = 0;
      return '\n';

    case LEX_IS_SYMBOL_COMPONENT:
      if (state == 10)
	{
	  /* This is a symbol character following another symbol
	     character, with whitespace in between.  We skipped the
	     whitespace earlier, so output it now.  */
	  (*unget) (ch);
	  state = 3;
	  return ' ';
	}
      if (state == 3)
	state = 9;
      /* Fall through.  */
    default:
    de_fault:
      /* Some relatively `normal' character.  */
      if (state == 0)
	{
	  state = 2;		/* Now seeing opcode */
	  return ch;
	}
      else if (state == 1)
	{
	  state = 2;		/* Ditto */
	  return ch;
	}
      else if (state == 9)
	{
	  if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
	    state = 3;
	  return ch;
	}
      else if (state == 10)
	{
	  state = 3;
	  return ch;
	}
      else
	{
	  return ch;		/* Opcode or operands already */
	}
    }
  return -1;
}

#ifdef TEST

/* Stand-alone test driver: scrub standard input and copy the result to
   standard output.  It supplies the character tables that
   do_scrub_begin () reads and a minimal as_warn ().  */

const char comment_chars[] = "|";
const char line_comment_chars[] = "#";
const char line_separator_chars[] = "";

int
main ()
{
  int ch;

  do_scrub_begin ();

  /* Read through the file-based callbacks, with stdin as the source.  */
  scrub_file = stdin;
  while ((ch = do_scrub_next_char (scrub_from_file, scrub_to_file)) != EOF)
    putc (ch, stdout);
  return 0;
}

/* Print STR followed by a newline on stderr.  Any further arguments that
   callers pass for a format directive are ignored, so the directive is
   printed literally.
   NOTE(review): this definition may clash with a declaration of as_warn
   in as.h -- confirm when building with TEST defined.  */
as_warn (str)
     char *str;
{
  fputs (str, stderr);
  putc ('\n', stderr);
}

#endif

/* end of app.c */
Commit	Line	Data
fecd2382	1	/* This is the Assembler Pre-Processor
5a051773	2	Copyright (C) 1987, 1990, 1991, 1992, 1994 Free Software Foundation, Inc.
6efd877d	3
a39116f1	4	This file is part of GAS, the GNU Assembler.
6efd877d	5
a39116f1 RP	6	GAS is free software; you can redistribute it and/or modify
	7	it under the terms of the GNU General Public License as published by
	8	the Free Software Foundation; either version 2, or (at your option)
	9	any later version.
6efd877d	10
a39116f1 RP	11	GAS is distributed in the hope that it will be useful,
	12	but WITHOUT ANY WARRANTY; without even the implied warranty of
	13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	14	GNU General Public License for more details.
6efd877d	15
a39116f1 RP	16	You should have received a copy of the GNU General Public License
	17	along with GAS; see the file COPYING. If not, write to
	18	the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
fecd2382	19
58d4951d	20	/* Modified by Allen Wirfs-Brock, Instantiations Inc 2/90 */
fecd2382 RP	21	/* App, the assembler pre-processor. This pre-processor strips out excess
fecd2382 RP	22	spaces, turns single-quoted characters into a decimal constant, and turns
9a7d824a	23	# <number> <filename> <garbage> into a .line <number>\n.file <filename>
be06bdcd	24	pair. This needs better error-handling.
a39116f1	25	*/
fecd2382 RP	26
fecd2382 RP	27	#include <stdio.h>
6efd877d	28	#include "as.h" /* For BAD_CASE() only */
fecd2382	29
5a051773 SS	30	#if (__STDC__ != 1)
	31	#ifndef const
	32	#define const /* empty */
	33	#endif
fecd2382 RP	34	#endif
fecd2382 RP	35
6efd877d	36	static char lex[256];
6d331d71	37	static const char symbol_chars[] =
6efd877d	38	"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
fecd2382 RP	39
	40	#define LEX_IS_SYMBOL_COMPONENT 1
	41	#define LEX_IS_WHITESPACE 2
	42	#define LEX_IS_LINE_SEPARATOR 3
	43	#define LEX_IS_COMMENT_START 4
	44	#define LEX_IS_LINE_COMMENT_START 5
	45	#define LEX_IS_TWOCHAR_COMMENT_1ST 6
	46	#define LEX_IS_TWOCHAR_COMMENT_2ND 7
	47	#define LEX_IS_STRINGQUOTE 8
	48	#define LEX_IS_COLON 9
	49	#define LEX_IS_NEWLINE 10
	50	#define LEX_IS_ONECHAR_QUOTE 11
a39116f1 RP	51	#define IS_SYMBOL_COMPONENT(c) (lex[c] == LEX_IS_SYMBOL_COMPONENT)
	52	#define IS_WHITESPACE(c) (lex[c] == LEX_IS_WHITESPACE)
	53	#define IS_LINE_SEPARATOR(c) (lex[c] == LEX_IS_LINE_SEPARATOR)
	54	#define IS_COMMENT(c) (lex[c] == LEX_IS_COMMENT_START)
	55	#define IS_LINE_COMMENT(c) (lex[c] == LEX_IS_LINE_COMMENT_START)
	56	#define IS_NEWLINE(c) (lex[c] == LEX_IS_NEWLINE)
	57
385ce433 JL	58	static int process_escape PARAMS ((int));
385ce433 JL	59
a39116f1 RP	60	/* FIXME-soon: The entire lexer/parser thingy should be
	61	built statically at compile time rather than dynamically
	62	each and every time the assembler is run. xoxorich. */
fecd2382	63
6efd877d KR	64	void
	65	do_scrub_begin ()
	66	{
	67	const char *p;
	68
	69	lex[' '] = LEX_IS_WHITESPACE;
	70	lex['\t'] = LEX_IS_WHITESPACE;
	71	lex['\n'] = LEX_IS_NEWLINE;
	72	lex[';'] = LEX_IS_LINE_SEPARATOR;
	73	lex['"'] = LEX_IS_STRINGQUOTE;
58d4951d	74	#ifndef TC_HPPA
6efd877d	75	lex['\''] = LEX_IS_ONECHAR_QUOTE;
58d4951d	76	#endif
6efd877d	77	lex[':'] = LEX_IS_COLON;
7c2d4011	78
be06bdcd SC	79
	80
	81	#ifdef SINGLE_QUOTE_STRINGS
	82	lex['\''] = LEX_IS_STRINGQUOTE;
7c2d4011	83	#endif
be06bdcd	84
6efd877d	85	/* Note that these override the previous defaults, e.g. if ';'
be06bdcd	86
fecd2382	87	is a comment char, then it isn't a line separator. */
6efd877d KR	88	for (p = symbol_chars; *p; ++p)
6efd877d KR	89	{
58d4951d	90	lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
6efd877d KR	91	} /* declare symbol characters */
6efd877d KR	92
6efd877d KR	93	for (p = comment_chars; *p; p++)
6efd877d KR	94	{
58d4951d	95	lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
6efd877d KR	96	} /* declare comment chars */
6efd877d KR	97
9a7d824a ILT	98	for (p = line_comment_chars; *p; p++)
9a7d824a ILT	99	{
58d4951d	100	lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
9a7d824a ILT	101	} /* declare line comment chars */
9a7d824a ILT	102
6efd877d KR	103	for (p = line_separator_chars; *p; p++)
6efd877d KR	104	{
58d4951d	105	lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
6efd877d KR	106	} /* declare line separators */
	107
	108	/* Only allow slash-star comments if slash is not in use */
	109	if (lex['/'] == 0)
	110	{
	111	lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
	112	}
	113	/* FIXME-soon. This is a bad hack but otherwise, we
a39116f1 RP	114	can't do c-style comments when '/' is a line
a39116f1 RP	115	comment char. xoxorich. */
6efd877d KR	116	if (lex['*'] == 0)
	117	{
	118	lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
	119	}
	120	} /* do_scrub_begin() */
fecd2382 RP	121
	122	FILE *scrub_file;
	123
6efd877d KR	124	int
	125	scrub_from_file ()
	126	{
	127	return getc (scrub_file);
fecd2382 RP	128	}
fecd2382 RP	129
6efd877d KR	130	void
	131	scrub_to_file (ch)
	132	int ch;
fecd2382	133	{
6efd877d KR	134	ungetc (ch, scrub_file);
6efd877d KR	135	} /* scrub_to_file() */
fecd2382 RP	136
	137	char *scrub_string;
	138	char *scrub_last_string;
	139
6efd877d KR	140	int
	141	scrub_from_string ()
	142	{
	143	return scrub_string == scrub_last_string ? EOF : *scrub_string++;
	144	} /* scrub_from_string() */
fecd2382	145
6efd877d KR	146	void
	147	scrub_to_string (ch)
	148	int ch;
fecd2382	149	{
6efd877d KR	150	*--scrub_string = ch;
6efd877d KR	151	} /* scrub_to_string() */
fecd2382 RP	152
	153	/* Saved state of the scrubber */
	154	static int state;
	155	static int old_state;
	156	static char *out_string;
	157	static char out_buf[20];
	158	static int add_newlines = 0;
	159
	160	/* Data structure for saving the state of app across #include's. Note that
	161	app is called asynchronously to the parsing of the .include's, so our
	162	state at the time .include is interpreted is completely unrelated.
	163	That's why we have to save it all. */
	164
6efd877d KR	165	struct app_save
	166	{
	167	int state;
	168	int old_state;
	169	char *out_string;
	170	char out_buf[sizeof (out_buf)];
	171	int add_newlines;
	172	char *scrub_string;
	173	char *scrub_last_string;
	174	FILE *scrub_file;
	175	};
	176
	177	char *
	178	app_push ()
	179	{
7c2d4011 SC	180	register struct app_save *saved;
7c2d4011 SC	181
6efd877d KR	182	saved = (struct app_save ) xmalloc (sizeof (saved));
	183	saved->state = state;
	184	saved->old_state = old_state;
	185	saved->out_string = out_string;
58d4951d	186	memcpy (saved->out_buf, out_buf, sizeof (out_buf));
6efd877d KR	187	saved->add_newlines = add_newlines;
6efd877d KR	188	saved->scrub_string = scrub_string;
7c2d4011	189	saved->scrub_last_string = scrub_last_string;
6efd877d	190	saved->scrub_file = scrub_file;
7c2d4011 SC	191
7c2d4011 SC	192	/* do_scrub_begin() is not useful, just wastes time. */
6efd877d	193	return (char *) saved;
fecd2382 RP	194	}
fecd2382 RP	195
6efd877d KR	196	void
	197	app_pop (arg)
	198	char *arg;
fecd2382	199	{
6efd877d KR	200	register struct app_save saved = (struct app_save ) arg;
	201
	202	/* There is no do_scrub_end (). */
	203	state = saved->state;
	204	old_state = saved->old_state;
	205	out_string = saved->out_string;
58d4951d	206	memcpy (out_buf, saved->out_buf, sizeof (out_buf));
6efd877d KR	207	add_newlines = saved->add_newlines;
	208	scrub_string = saved->scrub_string;
	209	scrub_last_string = saved->scrub_last_string;
	210	scrub_file = saved->scrub_file;
	211
	212	free (arg);
	213	} /* app_pop() */
	214
6d331d71 KR	215	/* @@ This assumes that \n &c are the same on host and target. This is not
6d331d71 KR	216	necessarily true. */
385ce433	217	static int
6efd877d	218	process_escape (ch)
385ce433	219	int ch;
7c2d4011	220	{
6efd877d KR	221	switch (ch)
	222	{
	223	case 'b':
	224	return '\b';
	225	case 'f':
	226	return '\f';
	227	case 'n':
	228	return '\n';
	229	case 'r':
	230	return '\r';
	231	case 't':
	232	return '\t';
	233	case '\'':
	234	return '\'';
	235	case '"':
6d331d71	236	return '\"';
6efd877d KR	237	default:
	238	return ch;
	239	}
7c2d4011	240	}
6efd877d KR	241	int
	242	do_scrub_next_char (get, unget)
	243	int (*get) ();
	244	void (*unget) ();
fecd2382	245	{
6efd877d	246	/*State 0: beginning of normal line
a39116f1 RP	247	1: After first whitespace on line (flush more white)
	248	2: After first non-white (opcode) on line (keep 1white)
	249	3: after second white on line (into operands) (flush white)
	250	4: after putting out a .line, put out digits
	251	5: parsing a string, then go to old-state
	252	6: putting out \ escape in a "d string.
9a7d824a ILT	253	7: After putting out a .appfile, put out string.
9a7d824a ILT	254	8: After putting out a .appfile string, flush until newline.
f6a91cc0	255	9: After seeing symbol char in state 3 (keep 1white after symchar)
9a7d824a	256	10: After seeing whitespace in state 9 (keep white before symchar)
a39116f1 RP	257	-1: output string in out_string and go to the state in old_state
	258	-2: flush text until a '*' '/' is seen, then go to state old_state
	259	*/
6efd877d	260
9a7d824a ILT	261	/* I added states 9 and 10 because the MIPS ECOFF assembler uses
	262	constructs like ``.loc 1 20''. This was turning into ``.loc
	263	120''. States 9 and 10 ensure that a space is never dropped in
	264	between characters which could appear in a identifier. Ian
	265	Taylor, [email protected]. */
f6a91cc0	266
6efd877d	267	register int ch, ch2 = 0;
385ce433	268	int not_cpp_line = 0;
6efd877d KR	269
	270	switch (state)
	271	{
	272	case -1:
	273	ch = *out_string++;
	274	if (*out_string == 0)
	275	{
	276	state = old_state;
	277	old_state = 3;
	278	}
	279	return ch;
	280
	281	case -2:
	282	for (;;)
	283	{
	284	do
	285	{
	286	ch = (*get) ();
	287	}
	288	while (ch != EOF && ch != '\n' && ch != '*');
	289	if (ch == '\n' \|\| ch == EOF)
	290	return ch;
	291
	292	/* At this point, ch must be a '' /
	293	while ((ch = (get) ()) == '')
	294	{
	295	;
	296	}
	297	if (ch == EOF \|\| ch == '/')
	298	break;
	299	(*unget) (ch);
	300	}
	301	state = old_state;
	302	return ' ';
	303
	304	case 4:
	305	ch = (*get) ();
	306	if (ch == EOF \|\| (ch >= '0' && ch <= '9'))
	307	return ch;
	308	else
	309	{
	310	while (ch != EOF && IS_WHITESPACE (ch))
	311	ch = (*get) ();
	312	if (ch == '"')
	313	{
	314	(*unget) (ch);
001581c7	315	out_string = "\n\t.appfile ";
6efd877d KR	316	old_state = 7;
	317	state = -1;
	318	return *out_string++;
	319	}
	320	else
	321	{
	322	while (ch != EOF && ch != '\n')
	323	ch = (*get) ();
58d4951d	324	state = 0;
6efd877d KR	325	return ch;
	326	}
	327	}
	328
	329	case 5:
	330	ch = (*get) ();
	331	if (lex[ch] == LEX_IS_STRINGQUOTE)
	332	{
	333	state = old_state;
	334	return ch;
	335	}
	336	else if (ch == '\\')
	337	{
	338	state = 6;
	339	return ch;
	340	}
	341	else if (ch == EOF)
	342	{
	343	as_warn ("End of file in string: inserted '\"'");
	344	state = old_state;
	345	(*unget) ('\n');
	346	return '"';
	347	}
	348	else
	349	{
	350	return ch;
	351	}
	352
	353	case 6:
	354	state = 5;
	355	ch = (*get) ();
	356	switch (ch)
	357	{
6d331d71 KR	358	/* Handle strings broken across lines, by turning '\n' into
6d331d71 KR	359	'\\' and 'n'. */
6efd877d KR	360	case '\n':
	361	(*unget) ('n');
	362	add_newlines++;
	363	return '\\';
	364
	365	case '"':
	366	case '\\':
	367	case 'b':
	368	case 'f':
	369	case 'n':
	370	case 'r':
	371	case 't':
fecd2382	372	#ifdef BACKSLASH_V
6efd877d	373	case 'v':
fecd2382	374	#endif /* BACKSLASH_V */
385ce433 JL	375	case 'x':
385ce433 JL	376	case 'X':
6efd877d KR	377	case '0':
	378	case '1':
	379	case '2':
	380	case '3':
	381	case '4':
	382	case '5':
	383	case '6':
	384	case '7':
	385	break;
7c2d4011	386	#if defined(IGNORE_NONSTANDARD_ESCAPES) \| defined(ONLY_STANDARD_ESCAPES)
6efd877d KR	387	default:
	388	as_warn ("Unknown escape '\\%c' in string: Ignored", ch);
	389	break;
fecd2382	390	#else /* ONLY_STANDARD_ESCAPES */
6efd877d KR	391	default:
	392	/* Accept \x as x for any x */
	393	break;
fecd2382	394	#endif /* ONLY_STANDARD_ESCAPES */
7c2d4011	395
6efd877d KR	396	case EOF:
	397	as_warn ("End of file in string: '\"' inserted");
	398	return '"';
	399	}
	400	return ch;
	401
	402	case 7:
	403	ch = (*get) ();
	404	state = 5;
	405	old_state = 8;
	406	return ch;
	407
	408	case 8:
	409	do
	410	ch = (*get) ();
	411	while (ch != '\n');
	412	state = 0;
	413	return ch;
	414	}
	415
9a7d824a	416	/* OK, we are somewhere in states 0 through 4 or 9 through 10 */
6efd877d KR	417
	418	/* flushchar: */
	419	ch = (*get) ();
	420	recycle:
	421	if (ch == EOF)
	422	{
	423	if (state != 0)
	424	as_warn ("End of file not at end of a line: Newline inserted.");
	425	return ch;
	426	}
	427
	428	switch (lex[ch])
	429	{
	430	case LEX_IS_WHITESPACE:
	431	do
385ce433 JL	432	/* Preserve a single whitespace character at the beginning of
	433	a line. */
	434	if (state == 0)
	435	{
	436	state = 1;
	437	return ch;
	438	}
	439	else
	440	ch = (*get) ();
6efd877d KR	441	while (ch != EOF && IS_WHITESPACE (ch));
	442	if (ch == EOF)
	443	return ch;
	444
	445	if (IS_COMMENT (ch) \|\| (state == 0 && IS_LINE_COMMENT (ch)) \|\| ch == '/' \|\| IS_LINE_SEPARATOR (ch))
	446	{
385ce433 JL	447	/* cpp never outputs a leading space before the #, so try to
	448	avoid being confused. */
	449	not_cpp_line = 1;
6efd877d	450	goto recycle;
fecd2382	451	}
5a051773 SS	452	#ifdef MRI
	453	(unget) (ch); / Put back */
	454	return ' '; /* Always return one space at start of line */
	455	#endif
6efd877d KR	456
6efd877d KR	457	/* If we're in state 2, we've seen a non-white
6d331d71 KR	458	character followed by whitespace. If the next
	459	character is ':', this is whitespace after a label
	460	name which we can ignore. */
6efd877d KR	461	if (state == 2 && lex[ch] == LEX_IS_COLON)
	462	{
	463	state = 0;
	464	return ch;
	465	}
	466
	467	switch (state)
	468	{
	469	case 0:
	470	state++;
	471	goto recycle; /* Punted leading sp */
	472	case 1:
385ce433 JL	473	/* We can arrive here if we leave a leading whitespace character
	474	at the beginning of a line. */
	475	goto recycle;
6efd877d	476	case 2:
f6a91cc0	477	state = 3;
6efd877d KR	478	(*unget) (ch);
	479	return ' '; /* Sp after opco */
	480	case 3:
	481	goto recycle; /* Sp in operands */
9a7d824a ILT	482	case 9:
	483	case 10:
	484	state = 10; /* Sp after symbol char */
	485	goto recycle;
6efd877d KR	486	default:
	487	BAD_CASE (state);
	488	}
	489	break;
	490
	491	case LEX_IS_TWOCHAR_COMMENT_1ST:
	492	ch2 = (*get) ();
	493	if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)
	494	{
	495	for (;;)
	496	{
	497	do
	498	{
	499	ch2 = (*get) ();
	500	if (ch2 != EOF && IS_NEWLINE (ch2))
	501	add_newlines++;
fecd2382	502	}
6efd877d KR	503	while (ch2 != EOF &&
	504	(lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
	505
	506	while (ch2 != EOF &&
	507	(lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND))
	508	{
	509	ch2 = (*get) ();
fecd2382	510	}
6efd877d KR	511
	512	if (ch2 == EOF
	513	\|\| lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
fecd2382	514	break;
6efd877d KR	515	(*unget) (ch);
	516	}
	517	if (ch2 == EOF)
	518	as_warn ("End of file in multiline comment");
	519
	520	ch = ' ';
	521	goto recycle;
	522	}
	523	else
	524	{
	525	if (ch2 != EOF)
	526	(*unget) (ch2);
9a7d824a ILT	527	if (state == 9 \|\| state == 10)
9a7d824a ILT	528	state = 3;
6efd877d KR	529	return ch;
	530	}
	531	break;
	532
	533	case LEX_IS_STRINGQUOTE:
9a7d824a ILT	534	if (state == 9 \|\| state == 10)
	535	old_state = 3;
	536	else
	537	old_state = state;
6efd877d KR	538	state = 5;
	539	return ch;
	540	#ifndef MRI
a39116f1	541	#ifndef IEEE_STYLE
6efd877d KR	542	case LEX_IS_ONECHAR_QUOTE:
	543	ch = (*get) ();
	544	if (ch == EOF)
	545	{
	546	as_warn ("End-of-file after a one-character quote; \\000 inserted");
	547	ch = 0;
	548	}
	549	if (ch == '\\')
	550	{
	551	ch = (*get) ();
	552	ch = process_escape (ch);
	553	}
	554	sprintf (out_buf, "%d", (int) (unsigned char) ch);
7c2d4011	555
6efd877d	556
9a7d824a	557	/* None of these 'x constants for us. We want 'x'. */
6efd877d KR	558	if ((ch = (*get) ()) != '\'')
6efd877d KR	559	{
fecd2382	560	#ifdef REQUIRE_CHAR_CLOSE_QUOTE
6efd877d	561	as_warn ("Missing close quote: (assumed)");
fecd2382	562	#else
6efd877d	563	(*unget) (ch);
fecd2382	564	#endif
6efd877d KR	565	}
	566	if (strlen (out_buf) == 1)
	567	{
	568	return out_buf[0];
	569	}
9a7d824a ILT	570	if (state == 9 \|\| state == 10)
	571	old_state = 3;
	572	else
	573	old_state = state;
6efd877d KR	574	state = -1;
	575	out_string = out_buf;
	576	return *out_string++;
7c2d4011	577	#endif
a39116f1	578	#endif
6efd877d	579	case LEX_IS_COLON:
9a7d824a ILT	580	if (state == 9 \|\| state == 10)
	581	state = 3;
	582	else if (state != 3)
6efd877d KR	583	state = 0;
	584	return ch;
	585
	586	case LEX_IS_NEWLINE:
	587	/* Roll out a bunch of newlines from inside comments, etc. */
	588	if (add_newlines)
	589	{
	590	--add_newlines;
	591	(*unget) (ch);
	592	}
	593	/* fall thru into... */
	594
	595	case LEX_IS_LINE_SEPARATOR:
	596	state = 0;
	597	return ch;
	598
	599	case LEX_IS_LINE_COMMENT_START:
9a7d824a	600	if (state == 0) /* Only comment at start of line. */
6efd877d	601	{
9a7d824a ILT	602	/* FIXME-someday: The two character comment stuff was badly
	603	thought out. On i386, we want '/' as line comment start
	604	AND we want C style comments. hence this hack. The
	605	whole lexical process should be reworked. xoxorich. */
	606	if (ch == '/')
f6a91cc0	607	{
9a7d824a ILT	608	ch2 = (*get) ();
	609	if (ch2 == '*')
	610	{
	611	state = -2;
	612	return (do_scrub_next_char (get, unget));
	613	}
	614	else
	615	{
	616	(*unget) (ch2);
	617	}
	618	} /* bad hack */
6efd877d	619
385ce433 JL	620	if (ch != '#')
	621	not_cpp_line = 1;
	622
9a7d824a	623	do
6efd877d	624	ch = (*get) ();
9a7d824a	625	while (ch != EOF && IS_WHITESPACE (ch));
6efd877d	626	if (ch == EOF)
9a7d824a ILT	627	{
	628	as_warn ("EOF in comment: Newline inserted");
	629	return '\n';
	630	}
385ce433	631	if (ch < '0' \|\| ch > '9' \|\| not_cpp_line)
9a7d824a ILT	632	{
	633	/* Non-numerics: Eat whole comment line */
	634	while (ch != EOF && !IS_NEWLINE (ch))
	635	ch = (*get) ();
	636	if (ch == EOF)
	637	as_warn ("EOF in Comment: Newline inserted");
	638	state = 0;
	639	return '\n';
	640	}
	641	/* Numerics begin comment. Perhaps CPP `# 123 "filename"' */
	642	(*unget) (ch);
	643	old_state = 4;
	644	state = -1;
001581c7	645	out_string = "\t.appline ";
9a7d824a	646	return *out_string++;
6efd877d	647	}
6efd877d	648
9a7d824a ILT	649	/* We have a line comment character which is not at the start of
	650	a line. If this is also a normal comment character, fall
	651	through. Otherwise treat it as a default character. */
	652	if (strchr (comment_chars, ch) == NULL)
	653	goto de_fault;
	654	/* Fall through. */
6efd877d KR	655	case LEX_IS_COMMENT_START:
	656	do
	657	ch = (*get) ();
	658	while (ch != EOF && !IS_NEWLINE (ch));
	659	if (ch == EOF)
	660	as_warn ("EOF in comment: Newline inserted");
	661	state = 0;
	662	return '\n';
	663
f6a91cc0	664	case LEX_IS_SYMBOL_COMPONENT:
9a7d824a ILT	665	if (state == 10)
	666	{
	667	/* This is a symbol character following another symbol
	668	character, with whitespace in between. We skipped the
	669	whitespace earlier, so output it now. */
	670	(*unget) (ch);
	671	state = 3;
	672	return ' ';
	673	}
f6a91cc0 ILT	674	if (state == 3)
	675	state = 9;
	676	/* Fall through. */
6efd877d KR	677	default:
	678	de_fault:
	679	/* Some relatively `normal' character. */
	680	if (state == 0)
	681	{
	682	state = 2; /* Now seeing opcode */
	683	return ch;
fecd2382	684	}
6efd877d KR	685	else if (state == 1)
	686	{
	687	state = 2; /* Ditto */
	688	return ch;
	689	}
f6a91cc0 ILT	690	else if (state == 9)
	691	{
	692	if (lex[ch] != LEX_IS_SYMBOL_COMPONENT)
	693	state = 3;
	694	return ch;
	695	}
9a7d824a ILT	696	else if (state == 10)
	697	{
	698	state = 3;
	699	return ch;
	700	}
6efd877d KR	701	else
	702	{
	703	return ch; /* Opcode or operands already */
	704	}
	705	}
	706	return -1;
fecd2382 RP	707	}
	708
	709	#ifdef TEST
	710
/* Comment-character tables for the stand-alone TEST build.
   do_scrub_next_char looks a line-comment character up in comment_chars
   (via strchr) when it appears somewhere other than the start of a line.
   The bar is written bare: `\|' is not a valid C escape sequence.  */
const char comment_chars[] = "|";
const char line_comment_chars[] = "#";
fecd2382	713
/* Input callback for the TEST driver: return the next character of
   stdin, or EOF at end of input.  */
static int
test_get ()
{
  return getc (stdin);
}

/* Push-back callback for the TEST driver: return CH to stdin so the
   next test_get call sees it again.
   NOTE(review): ISO C only guarantees one character of pushback per
   stream; confirm the scrubber never ungets twice in a row before
   relying on this for large inputs.  */
static void
test_unget (ch)
     int ch;
{
  ungetc (ch, stdin);
}

/* Stand-alone test driver: initialise the scrubber, then copy stdin to
   stdout through do_scrub_next_char until it reports EOF.

   do_scrub_next_char takes a pair of callbacks (a character source and
   a push-back routine), not a FILE pointer, so stdin is supplied through
   the two adapters above.  */
int
main ()
{
  int ch;

  app_begin ();
  while ((ch = do_scrub_next_char (test_get, test_unget)) != EOF)
    putc (ch, stdout);

  return 0;
}
fecd2382 RP	722
/* TEST-build stand-in for the assembler's warning routine: emit STR on
   stderr, terminated by a newline.  */
int
as_warn (str)
     char *str;
{
  fprintf (stderr, "%s\n", str);
}
6efd877d	729
fecd2382 RP	730	#endif
fecd2382 RP	731
fecd2382	732	/* end of app.c */