[binutils.git] / gas / app.c

/* Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.
   
   Modified by Allen Wirfs-Brock, Instantiations Inc 2/90
   */
/* This is the Assembler Pre-Processor
   Copyright (C) 1987 Free Software Foundation, Inc.
   
   This file is part of GAS, the GNU Assembler.
   
   GAS is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.
   
   GAS is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
   
   You should have received a copy of the GNU General Public License
   along with GAS; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */

/* App, the assembler pre-processor.  This pre-processor strips out excess
   spaces, turns single-quoted characters into a decimal constant, and turns
   # <number> <filename> <garbage> into a .line <number>\n.app-file <filename> pair.
   This needs better error-handling.
   */

#include <stdio.h>
#include "as.h"		/* For BAD_CASE() only */

#if (__STDC__ != 1) && !defined(const)
#define const /* Nothing */
#endif

/* Character-class table used by the scrubber.  It is indexed by character
   value and holds one of the LEX_IS_* codes, or 0 for a character with no
   special meaning.  It is filled in at run time by do_scrub_begin().  */
static char	lex [256];
/* Characters that do_scrub_begin() classifies as LEX_IS_SYMBOL_COMPONENT. */
static char	symbol_chars[] = 
    "$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";

/* NUL-terminated lists of characters that do_scrub_begin() classifies as
   comment starters, line-comment starters and line separators.  They are
   only declared here; the definitions live outside this file.  */
/* These will go in BSS if not defined elsewhere, producing empty strings. */
extern const char comment_chars[];
extern const char line_comment_chars[];
extern const char line_separator_chars[];

/* Character classes stored in lex[].  Each character has exactly one class;
   a later assignment in do_scrub_begin() replaces an earlier one.  A value
   of 0 (no class) means an ordinary character.  */
#define LEX_IS_SYMBOL_COMPONENT		1
#define LEX_IS_WHITESPACE		2
#define LEX_IS_LINE_SEPARATOR		3
#define LEX_IS_COMMENT_START		4
#define LEX_IS_LINE_COMMENT_START	5
#define	LEX_IS_TWOCHAR_COMMENT_1ST	6
#define	LEX_IS_TWOCHAR_COMMENT_2ND	7
#define	LEX_IS_STRINGQUOTE		8
#define	LEX_IS_COLON			9
#define	LEX_IS_NEWLINE			10
#define	LEX_IS_ONECHAR_QUOTE		11
/* Class predicates.  The argument is used directly as an index into lex[],
   so it must be a valid index (0..255) and must never be EOF.  */
#define IS_SYMBOL_COMPONENT(c)		(lex[c] == LEX_IS_SYMBOL_COMPONENT)
#define IS_WHITESPACE(c)		(lex[c] == LEX_IS_WHITESPACE)
#define IS_LINE_SEPARATOR(c)		(lex[c] == LEX_IS_LINE_SEPARATOR)
#define IS_COMMENT(c)			(lex[c] == LEX_IS_COMMENT_START)
#define IS_LINE_COMMENT(c)		(lex[c] == LEX_IS_LINE_COMMENT_START)
#define	IS_NEWLINE(c)			(lex[c] == LEX_IS_NEWLINE)

/* FIXME-soon: The entire lexer/parser thingy should be
   built statically at compile time rather than dynamically
   each and every time the assembler is run.  xoxorich. */

/* Fill in the lex[] character-class table.

   The fixed defaults are installed first, then the symbol characters and
   the comment / line-comment / line-separator character lists.  Later
   assignments replace earlier ones, so the order of the loops below decides
   which class wins when a character appears in more than one list.

   Every subscript taken from a string is converted to unsigned char so that
   a character with the high bit set can never index lex[] negatively on
   hosts where plain char is signed.  */
void do_scrub_begin() {
	const char *p;
	
	lex[' '] = LEX_IS_WHITESPACE;
	lex['\t'] = LEX_IS_WHITESPACE;
	lex['\n'] = LEX_IS_NEWLINE;
	lex[';'] = LEX_IS_LINE_SEPARATOR;
	lex['"'] = LEX_IS_STRINGQUOTE;
	lex['\''] = LEX_IS_ONECHAR_QUOTE;
	lex[':'] = LEX_IS_COLON;
	
	/* Note that these override the previous defaults, e.g. if ';'
	   is a comment char, then it isn't a line separator.  */
	for (p = symbol_chars; *p; ++p) {
		lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT;
	} /* declare symbol characters */
	
	for (p = line_comment_chars; *p; p++) {
		lex[(unsigned char) *p] = LEX_IS_LINE_COMMENT_START;
	} /* declare line comment chars */
	
	for (p = comment_chars; *p; p++) {
		lex[(unsigned char) *p] = LEX_IS_COMMENT_START;
	} /* declare comment chars */
	
	for (p = line_separator_chars; *p; p++) {
		lex[(unsigned char) *p] = LEX_IS_LINE_SEPARATOR;
	} /* declare line separators */
	
	/* Only allow slash-star comments if slash is not in use */
	if (lex['/'] == 0) {
		lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
	}
	/* FIXME-soon.  This is a bad hack but otherwise, we
	   can't do c-style comments when '/' is a line
	   comment char. xoxorich. */
	if (lex['*'] == 0) {
		lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
	}
} /* do_scrub_begin() */

/* Stream read by scrub_from_file() and pushed back to by scrub_to_file(). */
FILE *scrub_file;

/* "get" callback for do_scrub_next_char() when scrubbing a stream:
   returns whatever getc() yields for scrub_file (a character or EOF).  */
int scrub_from_file() {
	int next_ch;

	next_ch = getc(scrub_file);
	return next_ch;
}

/* "unget" callback for do_scrub_next_char() when scrubbing a stream:
   hands CH back to scrub_file via ungetc().  The result of ungetc() is
   deliberately discarded.  */
void scrub_to_file(ch)
int ch;
{
	(void) ungetc(ch, scrub_file);
} /* scrub_to_file() */

/* In-memory input source for the scrubber.  scrub_string points at the next
   unread character; scrub_last_string points one past the last character.  */
char *scrub_string;
char *scrub_last_string;

/* "get" callback for do_scrub_next_char() when scrubbing a buffer.

   Returns the next character as a non-negative int, or EOF once
   scrub_string has reached scrub_last_string.  The character is converted
   through unsigned char (the same convention getc() uses) so that a byte
   with the high bit set is never negative: otherwise 0xFF would be
   indistinguishable from EOF and the value could not be used to index the
   lex[] table.  */
int scrub_from_string() {
	if (scrub_string == scrub_last_string)
		return EOF;
	return (unsigned char) *scrub_string++;
} /* scrub_from_string() */

/* "unget" callback for do_scrub_next_char() when scrubbing a buffer.

   Steps scrub_string back one position and stores CH there, so the next
   scrub_from_string() returns it.  EOF is ignored, as ungetc() does: it is
   not a character, and storing it would clobber a byte of the buffer.  */
void scrub_to_string(ch)
int ch;
{
	if (ch == EOF)
		return;
	*--scrub_string = ch;
} /* scrub_to_string() */

/* Saved state of the scrubber */
/* Current state of the do_scrub_next_char() state machine; the meaning of
   each value is listed at the top of that function.  */
static int state;
/* State to switch to when a temporary state (-1, -2 or 5) finishes.  */
static int old_state;
/* Next character of the canned text being emitted while in state -1.  */
static char *out_string;
/* Holds the decimal text that do_scrub_next_char() generates for a
   one-character quote; out_string points into it while it is emitted.  */
static char out_buf[20];
/* Count of newlines consumed inside comments and strings that still have
   to be re-emitted; do_scrub_next_char() rolls them out when it next sees
   a newline.  */
static int add_newlines = 0;

/* Data structure for saving the state of app across #include's.  Note that
   app is called asynchronously to the parsing of the .include's, so our
   state at the time .include is interpreted is completely unrelated.
   That's why we have to save it all.  */

/* Snapshot of every piece of scrubber state, built by app_push() and
   consumed by app_pop().  Each member mirrors the file-scope variable of
   the same name.  */
struct app_save {
	int state;			/* scrubber state-machine state */
	int old_state;			/* state to resume afterwards */
	char *out_string;		/* pending canned output, if any */
	char out_buf[sizeof (out_buf)];	/* copy of the global out_buf */
	int add_newlines;		/* newlines still owed to the output */
	char *scrub_string;		/* next unread char of string input */
	char *scrub_last_string;	/* one past the end of string input */
	FILE *scrub_file;		/* stream input */
};

/* Save the complete scrubber state.

   Returns a pointer to a freshly xmalloc'ed struct app_save (typed as
   char * for the caller's benefit) holding copies of state, old_state,
   out_string, out_buf, add_newlines, scrub_string, scrub_last_string and
   scrub_file.  Pass the pointer to app_pop() to restore the state and
   release the storage.  The live state itself is left unchanged.  */
char *app_push() {
	register struct app_save *saved;
	
	saved = (struct app_save *) xmalloc(sizeof (*saved));
	saved->state		= state;
	saved->old_state	= old_state;
	saved->out_string	= out_string;
	/* bcopy() takes (source, destination, length): copy the live buffer
	   INTO the save area, not the other way round.  */
	bcopy(out_buf, saved->out_buf, sizeof(out_buf));
	saved->add_newlines	= add_newlines;
	saved->scrub_string	= scrub_string;
	saved->scrub_last_string = scrub_last_string;
	saved->scrub_file	= scrub_file;
	
	/* do_scrub_begin() is not useful, just wastes time. */
	return (char *)saved;
}

/* Restore the scrubber state saved by app_push().

   ARG is the pointer app_push() returned.  Every saved member is copied
   back into the corresponding file-scope variable, then the save area is
   freed, so ARG must not be used again afterwards.  */
void app_pop(arg)
char *arg;
{
	register struct app_save *saved = (struct app_save *)arg;
	
	/* There is no do_scrub_end (). */
	state		= saved->state;
	old_state	= saved->old_state;
	out_string	= saved->out_string;
	/* bcopy() takes (source, destination, length): copy the saved buffer
	   back OVER the live one.  out_string may point into out_buf, which
	   has not moved, so it stays valid.  */
	bcopy (saved->out_buf, out_buf, sizeof (out_buf));
	add_newlines	= saved->add_newlines;
	scrub_string	= saved->scrub_string;
	scrub_last_string = saved->scrub_last_string;
	scrub_file	= saved->scrub_file;
	
	free (arg);
} /* app_pop() */

/* Return the next character of the scrubbed (pre-processed) input.

   GET is called with no arguments and must return the next raw input
   character as an int, or EOF at end of input.  UNGET is called with one
   character to push it back, so that the next GET returns it.  The result
   is the next output character, or EOF once GET has reported end of input.

   All scanner state lives in the file-scope variables state, old_state,
   out_string, out_buf and add_newlines, so a single call may consume many
   input characters (e.g. a whole comment) or none at all (state -1).  */
int do_scrub_next_char(get,unget)
int (*get)();
void (*unget)();
{
	/*State 0: beginning of normal line
	  1: After first whitespace on line (flush more white)
	  2: After first non-white (opcode) on line (keep 1white)
	  3: after second white on line (into operands) (flush white)
	  4: after putting out a .line, put out digits
	  5: parsing a string, then go to old-state
	  6: putting out \ escape in a "d string.
	  7: After putting out a .app-file, put out string.
	  8: After putting out a .app-file string, flush until newline.
	  -1: output string in out_string and go to the state in old_state
	  -2: flush text until a '*' '/' is seen, then go to state old_state
	  */
	
	register int ch, ch2 = 0;
	
	switch (state) {
	case -1: 
		/* The first character of out_string was already returned by
		   whoever entered this state, and every string emitted this
		   way has at least two characters, so *out_string is a real
		   character here.  Leave the state as soon as the character
		   after it is the terminator.  */
		ch= *out_string++;
		if(*out_string==0) {
			state=old_state;
			old_state=3;
		}
		return ch;
		
	case -2:
		/* NOTE(review): old_state is not assigned where state -2 is
		   entered below, so the state resumed after the comment is
		   whatever old_state last held -- confirm this is intended.  */
		for(;;) {
			do {
				ch=(*get)();
			} while(ch!=EOF && ch!='\n' && ch!='*');
			if(ch=='\n' || ch==EOF)
			    return ch;
			
			/* At this point, ch must be a '*' */
			while ( (ch=(*get)()) == '*' ){
				;
			}
			if(ch==EOF || ch=='/')
			    break;
			(*unget)(ch);
		}
		state=old_state;
		return ' ';
		
	case 4:
		ch=(*get)();
		if(ch==EOF || (ch>='0' && ch<='9'))
		    return ch;
		else {
			while(ch!=EOF && IS_WHITESPACE(ch))
			    ch=(*get)();
			if(ch=='"') {
				(*unget)(ch);
				out_string="\n.app-file ";
				old_state=7;
				state= -1;
				return *out_string++;
			} else {
				/* `# <number>' with no file name: discard the
				   rest of the line.  The line is finished, so
				   go back to state 0; staying in state 4 would
				   swallow the following source lines too.  */
				while(ch!=EOF && ch!='\n')
				    ch=(*get)();
				state=0;
				return ch;
			}
		}
		
	case 5:
		ch=(*get)();
		if(ch=='"') {
			state=old_state;
			return '"';
		} else if(ch=='\\') {
			state=6;
			return ch;
		} else if(ch==EOF) {
			as_warn("End of file in string: inserted '\"'");
 			state=old_state;
			(*unget)('\n');
			return '"';
		} else {
			return ch;
		}
		
	case 6:
		state=5;
		ch=(*get)();
		switch(ch) {
			/* This is neat.  Turn "string
			   more string" into "string\n  more string"
			   */
		case '\n':
			(*unget)('n');
			add_newlines++;
			return '\\';
			
		case '"':
		case '\\':
		case 'b':
		case 'f':
		case 'n':
		case 'r':
		case 't':
#ifdef BACKSLASH_V
		case 'v':
#endif /* BACKSLASH_V */
		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
			break;
			
#ifdef ONLY_STANDARD_ESCAPES
		default:
			as_warn("Unknown escape '\\%c' in string: Ignored",ch);
			break;
#else /* ONLY_STANDARD_ESCAPES */
		default:
			/* Accept \x as x for any x */
			break;
#endif /* ONLY_STANDARD_ESCAPES */
			
		case EOF:
			as_warn("End of file in string: '\"' inserted");
			return '"';
		}
		return ch;
		
	case 7:
		ch=(*get)();
		state=5;
		old_state=8;
		return ch;
		
	case 8:
		/* Discard the rest of the `# <number> "file"' line.  Stop at
		   EOF as well as at newline, otherwise a final line without
		   a newline would spin here forever.  */
		do ch= (*get)();
		while(ch!=EOF && ch!='\n');
		state=0;
		return ch;
	}
	
	/* OK, we are somewhere in states 0 through 4 */
	
	/* flushchar: */
	ch=(*get)();
 recycle:
	if (ch == EOF) {
		if (state != 0)
		    as_warn("End of file not at end of a line: Newline inserted.");
		return ch;
	}
	
	switch (lex[ch]) {
	case LEX_IS_WHITESPACE:
		/* Collapse a run of whitespace, then decide from the state
		   whether it is dropped or emitted as a single blank.  */
		do ch=(*get)();
		while(ch!=EOF && IS_WHITESPACE(ch));
		if(ch==EOF)
		    return ch;
		if(IS_COMMENT(ch) || (state==0 && IS_LINE_COMMENT(ch)) || ch=='/' || IS_LINE_SEPARATOR(ch)) {
			goto recycle;
		}
		switch (state) {
		case 0:	state++; goto recycle;	/* Punted leading sp */
		case 1:          BAD_CASE(state); /* We can't get here */
		case 2: state++; (*unget)(ch); return ' ';  /* Sp after opco */
		case 3:		 goto recycle;	/* Sp in operands */
		default:	BAD_CASE(state);
		}
		break;
		
	case LEX_IS_TWOCHAR_COMMENT_1ST:
		ch2=(*get)();
		if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND) {
			for(;;) {
				do {
					ch2=(*get)();
					if(ch2 != EOF && IS_NEWLINE(ch2))
					    add_newlines++;
				} while(ch2!=EOF &&
					(lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
				
				while (ch2!=EOF &&
				       (lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)){
					ch2=(*get)();
				}
				
				if(ch2==EOF 
				   || lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
				    break;
				/* Push back the character that followed the
				   star(s) -- not the opening slash held in ch
				   -- so the loop above sees it again and a
				   newline right after a '*' is still counted.  */
				(*unget)(ch2);
			}
			if(ch2==EOF)
			    as_warn("End of file in multiline comment");
			
			ch = ' ';
			goto recycle;
		} else {
			if(ch2!=EOF)
			    (*unget)(ch2);
			return ch;
		}
		break;
		
	case LEX_IS_STRINGQUOTE:
		old_state=state;
		state=5;
		return ch;
		
#ifndef IEEE_STYLE
	case LEX_IS_ONECHAR_QUOTE:
		ch=(*get)();
		if(ch==EOF) {
			as_warn("End-of-file after a one-character quote; \\000 inserted");
			ch=0;
		}
		sprintf(out_buf,"%d", (int)(unsigned char)ch);
		
		/* None of these 'x constants for us.  We want 'x'.
		 */
		if ( (ch=(*get)()) != '\'' ) {
#ifdef REQUIRE_CHAR_CLOSE_QUOTE
			as_warn("Missing close quote: (assumed)");
#else
			/* EOF is not a character; never push it back.  */
			if(ch!=EOF)
			    (*unget)(ch);
#endif
		}
		
		/* A one-digit value is emitted right here.  State -1 assumes
		   at least one more character is pending, so entering it
		   with nothing left would return the terminating NUL and
		   then read past the end of the string.  */
		if(out_buf[1]=='\0')
		    return out_buf[0];
		
		old_state=state;
		state= -1;
		out_string=out_buf;
		return *out_string++;
#endif
	case LEX_IS_COLON:
		if(state!=3)
		    state=0;
		return ch;
		
	case LEX_IS_NEWLINE:
		/* Roll out a bunch of newlines from inside comments, etc.  */
		if(add_newlines) {
			--add_newlines;
			(*unget)(ch);
		}
		/* fall thru into... */
		
	case LEX_IS_LINE_SEPARATOR:
		state=0;
		return ch;
		
	case LEX_IS_LINE_COMMENT_START:
		if (state != 0)		/* Not at start of line, act normal */
		    goto de_fault;
		
		/* FIXME-someday: The two character comment stuff was badly
		   thought out.  On i386, we want '/' as line comment start
		   AND we want C style comments.  hence this hack.  The
		   whole lexical process should be reworked.  xoxorich.  */
		
		if (ch == '/') {
			ch2 = (*get)();
			if (ch2 == '*') {
				state = -2;
				return(do_scrub_next_char(get, unget));
			}
			/* Only push back what was actually read.  For any
			   other line-comment character nothing was read, and
			   pushing back the unset ch2 would insert a NUL that
			   hides the digits of a `# 123 "file"' line.  */
			if (ch2 != EOF)
			    (*unget)(ch2);
		} /* bad hack */
		
		do ch=(*get)();
		while(ch!=EOF && IS_WHITESPACE(ch));
		if(ch==EOF) {
			as_warn("EOF in comment:  Newline inserted");
			return '\n';
		}
		if(ch<'0' || ch>'9') {
			/* Non-numerics:  Eat whole comment line */
			while(ch!=EOF && !IS_NEWLINE(ch))
			    ch=(*get)();
			if(ch==EOF)
			    as_warn("EOF in Comment: Newline inserted");
			state=0;
			return '\n';
		}
		/* Numerics begin comment.  Perhaps CPP `# 123 "filename"' */
		(*unget)(ch);
		old_state=4;
		state= -1;
		out_string=".line ";
		return *out_string++;
		
	case LEX_IS_COMMENT_START:
		do ch=(*get)();
		while(ch!=EOF && !IS_NEWLINE(ch));
		if(ch==EOF)
		    as_warn("EOF in comment:  Newline inserted");
		state=0;
		return '\n';
		
	default:
	de_fault:
		/* Some relatively `normal' character.  */
		if(state==0) {
			state=2;	/* Now seeing opcode */
			return ch;
		} else if(state==1) {
			state=2;	/* Ditto */
			return ch;
		} else {
			return ch;	/* Opcode or operands already */
		}
	}
	return -1;
}

#ifdef TEST

/* Stand-alone test driver: scrubs standard input onto standard output.
   The character tables declared extern at the top of the file are defined
   here, with the same const qualification as those declarations.  */
const char comment_chars[] = "|";
const char line_comment_chars[] = "#";
const char line_separator_chars[] = "";

int main()
{
	int	ch;
	
	/* Build the character-class table, then read through the stream
	   callbacks defined in this file.  */
	do_scrub_begin();
	scrub_file = stdin;
	while((ch=do_scrub_next_char(scrub_from_file, scrub_to_file))!=EOF)
	    putc(ch,stdout);
	return 0;
}

/* Minimal stand-in for the assembler's warning routine: prints STR and a
   newline on stderr.  Any further arguments are ignored, so format
   directives in STR are printed literally.  */
as_warn(str)
char *str;
{
	fputs(str,stderr);
	putc('\n',stderr);
}
#endif

/*
 * Local Variables:
 * comment-column: 0
 * fill-column: 131
 * End:
 */

/* end of app.c */
Commit	Line	Data
3340f7e5	1	/* Copyright (C) 1987, 1990, 1991, 1992 Free Software Foundation, Inc.
a39116f1 RP	2
	3	Modified by Allen Wirfs-Brock, Instantiations Inc 2/90
	4	*/
fecd2382 RP	5	/* This is the Assembler Pre-Processor
fecd2382 RP	6	Copyright (C) 1987 Free Software Foundation, Inc.
a39116f1 RP	7
	8	This file is part of GAS, the GNU Assembler.
	9
	10	GAS is free software; you can redistribute it and/or modify
	11	it under the terms of the GNU General Public License as published by
	12	the Free Software Foundation; either version 2, or (at your option)
	13	any later version.
	14
	15	GAS is distributed in the hope that it will be useful,
	16	but WITHOUT ANY WARRANTY; without even the implied warranty of
	17	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	18	GNU General Public License for more details.
	19
	20	You should have received a copy of the GNU General Public License
	21	along with GAS; see the file COPYING. If not, write to
	22	the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
fecd2382 RP	23
	24	/* App, the assembler pre-processor. This pre-processor strips out excess
	25	spaces, turns single-quoted characters into a decimal constant, and turns
	26	# <number> <filename> <garbage> into a .line <number>\n.app-file <filename> pair.
	27	This needs better error-handling.
a39116f1	28	*/
fecd2382 RP	29
	30	#include <stdio.h>
	31	#include "as.h" /* For BAD_CASE() only */
	32
3340f7e5	33	#if (__STDC__ != 1) && !defined(const)
fecd2382 RP	34	#define const /* Nothing */
	35	#endif
	36
	37	static char lex [256];
	38	static char symbol_chars[] =
a39116f1	39	"$._ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
fecd2382 RP	40
	41	/* These will go in BSS if not defined elsewhere, producing empty strings. */
	42	extern const char comment_chars[];
	43	extern const char line_comment_chars[];
	44	extern const char line_separator_chars[];
	45
	46	#define LEX_IS_SYMBOL_COMPONENT 1
	47	#define LEX_IS_WHITESPACE 2
	48	#define LEX_IS_LINE_SEPARATOR 3
	49	#define LEX_IS_COMMENT_START 4
	50	#define LEX_IS_LINE_COMMENT_START 5
	51	#define LEX_IS_TWOCHAR_COMMENT_1ST 6
	52	#define LEX_IS_TWOCHAR_COMMENT_2ND 7
	53	#define LEX_IS_STRINGQUOTE 8
	54	#define LEX_IS_COLON 9
	55	#define LEX_IS_NEWLINE 10
	56	#define LEX_IS_ONECHAR_QUOTE 11
a39116f1 RP	57	#define IS_SYMBOL_COMPONENT(c) (lex[c] == LEX_IS_SYMBOL_COMPONENT)
	58	#define IS_WHITESPACE(c) (lex[c] == LEX_IS_WHITESPACE)
	59	#define IS_LINE_SEPARATOR(c) (lex[c] == LEX_IS_LINE_SEPARATOR)
	60	#define IS_COMMENT(c) (lex[c] == LEX_IS_COMMENT_START)
	61	#define IS_LINE_COMMENT(c) (lex[c] == LEX_IS_LINE_COMMENT_START)
	62	#define IS_NEWLINE(c) (lex[c] == LEX_IS_NEWLINE)
	63
	64	/* FIXME-soon: The entire lexer/parser thingy should be
	65	built statically at compile time rather than dynamically
	66	each and every time the assembler is run. xoxorich. */
fecd2382 RP	67
	68	void do_scrub_begin() {
	69	const char *p;
a39116f1 RP	70
	71	lex[' '] = LEX_IS_WHITESPACE;
	72	lex['\t'] = LEX_IS_WHITESPACE;
	73	lex['\n'] = LEX_IS_NEWLINE;
	74	lex[';'] = LEX_IS_LINE_SEPARATOR;
	75	lex['"'] = LEX_IS_STRINGQUOTE;
	76	lex['\''] = LEX_IS_ONECHAR_QUOTE;
	77	lex[':'] = LEX_IS_COLON;
	78
fecd2382 RP	79	/* Note that these override the previous defaults, e.g. if ';'
fecd2382 RP	80	is a comment char, then it isn't a line separator. */
a39116f1	81	for (p = symbol_chars; *p; ++p) {
fecd2382	82	lex[*p] = LEX_IS_SYMBOL_COMPONENT;
a39116f1 RP	83	} /* declare symbol characters */
	84
	85	for (p = line_comment_chars; *p; p++) {
fecd2382	86	lex[*p] = LEX_IS_LINE_COMMENT_START;
a39116f1 RP	87	} /* declare line comment chars */
	88
	89	for (p = comment_chars; *p; p++) {
	90	lex[*p] = LEX_IS_COMMENT_START;
	91	} /* declare comment chars */
	92
	93	for (p = line_separator_chars; *p; p++) {
fecd2382	94	lex[*p] = LEX_IS_LINE_SEPARATOR;
a39116f1 RP	95	} /* declare line separators */
a39116f1 RP	96
fecd2382 RP	97	/* Only allow slash-star comments if slash is not in use */
fecd2382 RP	98	if (lex['/'] == 0) {
a39116f1	99	lex['/'] = LEX_IS_TWOCHAR_COMMENT_1ST;
fecd2382	100	}
a39116f1 RP	101	/* FIXME-soon. This is a bad hack but otherwise, we
	102	can't do c-style comments when '/' is a line
	103	comment char. xoxorich. */
	104	if (lex['*'] == 0) {
	105	lex['*'] = LEX_IS_TWOCHAR_COMMENT_2ND;
	106	}
	107	} /* do_scrub_begin() */
fecd2382 RP	108
	109	FILE *scrub_file;
	110
	111	int scrub_from_file() {
	112	return getc(scrub_file);
	113	}
	114
	115	void scrub_to_file(ch)
	116	int ch;
	117	{
	118	ungetc(ch,scrub_file);
	119	} /* scrub_to_file() */
	120
	121	char *scrub_string;
	122	char *scrub_last_string;
	123
	124	int scrub_from_string() {
	125	return scrub_string == scrub_last_string ? EOF : *scrub_string++;
	126	} /* scrub_from_string() */
	127
	128	void scrub_to_string(ch)
	129	int ch;
	130	{
	131	*--scrub_string=ch;
	132	} /* scrub_to_string() */
	133
	134	/* Saved state of the scrubber */
	135	static int state;
	136	static int old_state;
	137	static char *out_string;
	138	static char out_buf[20];
	139	static int add_newlines = 0;
	140
	141	/* Data structure for saving the state of app across #include's. Note that
	142	app is called asynchronously to the parsing of the .include's, so our
	143	state at the time .include is interpreted is completely unrelated.
	144	That's why we have to save it all. */
	145
	146	struct app_save {
a39116f1 RP	147	int state;
	148	int old_state;
	149	char *out_string;
	150	char out_buf[sizeof (out_buf)];
	151	int add_newlines;
	152	char *scrub_string;
	153	char *scrub_last_string;
	154	FILE *scrub_file;
fecd2382 RP	155	};
	156
	157	char *app_push() {
a39116f1 RP	158	register struct app_save *saved;
	159
	160	saved = (struct app_save ) xmalloc(sizeof (saved));
	161	saved->state = state;
	162	saved->old_state = old_state;
	163	saved->out_string = out_string;
	164	bcopy(saved->out_buf, out_buf, sizeof(out_buf));
	165	saved->add_newlines = add_newlines;
	166	saved->scrub_string = scrub_string;
	167	saved->scrub_last_string = scrub_last_string;
	168	saved->scrub_file = scrub_file;
	169
	170	/* do_scrub_begin() is not useful, just wastes time. */
	171	return (char *)saved;
fecd2382 RP	172	}
	173
	174	void app_pop(arg)
	175	char *arg;
	176	{
a39116f1 RP	177	register struct app_save saved = (struct app_save )arg;
	178
	179	/* There is no do_scrub_end (). */
	180	state = saved->state;
	181	old_state = saved->old_state;
	182	out_string = saved->out_string;
	183	bcopy (out_buf, saved->out_buf, sizeof (out_buf));
	184	add_newlines = saved->add_newlines;
	185	scrub_string = saved->scrub_string;
	186	scrub_last_string = saved->scrub_last_string;
	187	scrub_file = saved->scrub_file;
	188
	189	free (arg);
fecd2382 RP	190	} /* app_pop() */
	191
	192	int do_scrub_next_char(get,unget)
	193	int (*get)();
	194	void (*unget)();
	195	{
	196	/*State 0: beginning of normal line
a39116f1 RP	197	1: After first whitespace on line (flush more white)
	198	2: After first non-white (opcode) on line (keep 1white)
	199	3: after second white on line (into operands) (flush white)
	200	4: after putting out a .line, put out digits
	201	5: parsing a string, then go to old-state
	202	6: putting out \ escape in a "d string.
	203	7: After putting out a .app-file, put out string.
	204	8: After putting out a .app-file string, flush until newline.
	205	-1: output string in out_string and go to the state in old_state
	206	-2: flush text until a '*' '/' is seen, then go to state old_state
	207	*/
	208
	209	register int ch, ch2 = 0;
	210
fecd2382 RP	211	switch (state) {
	212	case -1:
	213	ch= *out_string++;
	214	if(*out_string==0) {
	215	state=old_state;
	216	old_state=3;
	217	}
	218	return ch;
a39116f1	219
fecd2382 RP	220	case -2:
	221	for(;;) {
	222	do {
	223	ch=(*get)();
	224	} while(ch!=EOF && ch!='\n' && ch!='*');
	225	if(ch=='\n' \|\| ch==EOF)
a39116f1 RP	226	return ch;
a39116f1 RP	227
fecd2382 RP	228	/* At this point, ch must be a '' /
	229	while ( (ch=(get)()) == '' ){
	230	;
	231	}
	232	if(ch==EOF \|\| ch=='/')
a39116f1	233	break;
fecd2382 RP	234	(*unget)(ch);
	235	}
	236	state=old_state;
	237	return ' ';
a39116f1	238
fecd2382 RP	239	case 4:
	240	ch=(*get)();
	241	if(ch==EOF \|\| (ch>='0' && ch<='9'))
a39116f1	242	return ch;
fecd2382 RP	243	else {
fecd2382 RP	244	while(ch!=EOF && IS_WHITESPACE(ch))
a39116f1	245	ch=(*get)();
fecd2382 RP	246	if(ch=='"') {
	247	(*unget)(ch);
	248	out_string="\n.app-file ";
	249	old_state=7;
	250	state= -1;
	251	return *out_string++;
	252	} else {
	253	while(ch!=EOF && ch!='\n')
a39116f1	254	ch=(*get)();
fecd2382 RP	255	return ch;
	256	}
	257	}
a39116f1	258
fecd2382 RP	259	case 5:
	260	ch=(*get)();
	261	if(ch=='"') {
	262	state=old_state;
	263	return '"';
	264	} else if(ch=='\\') {
	265	state=6;
	266	return ch;
	267	} else if(ch==EOF) {
	268	as_warn("End of file in string: inserted '\"'");
	269	state=old_state;
	270	(*unget)('\n');
	271	return '"';
	272	} else {
	273	return ch;
	274	}
a39116f1	275
fecd2382 RP	276	case 6:
	277	state=5;
	278	ch=(*get)();
	279	switch(ch) {
	280	/* This is neet. Turn "string
	281	more string" into "string\n more string"
a39116f1	282	*/
fecd2382 RP	283	case '\n':
	284	(*unget)('n');
	285	add_newlines++;
	286	return '\\';
a39116f1	287
fecd2382 RP	288	case '"':
	289	case '\\':
	290	case 'b':
	291	case 'f':
	292	case 'n':
	293	case 'r':
	294	case 't':
	295	#ifdef BACKSLASH_V
	296	case 'v':
	297	#endif /* BACKSLASH_V */
	298	case '0':
	299	case '1':
	300	case '2':
	301	case '3':
	302	case '4':
	303	case '5':
	304	case '6':
	305	case '7':
	306	break;
a39116f1	307
fecd2382 RP	308	#ifdef ONLY_STANDARD_ESCAPES
	309	default:
	310	as_warn("Unknown escape '\\%c' in string: Ignored",ch);
	311	break;
	312	#else /* ONLY_STANDARD_ESCAPES */
	313	default:
	314	/* Accept \x as x for any x */
	315	break;
	316	#endif /* ONLY_STANDARD_ESCAPES */
a39116f1	317
fecd2382 RP	318	case EOF:
	319	as_warn("End of file in string: '\"' inserted");
	320	return '"';
	321	}
	322	return ch;
a39116f1	323
fecd2382 RP	324	case 7:
	325	ch=(*get)();
	326	state=5;
	327	old_state=8;
	328	return ch;
a39116f1	329
fecd2382 RP	330	case 8:
	331	do ch= (*get)();
	332	while(ch!='\n');
	333	state=0;
	334	return ch;
	335	}
a39116f1	336
fecd2382	337	/* OK, we are somewhere in states 0 through 4 */
a39116f1 RP	338
a39116f1 RP	339	/* flushchar: */
fecd2382 RP	340	ch=(*get)();
	341	recycle:
	342	if (ch == EOF) {
	343	if (state != 0)
a39116f1	344	as_warn("End of file not at end of a line: Newline inserted.");
fecd2382 RP	345	return ch;
fecd2382 RP	346	}
a39116f1	347
fecd2382 RP	348	switch (lex[ch]) {
	349	case LEX_IS_WHITESPACE:
	350	do ch=(*get)();
	351	while(ch!=EOF && IS_WHITESPACE(ch));
	352	if(ch==EOF)
a39116f1	353	return ch;
fecd2382 RP	354	if(IS_COMMENT(ch) \|\| (state==0 && IS_LINE_COMMENT(ch)) \|\| ch=='/' \|\| IS_LINE_SEPARATOR(ch)) {
	355	goto recycle;
	356	}
	357	switch (state) {
	358	case 0: state++; goto recycle; /* Punted leading sp */
	359	case 1: BAD_CASE(state); /* We can't get here */
	360	case 2: state++; (unget)(ch); return ' '; / Sp after opco */
	361	case 3: goto recycle; /* Sp in operands */
	362	default: BAD_CASE(state);
	363	}
	364	break;
a39116f1	365
fecd2382 RP	366	case LEX_IS_TWOCHAR_COMMENT_1ST:
	367	ch2=(*get)();
	368	if (ch2 != EOF && lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND) {
	369	for(;;) {
	370	do {
	371	ch2=(*get)();
	372	if(ch2 != EOF && IS_NEWLINE(ch2))
a39116f1	373	add_newlines++;
fecd2382	374	} while(ch2!=EOF &&
a39116f1 RP	375	(lex[ch2] != LEX_IS_TWOCHAR_COMMENT_2ND));
a39116f1 RP	376
fecd2382	377	while (ch2!=EOF &&
a39116f1	378	(lex[ch2] == LEX_IS_TWOCHAR_COMMENT_2ND)){
fecd2382 RP	379	ch2=(*get)();
fecd2382 RP	380	}
a39116f1	381
fecd2382	382	if(ch2==EOF
a39116f1 RP	383	\|\| lex[ch2] == LEX_IS_TWOCHAR_COMMENT_1ST)
a39116f1 RP	384	break;
fecd2382 RP	385	(*unget)(ch);
	386	}
	387	if(ch2==EOF)
a39116f1 RP	388	as_warn("End of file in multiline comment");
a39116f1 RP	389
fecd2382 RP	390	ch = ' ';
	391	goto recycle;
	392	} else {
	393	if(ch2!=EOF)
a39116f1	394	(*unget)(ch2);
fecd2382 RP	395	return ch;
	396	}
	397	break;
a39116f1	398
fecd2382 RP	399	case LEX_IS_STRINGQUOTE:
	400	old_state=state;
	401	state=5;
	402	return ch;
a39116f1 RP	403
a39116f1 RP	404	#ifndef IEEE_STYLE
fecd2382 RP	405	case LEX_IS_ONECHAR_QUOTE:
	406	ch=(*get)();
	407	if(ch==EOF) {
	408	as_warn("End-of-file after a one-character quote; \000 inserted");
	409	ch=0;
	410	}
	411	sprintf(out_buf,"%d", (int)(unsigned char)ch);
a39116f1	412
fecd2382 RP	413	/* None of these 'x constants for us. We want 'x'.
	414	*/
	415	if ( (ch=(*get)()) != '\'' ) {
	416	#ifdef REQUIRE_CHAR_CLOSE_QUOTE
	417	as_warn("Missing close quote: (assumed)");
	418	#else
	419	(*unget)(ch);
	420	#endif
	421	}
a39116f1	422
fecd2382 RP	423	old_state=state;
	424	state= -1;
	425	out_string=out_buf;
	426	return *out_string++;
a39116f1	427	#endif
fecd2382 RP	428	case LEX_IS_COLON:
fecd2382 RP	429	if(state!=3)
a39116f1	430	state=0;
fecd2382	431	return ch;
a39116f1	432
fecd2382 RP	433	case LEX_IS_NEWLINE:
	434	/* Roll out a bunch of newlines from inside comments, etc. */
	435	if(add_newlines) {
	436	--add_newlines;
	437	(*unget)(ch);
	438	}
	439	/* fall thru into... */
a39116f1	440
fecd2382 RP	441	case LEX_IS_LINE_SEPARATOR:
	442	state=0;
	443	return ch;
a39116f1	444
fecd2382 RP	445	case LEX_IS_LINE_COMMENT_START:
fecd2382 RP	446	if (state != 0) /* Not at start of line, act normal */
a39116f1 RP	447	goto de_fault;
	448
	449	/* FIXME-someday: The two character comment stuff was badly
	450	thought out. On i386, we want '/' as line comment start
	451	AND we want C style comments. hence this hack. The
	452	whole lexical process should be reworked. xoxorich. */
	453
	454	if (ch == '/' && (ch2 = (get)()) == '') {
	455	state = -2;
	456	return(do_scrub_next_char(get, unget));
	457	} else {
	458	(*unget)(ch2);
	459	} /* bad hack */
	460
fecd2382 RP	461	do ch=(*get)();
	462	while(ch!=EOF && IS_WHITESPACE(ch));
	463	if(ch==EOF) {
	464	as_warn("EOF in comment: Newline inserted");
	465	return '\n';
	466	}
	467	if(ch<'0' \|\| ch>'9') {
	468	/* Non-numerics: Eat whole comment line */
	469	while(ch!=EOF && !IS_NEWLINE(ch))
a39116f1	470	ch=(*get)();
fecd2382	471	if(ch==EOF)
a39116f1	472	as_warn("EOF in Comment: Newline inserted");
fecd2382 RP	473	state=0;
	474	return '\n';
	475	}
	476	/* Numerics begin comment. Perhaps CPP `# 123 "filename"' */
	477	(*unget)(ch);
	478	old_state=4;
	479	state= -1;
	480	out_string=".line ";
	481	return *out_string++;
a39116f1	482
fecd2382 RP	483	case LEX_IS_COMMENT_START:
	484	do ch=(*get)();
	485	while(ch!=EOF && !IS_NEWLINE(ch));
	486	if(ch==EOF)
a39116f1	487	as_warn("EOF in comment: Newline inserted");
fecd2382 RP	488	state=0;
fecd2382 RP	489	return '\n';
a39116f1	490
fecd2382 RP	491	default:
	492	de_fault:
	493	/* Some relatively `normal' character. */
	494	if(state==0) {
	495	state=2; /* Now seeing opcode */
	496	return ch;
	497	} else if(state==1) {
	498	state=2; /* Ditto */
	499	return ch;
	500	} else {
	501	return ch; /* Opcode or operands already */
	502	}
	503	}
	504	return -1;
	505	}
	506
	507	#ifdef TEST
	508
	509	char comment_chars[] = "\|";
	510	char line_comment_chars[] = "#";
	511
	512	main()
	513	{
	514	int ch;
a39116f1	515
fecd2382 RP	516	app_begin();
fecd2382 RP	517	while((ch=do_scrub_next_char(stdin))!=EOF)
a39116f1	518	putc(ch,stdout);
fecd2382 RP	519	}
	520
	521	as_warn(str)
	522	char *str;
	523	{
	524	fputs(str,stderr);
	525	putc('\n',stderr);
	526	}
	527	#endif
	528
	529	/*
	530	* Local Variables:
	531	* comment-column: 0
	532	* fill-column: 131
	533	* End:
	534	*/
	535
	536	/* end of app.c */