[linux.git] / scripts / genksyms / lex.l

/* Lexical analysis for genksyms.
   Copyright 1996, 1997 Linux International.

   New implementation contributed by Richard Henderson <[email protected]>
   Based on original work by Bjorn Ekwall <[email protected]>

   Taken from Linux modutils 2.4.22.

   This program is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 2 of the License, or (at your
   option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */


%{

#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include "genksyms.h"
#include "parse.tab.h"

/* We've got a two-level lexer here.  We let flex do basic tokenization
   and then we categorize those basic tokens in the second stage.  */
#define YY_DECL		static int yylex1(void)

%}

/* Identifiers: a letter, underscore or dollar sign, followed by any
   number of letters, digits, underscores or dollar signs.  */
IDENT			[A-Za-z_\$][A-Za-z0-9_\$]*

/* Integer constants: octal (a lone 0 included), decimal or hexadecimal,
   optionally followed by one suffix from I_SUF.
   NOTE(review): I_SUF has no LL / ULL forms, so a constant such as 1ULL
   is not matched as a single INT by this pattern -- confirm whether
   that is intended before changing it.  */
O_INT			0[0-7]*
D_INT			[1-9][0-9]*
X_INT			0[Xx][0-9A-Fa-f]+
I_SUF			[Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
INT			({O_INT}|{D_INT}|{X_INT}){I_SUF}?

/* Floating constants: either a fractional part with an optional
   exponent, or plain digits with a mandatory exponent; in both cases an
   optional one-letter suffix from F_SUF may follow.  */
FRAC			([0-9]*\.[0-9]+)|([0-9]+\.)
EXP			[Ee][+-]?[0-9]+
F_SUF			[FfLl]
REAL			({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)

/* String and character literals, with an optional L prefix.  A
   backslash always consumes the character after it, so an escaped
   quote does not end the literal.  */
STRING			L?\"([^\\\"]*\\.)*[^\\\"]*\"
CHAR			L?\'([^\\\']*\\.)*[^\\\']*\'

/* Two-character operators: an operator character followed by '=', or
   one of && || -> << >>.  */
MC_TOKEN		([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)

/* We don't do multiple input files.  */
%option noyywrap

/* The scanner never calls input(), so ask flex not to emit it.  */
%option noinput

%%


 /* Keep track of our location in the original source files.
    A line made of '#', blanks, an integer, blanks and a quoted file
    name is handed to the second stage as FILENAME.  Any other line that
    starts with '#', and every bare newline, only bumps cur_line.  */
^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n	return FILENAME;
^#.*\n					cur_line++;
\n					cur_line++;

 /* Ignore all other whitespace.  */
[ \t\f\v\r]+				;


 /* Basic token classes; keywords are told apart from plain identifiers
    later, in the second stage.  */
{STRING}				return STRING;
{CHAR}					return CHAR;
{IDENT}					return IDENT;

 /* The Pedant requires that the other C multi-character tokens be
    recognized as tokens.  We don't actually use them since we don't
    parse expressions, but we do want whitespace to be arranged
    around them properly.  */
{MC_TOKEN}				return OTHER;
{INT}					return INT;
{REAL}					return REAL;

 /* Ellipsis, as a single token.  */
"..."					return DOTS;

 /* All other tokens are single characters; the character itself is
    the token code.  */
.					return yytext[0];


%%

/* Bring in the keyword recognizer.  */

#include "keywords.hash.c"


/* Macros to append to our phrase collection list.  */

/*
 * We mark any token that equals a known enumerator as SYM_ENUM_CONST.
 * The parser will change this for struct and union tags later; the only
 * problem is struct and union members:
 *    enum e { a, b }; struct s { int a, b; }
 * but in this case the only effect will be that the ABI checksums become
 * more volatile, which is acceptable. Also, such collisions are quite rare;
 * so far it was only observed in include/linux/telephony.h.
 */

/*
 * _APP(T, L): record the token text T, of length L, in the phrase list.
 *
 * Must be expanded where `cur_node` and `next_node` are in scope.  The
 * current empty head (`next_node`) becomes the node for this token and a
 * freshly allocated empty node, linked in front of it, becomes the new
 * head.  L + 1 bytes are copied from T, so T is expected to carry its
 * terminating NUL at index L.  The node is tagged SYM_ENUM_CONST when
 * find_symbol() reports the text as a known enumerator, SYM_NORMAL
 * otherwise, and it inherits the current `in_source_file` flag.
 *
 * The arguments are parenthesised so that an expression argument cannot
 * be torn apart by operator precedence.
 */
#define _APP(T,L)	do {						   \
			  cur_node = next_node;				   \
			  next_node = xmalloc(sizeof(*next_node));	   \
			  next_node->next = cur_node;			   \
			  cur_node->string =				   \
			    memcpy(xmalloc((L) + 1), (T), (L) + 1);	   \
			  cur_node->tag =				   \
			    find_symbol(cur_node->string, SYM_ENUM_CONST, 1) \
			    ? SYM_ENUM_CONST : SYM_NORMAL;		   \
			  cur_node->in_source_file = in_source_file;	   \
			} while (0)

/* Append the token flex has just matched.  */
#define APP		_APP(yytext, yyleng)


/* The second stage lexer.  Here we incorporate knowledge of the state
   of the parser to tailor the tokens that are returned.

   Raw tokens come from yylex1().  FILENAME markers are consumed here and
   only update the location bookkeeping.  Other tokens are recorded in
   the phrase list with APP as they are accepted.  Bracketed constructs
   (attribute and asm argument lists, array bounds, brace bodies, and the
   expression after '=' or ':') are collected in one go and reported to
   the parser as a single *_PHRASE token.

   Returns 0 at end of input, otherwise a token code.  Before a non-zero
   token is returned, yylval is set to the address of the list head's
   `next` link, i.e. the link leading to the most recently recorded
   node.  */

int
yylex(void)
{
  static enum {
    ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_BRACKET, ST_BRACE,
    ST_EXPRESSION
  } lexstate = ST_NOTSTARTED;

  /* Countdowns set after struct/union/enum and decremented once per
     returned token (see fini).  */
  static int suppress_type_lookup, dont_want_brace_phrase;
  /* Empty node at the head of the phrase list; APP fills it in and
     allocates a new empty head.  */
  static struct string_list *next_node;

  /* count is the bracket nesting depth of the phrase being collected.
     It can be a plain local because every phrase is collected within a
     single call, looping through "goto repeat".  */
  int token, count = 0;
  struct string_list *cur_node;

  if (lexstate == ST_NOTSTARTED)
    {
      next_node = xmalloc(sizeof(*next_node));
      next_node->next = NULL;
      lexstate = ST_NORMAL;
    }

repeat:
  token = yylex1();

  if (token == 0)
    return 0;
  else if (token == FILENAME)
    {
      char *file, *e;

      /* Save the filename and line number for later error messages.  */

      /* free(NULL) is a no-op, so no guard is needed.  */
      free(cur_filename);

      /* The FILENAME pattern guarantees both quotes are present.  */
      file = strchr(yytext, '\"')+1;
      e = strchr(file, '\"');
      *e = '\0';
      cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
      /* yytext is '#', at least one blank, then the number; atoi()
         skips any further leading blanks.  */
      cur_line = atoi(yytext+2);

      /* The first file name seen is taken to be the source file.  */
      if (!source_file) {
        source_file = xstrdup(cur_filename);
        in_source_file = 1;
      } else {
        in_source_file = (strcmp(cur_filename, source_file) == 0);
      }

      goto repeat;
    }

  switch (lexstate)
    {
    case ST_NORMAL:
      switch (token)
        {
        case IDENT:
          APP;
          {
            const struct resword *r = is_reserved_word(yytext, yyleng);
            if (r)
              {
                switch (token = r->token)
                  {
                  case ATTRIBUTE_KEYW:
                    lexstate = ST_ATTRIBUTE;
                    count = 0;
                    goto repeat;
                  case ASM_KEYW:
                    lexstate = ST_ASM;
                    count = 0;
                    goto repeat;

                  case STRUCT_KEYW:
                  case UNION_KEYW:
                  case ENUM_KEYW:
                    /* Keep a '{' that follows within the next two
                       tokens as an ordinary token, and do not turn the
                       identifier right after the keyword into TYPE.  */
                    dont_want_brace_phrase = 3;
                    suppress_type_lookup = 2;
                    goto fini;

                  case EXPORT_SYMBOL_KEYW:
                    goto fini;

                  default:
                    break;
                  }
              }
            if (!suppress_type_lookup)
              {
                if (find_symbol(yytext, SYM_TYPEDEF, 1))
                  token = TYPE;
              }
          }
          break;

        case '[':
          APP;
          lexstate = ST_BRACKET;
          count = 1;
          goto repeat;

        case '{':
          APP;
          if (dont_want_brace_phrase)
            break;
          lexstate = ST_BRACE;
          count = 1;
          goto repeat;

        case '=': case ':':
          /* The '=' / ':' itself is returned now; the expression that
             follows is collected on the next call.  */
          APP;
          lexstate = ST_EXPRESSION;
          break;

        case DOTS:
        default:
          APP;
          break;
        }
      break;

    case ST_ATTRIBUTE:
      /* Collect everything up to the ')' that balances the first '('.  */
      APP;
      switch (token)
        {
        case '(':
          ++count;
          goto repeat;
        case ')':
          if (--count == 0)
            {
              lexstate = ST_NORMAL;
              token = ATTRIBUTE_PHRASE;
              break;
            }
          goto repeat;
        default:
          goto repeat;
        }
      break;

    case ST_ASM:
      /* Same scheme as ST_ATTRIBUTE, reported as ASM_PHRASE.  */
      APP;
      switch (token)
        {
        case '(':
          ++count;
          goto repeat;
        case ')':
          if (--count == 0)
            {
              lexstate = ST_NORMAL;
              token = ASM_PHRASE;
              break;
            }
          goto repeat;
        default:
          goto repeat;
        }
      break;

    case ST_BRACKET:
      /* Entered with count == 1 for the '[' already recorded.  */
      APP;
      switch (token)
        {
        case '[':
          ++count;
          goto repeat;
        case ']':
          if (--count == 0)
            {
              lexstate = ST_NORMAL;
              token = BRACKET_PHRASE;
              break;
            }
          goto repeat;
        default:
          goto repeat;
        }
      break;

    case ST_BRACE:
      /* Entered with count == 1 for the '{' already recorded.  */
      APP;
      switch (token)
        {
        case '{':
          ++count;
          goto repeat;
        case '}':
          if (--count == 0)
            {
              lexstate = ST_NORMAL;
              token = BRACE_PHRASE;
              break;
            }
          goto repeat;
        default:
          goto repeat;
        }
      break;

    case ST_EXPRESSION:
      switch (token)
        {
        case '(': case '[': case '{':
          ++count;
          APP;
          goto repeat;
        case '}':
          /* is this the last line of an enum declaration? */
          if (count == 0)
            {
              /* Put back the token we just read so's we can find it again
                 after registering the expression.  */
              unput(token);

              lexstate = ST_NORMAL;
              token = EXPRESSION_PHRASE;
              break;
            }
          /* FALLTHRU */
        case ')': case ']':
          --count;
          APP;
          goto repeat;
        case ',': case ';':
          if (count == 0)
            {
              /* Put back the token we just read so's we can find it again
                 after registering the expression.  */
              unput(token);

              lexstate = ST_NORMAL;
              token = EXPRESSION_PHRASE;
              break;
            }
          APP;
          goto repeat;
        default:
          APP;
          goto repeat;
        }
      break;

    default:
      /* ST_NOTSTARTED cannot get here; anything else is a logic error.  */
      exit(1);
    }
fini:

  if (suppress_type_lookup > 0)
    --suppress_type_lookup;
  if (dont_want_brace_phrase > 0)
    --dont_want_brace_phrase;

  yylval = &next_node->next;

  return token;
}
Commit	Line	Data
1da177e4 LT	1	/* Lexical analysis for genksyms.
	2	Copyright 1996, 1997 Linux International.
	3
	4	New implementation contributed by Richard Henderson <[email protected]>
	5	Based on original work by Bjorn Ekwall <[email protected]>
	6
	7	Taken from Linux modutils 2.4.22.
	8
	9	This program is free software; you can redistribute it and/or modify it
	10	under the terms of the GNU General Public License as published by the
	11	Free Software Foundation; either version 2 of the License, or (at your
	12	option) any later version.
	13
	14	This program is distributed in the hope that it will be useful, but
	15	WITHOUT ANY WARRANTY; without even the implied warranty of
	16	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	17	General Public License for more details.
	18
	19	You should have received a copy of the GNU General Public License
	20	along with this program; if not, write to the Free Software Foundation,
	21	Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
	22
	23
	24	%{
	25
	26	#include <limits.h>
	27	#include <stdlib.h>
	28	#include <string.h>
	29	#include <ctype.h>
	30
	31	#include "genksyms.h"
880f4499	32	#include "parse.tab.h"
1da177e4 LT	33
	34	/* We've got a two-level lexer here. We let flex do basic tokenization
	35	and then we categorize those basic tokens in the second stage. */
	36	#define YY_DECL static int yylex1(void)
	37
	38	%}
	39
	40	IDENT [A-Za-z_\$][A-Za-z0-9_\$]*
	41
	42	O_INT 0[0-7]*
	43	D_INT [1-9][0-9]*
	44	X_INT 0[Xx][0-9A-Fa-f]+
	45	I_SUF [Uu]\|[Ll]\|[Uu][Ll]\|[Ll][Uu]
	46	INT ({O_INT}\|{D_INT}\|{X_INT}){I_SUF}?
	47
	48	FRAC ([0-9]*\.[0-9]+)\|([0-9]+\.)
	49	EXP [Ee][+-]?[0-9]+
	50	F_SUF [FfLl]
	51	REAL ({FRAC}{EXP}?{F_SUF}?)\|([0-9]+{EXP}{F_SUF}?)
	52
	53	STRING L?\"([^\\\"]\\.)[^\\\"]*\"
	54	CHAR L?\'([^\\\']\\.)[^\\\']*\'
	55
	56	MC_TOKEN ([~%^&*+=\|<>/-]=)\|(&&)\|("\|\|")\|(->)\|(<<)\|(>>)
	57
1da177e4 LT	58	/* We don't do multiple input files. */
	59	%option noyywrap
	60
11ddad39 AB	61	%option noinput
11ddad39 AB	62
1da177e4 LT	63	%%
	64
	65
	66	/* Keep track of our location in the original source files. */
	67	^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n return FILENAME;
	68	^#.*\n cur_line++;
	69	\n cur_line++;
	70
	71	/* Ignore all other whitespace. */
	72	[ \t\f\v\r]+ ;
	73
	74
	75	{STRING} return STRING;
	76	{CHAR} return CHAR;
	77	{IDENT} return IDENT;
	78
	79	/* The Pedant requires that the other C multi-character tokens be
	80	recognized as tokens. We don't actually use them since we don't
	81	parse expressions, but we do want whitespace to be arranged
	82	around them properly. */
95f1d639 MM	83	{MC_TOKEN} return OTHER;
	84	{INT} return INT;
	85	{REAL} return REAL;
1da177e4 LT	86
	87	"..." return DOTS;
	88
	89	/* All other tokens are single characters. */
	90	. return yytext[0];
	91
	92
	93	%%
	94
	95	/* Bring in the keyword recognizer. */
	96
880f4499	97	#include "keywords.hash.c"
1da177e4 LT	98
	99
	100	/* Macros to append to our phrase collection list. */
	101
e37ddb82 MM	102	/*
	103	* We mark any token, that that equals to a known enumerator, as
	104	* SYM_ENUM_CONST. The parser will change this for struct and union tags later,
	105	* the only problem is struct and union members:
	106	* enum e { a, b }; struct s { int a, b; }
	107	* but in this case, the only effect will be, that the ABI checksums become
	108	* more volatile, which is acceptable. Also, such collisions are quite rare,
	109	* so far it was only observed in include/linux/telephony.h.
	110	*/
1da177e4 LT	111	#define _APP(T,L) do { \
	112	cur_node = next_node; \
	113	next_node = xmalloc(sizeof(*next_node)); \
	114	next_node->next = cur_node; \
	115	cur_node->string = memcpy(xmalloc(L+1), T, L+1); \
e37ddb82 MM	116	cur_node->tag = \
	117	find_symbol(cur_node->string, SYM_ENUM_CONST, 1)?\
	118	SYM_ENUM_CONST : SYM_NORMAL ; \
2c5925d6	119	cur_node->in_source_file = in_source_file; \
1da177e4 LT	120	} while (0)
	121
	122	#define APP _APP(yytext, yyleng)
	123
	124
	125	/* The second stage lexer. Here we incorporate knowledge of the state
	126	of the parser to tailor the tokens that are returned. */
	127
	128	int
	129	yylex(void)
	130	{
	131	static enum {
	132	ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_BRACKET, ST_BRACE,
	133	ST_EXPRESSION, ST_TABLE_1, ST_TABLE_2, ST_TABLE_3, ST_TABLE_4,
	134	ST_TABLE_5, ST_TABLE_6
	135	} lexstate = ST_NOTSTARTED;
	136
	137	static int suppress_type_lookup, dont_want_brace_phrase;
	138	static struct string_list *next_node;
	139
	140	int token, count = 0;
	141	struct string_list *cur_node;
	142
	143	if (lexstate == ST_NOTSTARTED)
	144	{
1da177e4 LT	145	next_node = xmalloc(sizeof(*next_node));
	146	next_node->next = NULL;
	147	lexstate = ST_NORMAL;
	148	}
	149
	150	repeat:
	151	token = yylex1();
	152
	153	if (token == 0)
	154	return 0;
	155	else if (token == FILENAME)
	156	{
	157	char file, e;
	158
	159	/* Save the filename and line number for later error messages. */
	160
	161	if (cur_filename)
	162	free(cur_filename);
	163
	164	file = strchr(yytext, '\"')+1;
	165	e = strchr(file, '\"');
	166	*e = '\0';
	167	cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
	168	cur_line = atoi(yytext+2);
	169
2c5925d6 MM	170	if (!source_file) {
	171	source_file = xstrdup(cur_filename);
	172	in_source_file = 1;
	173	} else {
	174	in_source_file = (strcmp(cur_filename, source_file) == 0);
	175	}
	176
1da177e4 LT	177	goto repeat;
	178	}
	179
	180	switch (lexstate)
	181	{
	182	case ST_NORMAL:
	183	switch (token)
	184	{
	185	case IDENT:
	186	APP;
	187	{
	188	const struct resword *r = is_reserved_word(yytext, yyleng);
	189	if (r)
	190	{
	191	switch (token = r->token)
	192	{
	193	case ATTRIBUTE_KEYW:
	194	lexstate = ST_ATTRIBUTE;
	195	count = 0;
	196	goto repeat;
	197	case ASM_KEYW:
	198	lexstate = ST_ASM;
	199	count = 0;
	200	goto repeat;
	201
	202	case STRUCT_KEYW:
	203	case UNION_KEYW:
1da177e4	204	case ENUM_KEYW:
e37ddb82	205	dont_want_brace_phrase = 3;
1da177e4 LT	206	suppress_type_lookup = 2;
	207	goto fini;
	208
	209	case EXPORT_SYMBOL_KEYW:
	210	goto fini;
	211	}
	212	}
	213	if (!suppress_type_lookup)
	214	{
01762c4e	215	if (find_symbol(yytext, SYM_TYPEDEF, 1))
1da177e4 LT	216	token = TYPE;
	217	}
	218	}
	219	break;
	220
	221	case '[':
	222	APP;
	223	lexstate = ST_BRACKET;
	224	count = 1;
	225	goto repeat;
	226
	227	case '{':
	228	APP;
	229	if (dont_want_brace_phrase)
	230	break;
	231	lexstate = ST_BRACE;
	232	count = 1;
	233	goto repeat;
	234
	235	case '=': case ':':
	236	APP;
	237	lexstate = ST_EXPRESSION;
	238	break;
	239
	240	case DOTS:
	241	default:
	242	APP;
	243	break;
	244	}
	245	break;
	246
	247	case ST_ATTRIBUTE:
	248	APP;
	249	switch (token)
	250	{
	251	case '(':
	252	++count;
	253	goto repeat;
	254	case ')':
	255	if (--count == 0)
	256	{
	257	lexstate = ST_NORMAL;
	258	token = ATTRIBUTE_PHRASE;
	259	break;
	260	}
	261	goto repeat;
	262	default:
	263	goto repeat;
	264	}
	265	break;
	266
	267	case ST_ASM:
	268	APP;
	269	switch (token)
	270	{
	271	case '(':
	272	++count;
	273	goto repeat;
	274	case ')':
	275	if (--count == 0)
	276	{
	277	lexstate = ST_NORMAL;
	278	token = ASM_PHRASE;
	279	break;
280	}
281	goto repeat;
282	default:
283	goto repeat;
284	}
285	break;
286
287	case ST_BRACKET:
288	APP;
289	switch (token)
290	{
291	case '[':
292	++count;
293	goto repeat;
294	case ']':
295	if (--count == 0)
296	{
297	lexstate = ST_NORMAL;
298	token = BRACKET_PHRASE;
299	break;
300	}
301	goto repeat;
302	default:
303	goto repeat;
304	}
305	break;
306
307	case ST_BRACE:
308	APP;
309	switch (token)
310	{
311	case '{':
312	++count;
313	goto repeat;
314	case '}':
315	if (--count == 0)
316	{
317	lexstate = ST_NORMAL;
318	token = BRACE_PHRASE;
319	break;
320	}
321	goto repeat;
322	default:
323	goto repeat;
324	}
325	break;
326
327	case ST_EXPRESSION:
328	switch (token)
329	{
330	case '(': case '[': case '{':
331	++count;
332	APP;
333	goto repeat;
e37ddb82 MM	334	case '}':
	335	/* is this the last line of an enum declaration? */
	336	if (count == 0)
	337	{
	338	/* Put back the token we just read so's we can find it again
	339	after registering the expression. */
	340	unput(token);
	341
	342	lexstate = ST_NORMAL;
	343	token = EXPRESSION_PHRASE;
	344	break;
	345	}
	346	/* FALLTHRU */
	347	case ')': case ']':
1da177e4 LT	348	--count;
	349	APP;
	350	goto repeat;
	351	case ',': case ';':
	352	if (count == 0)
	353	{
	354	/* Put back the token we just read so's we can find it again
	355	after registering the expression. */
	356	unput(token);
	357
	358	lexstate = ST_NORMAL;
	359	token = EXPRESSION_PHRASE;
	360	break;
	361	}
	362	APP;
	363	goto repeat;
	364	default:
	365	APP;
	366	goto repeat;
	367	}
	368	break;
	369
	370	case ST_TABLE_1:
	371	goto repeat;
	372
	373	case ST_TABLE_2:
	374	if (token == IDENT && yyleng == 1 && yytext[0] == 'X')
	375	{
	376	token = EXPORT_SYMBOL_KEYW;
	377	lexstate = ST_TABLE_5;
	378	APP;
	379	break;
	380	}
	381	lexstate = ST_TABLE_6;
	382	/* FALLTHRU */
	383
	384	case ST_TABLE_6:
	385	switch (token)
	386	{
	387	case '{': case '[': case '(':
	388	++count;
	389	break;
	390	case '}': case ']': case ')':
	391	--count;
	392	break;
	393	case ',':
	394	if (count == 0)
	395	lexstate = ST_TABLE_2;
	396	break;
	397	};
	398	goto repeat;
	399
	400	case ST_TABLE_3:
	401	goto repeat;
	402
	403	case ST_TABLE_4:
	404	if (token == ';')
	405	lexstate = ST_NORMAL;
	406	goto repeat;
	407
	408	case ST_TABLE_5:
	409	switch (token)
	410	{
	411	case ',':
412	token = ';';
413	lexstate = ST_TABLE_2;
414	APP;
415	break;
416	default:
417	APP;
418	break;
419	}
420	break;
421
422	default:
6803dc0e	423	exit(1);
1da177e4 LT	424	}
	425	fini:
	426
	427	if (suppress_type_lookup > 0)
	428	--suppress_type_lookup;
	429	if (dont_want_brace_phrase > 0)
	430	--dont_want_brace_phrase;
	431
	432	yylval = &next_node->next;
	433
	434	return token;
	435	}