lex.l - scripts/genksyms/lex.l - Linux diff v6.8 - Bootlin Elixir Cross Referencer

  1/* SPDX-License-Identifier: GPL-2.0-or-later */
  2/*
  3 * Lexical analysis for genksyms.
  4 * Copyright 1996, 1997 Linux International.
  5 *
  6 * New implementation contributed by Richard Henderson <rth@tamu.edu>
  7 * Based on original work by Bjorn Ekwall <bj0rn@blox.se>
  8 *
  9 * Taken from Linux modutils 2.4.22.
 10 */
 
 
 
 
 
 
 
 
 
 
 
 
 11
 12%{
 13
 14#include <limits.h>
 15#include <stdlib.h>
 16#include <string.h>
 17#include <ctype.h>
 18
 19#include "genksyms.h"
 20#include "parse.tab.h"
 21
 22/* We've got a two-level lexer here.  We let flex do basic tokenization
 23   and then we categorize those basic tokens in the second stage.  */
 24#define YY_DECL		static int yylex1(void)
 25
 26%}
 27
/*
 * Named patterns used by the rules section.
 *
 *   IDENT    - C identifier; the dollar sign is accepted as an identifier
 *              character as well.
 *   INT      - octal (O_INT), decimal (D_INT) or hexadecimal (X_INT) literal
 *              with an optional suffix from I_SUF: U, L, UL or LU in either
 *              case.
 *   REAL     - floating literal: a fraction (FRAC) with an optional exponent,
 *              or plain digits with a mandatory exponent; an optional F or L
 *              suffix (F_SUF) may follow.
 *   STRING   - double-quoted literal, CHAR - single-quoted literal; both
 *              accept an optional L prefix and backslash escapes.
 *   MC_TOKEN - two-character operators: one of the listed operator
 *              characters followed by an equals sign, plus the doubled
 *              ampersand, the doubled bar, the arrow and the two shifts.
 *
 * NOTE(review): I_SUF has no LL or ULL alternative, so for such a literal
 * INT can only match up to the first L - confirm whether that is intended.
 */
 28IDENT			[A-Za-z_\$][A-Za-z0-9_\$]*
 29
 30O_INT			0[0-7]*
 31D_INT			[1-9][0-9]*
 32X_INT			0[Xx][0-9A-Fa-f]+
 33I_SUF			[Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
 34INT			({O_INT}|{D_INT}|{X_INT}){I_SUF}?
 35
 36FRAC			([0-9]*\.[0-9]+)|([0-9]+\.)
 37EXP			[Ee][+-]?[0-9]+
 38F_SUF			[FfLl]
 39REAL			({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)
 40
 41STRING			L?\"([^\\\"]*\\.)*[^\\\"]*\"
 42CHAR			L?\'([^\\\']*\\.)*[^\\\']*\'
 43
 44MC_TOKEN		([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)
 45
 46/* We don't do multiple input files.  */
 47%option noyywrap
 48
 49%option noinput
 50
 51%%
 52
 53
 /* First-stage rules.  Each rule either returns a coarse token class
    (FILENAME, STRING, CHAR, IDENT, OTHER, INT, REAL, DOTS or the matched
    character itself), counts a line in cur_line, or discards whitespace.
    A line made of a hash sign, blanks, an INT and a double-quoted name is
    returned as FILENAME; every other line starting with a hash sign is
    only counted.  The categorisation of these tokens is refined in yylex()
    in the user-code section.  */
 54 /* Keep track of our location in the original source files.  */
 55^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n	return FILENAME;
 56^#.*\n					cur_line++;
 57\n					cur_line++;
 58
 59 /* Ignore all other whitespace.  */
 60[ \t\f\v\r]+				;
 61
 62
 63{STRING}				return STRING;
 64{CHAR}					return CHAR;
 65{IDENT}					return IDENT;
 66
 67 /* The Pedant requires that the other C multi-character tokens be
 68    recognized as tokens.  We don't actually use them since we don't
 69    parse expressions, but we do want whitespace to be arranged
 70    around them properly.  */
 71{MC_TOKEN}				return OTHER;
 72{INT}					return INT;
 73{REAL}					return REAL;
 74
 75"..."					return DOTS;
 76
 77 /* All other tokens are single characters.  */
 78.					return yytext[0];
 79
 80
 81%%
 82
 83/* Bring in the keyword recognizer.  */
 84
 85#include "keywords.c"
 86
 87
 88/* Macros to append to our phrase collection list.  */
 89
 90/*
 91 * We mark any token that equals a known enumerator as
 92 * SYM_ENUM_CONST. The parser will change this for struct and union tags later,
 93 * the only problem is struct and union members:
 94 *    enum e { a, b }; struct s { int a, b; }
 95 * but in this case, the only effect will be, that the ABI checksums become
 96 * more volatile, which is acceptable. Also, such collisions are quite rare,
 97 * so far it was only observed in include/linux/telephony.h.
 98 */
 99#define _APP(T,L)	do {						   \
100			  cur_node = next_node;				   \
101			  next_node = xmalloc(sizeof(*next_node));	   \
102			  next_node->next = cur_node;			   \
103			  cur_node->string = memcpy(xmalloc(L+1), T, L+1); \
104			  cur_node->tag =				   \
105			    find_symbol(cur_node->string, SYM_ENUM_CONST, 1)?\
106			    SYM_ENUM_CONST : SYM_NORMAL ;		   \
107			  cur_node->in_source_file = in_source_file;       \
108			} while (0)
109
110#define APP		_APP(yytext, yyleng)
111
112
113/* The second stage lexer.  Here we incorporate knowledge of the state
114   of the parser to tailor the tokens that are returned.  */
115
/* yylex - second-stage lexer.

   Reads raw tokens from yylex1() (the flex scanner) and post-processes
   them according to the function-static `lexstate`:

     - FILENAME tokens are consumed here: they update cur_filename,
       cur_line and in_source_file and are never returned.
     - In ST_NORMAL, identifiers are checked against the reserved words
       and the typedef table, and some keywords or punctuators switch
       `lexstate` so that the tokens which follow are gathered into a
       single phrase.
     - In the phrase states tokens are appended with APP / _APP until the
       brackets balance, and then one *_PHRASE token is returned.

   Returns 0 as soon as yylex1() returns 0; otherwise returns a token code
   after setting yylval to &next_node->next, the link that _APP points at
   the most recently appended node.  */
116int
117yylex(void)
118{
  /* The state is static: whatever state one call leaves behind is the
     state in which the next call starts.  */
119  static enum {
120    ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_TYPEOF, ST_TYPEOF_1,
121    ST_BRACKET, ST_BRACE, ST_EXPRESSION, ST_STATIC_ASSERT,
 
122  } lexstate = ST_NOTSTARTED;
123
  /* Countdowns armed by struct/union/enum and decremented at "fini" for
     every token returned.  While non-zero, the first disables the typedef
     lookup for identifiers and the second stops an opening brace from
     starting a BRACE_PHRASE.  */
124  static int suppress_type_lookup, dont_want_brace_phrase;
  /* Pre-allocated empty node; _APP fills it in and allocates a new one.  */
125  static struct string_list *next_node;
  /* File name taken from the first FILENAME token seen.  */
126  static char *source_file;
127
  /* count is the bracket nesting depth.  It is an ordinary local, so it
     restarts at 0 on every call.  */
128  int token, count = 0;
129  struct string_list *cur_node;
130
  /* First call only: allocate the initial spare node.  */
131  if (lexstate == ST_NOTSTARTED)
132    {
133      next_node = xmalloc(sizeof(*next_node));
134      next_node->next = NULL;
135      lexstate = ST_NORMAL;
136    }
137
  /* Every path that has consumed a token without producing a result
     jumps back here for the next raw token.  */
138repeat:
139  token = yylex1();
140
141  if (token == 0)
142    return 0;
143  else if (token == FILENAME)
144    {
145      char *file, *e;
146
147      /* Save the filename and line number for later error messages.  */
148
149      if (cur_filename)
150	free(cur_filename);
151
      /* yytext holds the whole matched line.  The name lies between the
	 first two double quotes; the closing quote is overwritten with a
	 NUL so that the copy below is terminated.  */
152      file = strchr(yytext, '\"')+1;
153      e = strchr(file, '\"');
154      *e = '\0';
155      cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
      /* Skip the hash sign and the first blank; atoi() skips any further
	 leading blanks itself.  */
156      cur_line = atoi(yytext+2);
157
      /* The first file name seen is remembered as the source file; from
	 then on in_source_file tells whether the current name equals it.  */
158      if (!source_file) {
159        source_file = xstrdup(cur_filename);
160        in_source_file = 1;
161      } else {
162        in_source_file = (strcmp(cur_filename, source_file) == 0);
163      }
164
165      goto repeat;
166    }
167
168  switch (lexstate)
169    {
170    case ST_NORMAL:
171      switch (token)
172	{
173	case IDENT:
	  /* The text is appended before it is classified, so keyword text
	     ends up in the list too.  */
174	  APP;
175	  {
176	    int r = is_reserved_word(yytext, yyleng);
177	    if (r >= 0)
178	      {
179		switch (token = r)
180		  {
		  /* Keywords that open a parenthesised phrase: change state
		     and keep reading.  Nothing is returned until the phrase
		     is complete.  */
181		  case ATTRIBUTE_KEYW:
182		    lexstate = ST_ATTRIBUTE;
183		    count = 0;
184		    goto repeat;
185		  case ASM_KEYW:
186		    lexstate = ST_ASM;
187		    count = 0;
188		    goto repeat;
189		  case TYPEOF_KEYW:
190		    lexstate = ST_TYPEOF;
191		    count = 0;
192		    goto repeat;
193
		  /* Both counters are decremented once per returned token,
		     starting with this keyword.  The identifier right after
		     the keyword is therefore not looked up as a typedef, and
		     an opening brace among the next two tokens is returned
		     as itself instead of starting a BRACE_PHRASE.  */
194		  case STRUCT_KEYW:
195		  case UNION_KEYW:
196		  case ENUM_KEYW:
197		    dont_want_brace_phrase = 3;
198		    suppress_type_lookup = 2;
199		    goto fini;
200
201		  case EXPORT_SYMBOL_KEYW:
202		      goto fini;
203
204		  case STATIC_ASSERT_KEYW:
205		    lexstate = ST_STATIC_ASSERT;
206		    count = 0;
207		    goto repeat;
208		  }
209	      }
	    /* Reached for plain identifiers and for reserved words without
	       a case above: a name found in the typedef table is reported
	       as TYPE.  */
210	    if (!suppress_type_lookup)
211	      {
212		if (find_symbol(yytext, SYM_TYPEDEF, 1))
213		  token = TYPE;
214	      }
215	  }
216	  break;
217
218	case '[':
219	  APP;
220	  lexstate = ST_BRACKET;
	  /* Depth 1 accounts for the bracket just appended.  */
221	  count = 1;
222	  goto repeat;
223
224	case '{':
225	  APP;
226	  if (dont_want_brace_phrase)
227	    break;
228	  lexstate = ST_BRACE;
229	  count = 1;
230	  goto repeat;
231
	/* The token itself is returned now.  The expression is collected by
	   the next call, which starts in ST_EXPRESSION with count == 0.  */
232	case '=': case ':':
233	  APP;
234	  lexstate = ST_EXPRESSION;
235	  break;
236
 
237	default:
238	  APP;
239	  break;
240	}
241      break;
242
    /* Every token of the phrase, parentheses included, is appended.  The
       phrase ends at the closing parenthesis that brings count back to 0.  */
243    case ST_ATTRIBUTE:
244      APP;
245      switch (token)
246	{
247	case '(':
248	  ++count;
249	  goto repeat;
250	case ')':
251	  if (--count == 0)
252	    {
253	      lexstate = ST_NORMAL;
254	      token = ATTRIBUTE_PHRASE;
255	      break;
256	    }
257	  goto repeat;
258	default:
259	  goto repeat;
260	}
261      break;
262
    /* Same scheme as ST_ATTRIBUTE, producing ASM_PHRASE.  */
263    case ST_ASM:
264      APP;
265      switch (token)
266	{
267	case '(':
268	  ++count;
269	  goto repeat;
270	case ')':
271	  if (--count == 0)
272	    {
273	      lexstate = ST_NORMAL;
274	      token = ASM_PHRASE;
275	      break;
276	    }
277	  goto repeat;
278	default:
279	  goto repeat;
280	}
281      break;
282
    /* Entered from ST_TYPEOF right after the first opening parenthesis,
       which has not been appended yet.  If the next token is a reserved
       word or a typedef name, that token and the parenthesis are pushed
       back to the scanner and TYPEOF_KEYW is returned in ST_NORMAL.
       Otherwise collection continues in ST_TYPEOF.
       NOTE(review): the deferred "(" is appended only when the token is an
       IDENT; for any other token it is not appended before falling
       through - confirm this is intended.  */
283    case ST_TYPEOF_1:
284      if (token == IDENT)
285	{
286	  if (is_reserved_word(yytext, yyleng) >= 0
287	      || find_symbol(yytext, SYM_TYPEDEF, 1))
288	    {
289	      yyless(0);
290	      unput('(');
291	      lexstate = ST_NORMAL;
292	      token = TYPEOF_KEYW;
293	      break;
294	    }
295	  _APP("(", 1);
296	}
297	lexstate = ST_TYPEOF;
298	/* FALLTHRU */
299
300    case ST_TYPEOF:
301      switch (token)
302	{
303	case '(':
	  /* The outermost parenthesis is not appended here; ST_TYPEOF_1
	     decides what to do with it once it has seen the next token.  */
304	  if ( ++count == 1 )
305	    lexstate = ST_TYPEOF_1;
306	  else
307	    APP;
308	  goto repeat;
309	case ')':
310	  APP;
311	  if (--count == 0)
312	    {
313	      lexstate = ST_NORMAL;
314	      token = TYPEOF_PHRASE;
315	      break;
316	    }
317	  goto repeat;
318	default:
319	  APP;
320	  goto repeat;
321	}
322      break;
323
    /* Entered from ST_NORMAL with count == 1; collects up to the matching
       closing bracket and returns BRACKET_PHRASE.  */
324    case ST_BRACKET:
325      APP;
326      switch (token)
327	{
328	case '[':
329	  ++count;
330	  goto repeat;
331	case ']':
332	  if (--count == 0)
333	    {
334	      lexstate = ST_NORMAL;
335	      token = BRACKET_PHRASE;
336	      break;
337	    }
338	  goto repeat;
339	default:
340	  goto repeat;
341	}
342      break;
343
    /* Entered from ST_NORMAL with count == 1; collects up to the matching
       closing brace and returns BRACE_PHRASE.  */
344    case ST_BRACE:
345      APP;
346      switch (token)
347	{
348	case '{':
349	  ++count;
350	  goto repeat;
351	case '}':
352	  if (--count == 0)
353	    {
354	      lexstate = ST_NORMAL;
355	      token = BRACE_PHRASE;
356	      break;
357	    }
358	  goto repeat;
359	default:
360	  goto repeat;
361	}
362      break;
363
    /* Collects tokens until a comma, a semicolon or a closing brace is met
       at nesting depth 0.  That terminator is not appended: it is pushed
       back to the scanner and EXPRESSION_PHRASE is returned.  */
364    case ST_EXPRESSION:
365      switch (token)
366	{
367	case '(': case '[': case '{':
368	  ++count;
369	  APP;
370	  goto repeat;
371	case '}':
372	  /* is this the last line of an enum declaration? */
373	  if (count == 0)
374	    {
375	      /* Put back the token we just read so's we can find it again
376		 after registering the expression.  */
377	      unput(token);
378
379	      lexstate = ST_NORMAL;
380	      token = EXPRESSION_PHRASE;
381	      break;
382	    }
383	  /* FALLTHRU */
384	case ')': case ']':
385	  --count;
386	  APP;
387	  goto repeat;
388	case ',': case ';':
389	  if (count == 0)
390	    {
391	      /* Put back the token we just read so's we can find it again
392		 after registering the expression.  */
393	      unput(token);
394
395	      lexstate = ST_NORMAL;
396	      token = EXPRESSION_PHRASE;
397	      break;
398	    }
399	  APP;
400	  goto repeat;
401	default:
402	  APP;
403	  goto repeat;
404	}
405      break;
406
    /* Same scheme as ST_ATTRIBUTE, producing STATIC_ASSERT_PHRASE.  */
407    case ST_STATIC_ASSERT:
408      APP;
 
 
 
 
 
 
 
 
 
 
 
 
 
409      switch (token)
410	{
411	case '(':
412	  ++count;
413	  goto repeat;
414	case ')':
415	  if (--count == 0)
416	    {
417	      lexstate = ST_NORMAL;
418	      token = STATIC_ASSERT_PHRASE;
419	      break;
420	    }
421	  goto repeat;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
422	default:
423	  goto repeat;
 
424	}
425      break;
426
    /* A state value without a case above: give up.  */
427    default:
428      exit(1);
429    }
  /* Common exit for every token that is returned to the caller.  */
430fini:
431
432  if (suppress_type_lookup > 0)
433    --suppress_type_lookup;
434  if (dont_want_brace_phrase > 0)
435    --dont_want_brace_phrase;
436
437  yylval = &next_node->next;
438
439  return token;
440}

  1/* Lexical analysis for genksyms.
  2   Copyright 1996, 1997 Linux International.
  3
  4   New implementation contributed by Richard Henderson <rth@tamu.edu>
  5   Based on original work by Bjorn Ekwall <bj0rn@blox.se>
  6
  7   Taken from Linux modutils 2.4.22.
  8
  9   This program is free software; you can redistribute it and/or modify it
 10   under the terms of the GNU General Public License as published by the
 11   Free Software Foundation; either version 2 of the License, or (at your
 12   option) any later version.
 13
 14   This program is distributed in the hope that it will be useful, but
 15   WITHOUT ANY WARRANTY; without even the implied warranty of
 16   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 17   General Public License for more details.
 18
 19   You should have received a copy of the GNU General Public License
 20   along with this program; if not, write to the Free Software Foundation,
 21   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
 22
 23
 24%{
 25
 26#include <limits.h>
 27#include <stdlib.h>
 28#include <string.h>
 29#include <ctype.h>
 30
 31#include "genksyms.h"
 32#include "parse.tab.h"
 33
 34/* We've got a two-level lexer here.  We let flex do basic tokenization
 35   and then we categorize those basic tokens in the second stage.  */
 36#define YY_DECL		static int yylex1(void)
 37
 38%}
 39
/*
 * Named patterns used by the rules section.
 *
 *   IDENT    - C identifier; the dollar sign is accepted as an identifier
 *              character as well.
 *   INT      - octal (O_INT), decimal (D_INT) or hexadecimal (X_INT) literal
 *              with an optional suffix from I_SUF: U, L, UL or LU in either
 *              case.
 *   REAL     - floating literal: a fraction (FRAC) with an optional exponent,
 *              or plain digits with a mandatory exponent; an optional F or L
 *              suffix (F_SUF) may follow.
 *   STRING   - double-quoted literal, CHAR - single-quoted literal; both
 *              accept an optional L prefix and backslash escapes.
 *   MC_TOKEN - two-character operators: one of the listed operator
 *              characters followed by an equals sign, plus the doubled
 *              ampersand, the doubled bar, the arrow and the two shifts.
 *
 * NOTE(review): I_SUF has no LL or ULL alternative, so for such a literal
 * INT can only match up to the first L - confirm whether that is intended.
 */
 40IDENT			[A-Za-z_\$][A-Za-z0-9_\$]*
 41
 42O_INT			0[0-7]*
 43D_INT			[1-9][0-9]*
 44X_INT			0[Xx][0-9A-Fa-f]+
 45I_SUF			[Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
 46INT			({O_INT}|{D_INT}|{X_INT}){I_SUF}?
 47
 48FRAC			([0-9]*\.[0-9]+)|([0-9]+\.)
 49EXP			[Ee][+-]?[0-9]+
 50F_SUF			[FfLl]
 51REAL			({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)
 52
 53STRING			L?\"([^\\\"]*\\.)*[^\\\"]*\"
 54CHAR			L?\'([^\\\']*\\.)*[^\\\']*\'
 55
 56MC_TOKEN		([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)
 57
 58/* We don't do multiple input files.  */
 59%option noyywrap
 60
 61%option noinput
 62
 63%%
 64
 65
 /* First-stage rules.  Each rule either returns a coarse token class
    (FILENAME, STRING, CHAR, IDENT, OTHER, INT, REAL, DOTS or the matched
    character itself), counts a line in cur_line, or discards whitespace.
    A line made of a hash sign, blanks, an INT and a double-quoted name is
    returned as FILENAME; every other line starting with a hash sign is
    only counted.  The categorisation of these tokens is refined in yylex()
    in the user-code section.  */
 66 /* Keep track of our location in the original source files.  */
 67^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n	return FILENAME;
 68^#.*\n					cur_line++;
 69\n					cur_line++;
 70
 71 /* Ignore all other whitespace.  */
 72[ \t\f\v\r]+				;
 73
 74
 75{STRING}				return STRING;
 76{CHAR}					return CHAR;
 77{IDENT}					return IDENT;
 78
 79 /* The Pedant requires that the other C multi-character tokens be
 80    recognized as tokens.  We don't actually use them since we don't
 81    parse expressions, but we do want whitespace to be arranged
 82    around them properly.  */
 83{MC_TOKEN}				return OTHER;
 84{INT}					return INT;
 85{REAL}					return REAL;
 86
 87"..."					return DOTS;
 88
 89 /* All other tokens are single characters.  */
 90.					return yytext[0];
 91
 92
 93%%
 94
 95/* Bring in the keyword recognizer.  */
 96
 97#include "keywords.hash.c"
 98
 99
100/* Macros to append to our phrase collection list.  */
101
102/*
103 * We mark any token that equals a known enumerator as
104 * SYM_ENUM_CONST. The parser will change this for struct and union tags later,
105 * the only problem is struct and union members:
106 *    enum e { a, b }; struct s { int a, b; }
107 * but in this case, the only effect will be, that the ABI checksums become
108 * more volatile, which is acceptable. Also, such collisions are quite rare,
109 * so far it was only observed in include/linux/telephony.h.
110 */
/*
 * _APP(T, L) - append a copy of the token text T, of length L, to the
 * phrase list.
 *
 * `cur_node` and `next_node` (struct string_list pointers) must be in scope
 * where the macro is used.  The pre-allocated `next_node` becomes the new
 * `cur_node` and receives the text and the tag (SYM_ENUM_CONST when the
 * text names a known enumerator, SYM_NORMAL otherwise).  A fresh node is
 * allocated as the new `next_node` with its `next` pointing back at
 * `cur_node`, so the list is linked from the newest token to the oldest.
 *
 * L + 1 bytes are copied from T, so T must be followed by its terminating
 * NUL.  Both arguments are parenthesised and L is evaluated exactly once,
 * which makes it safe to pass expressions such as `c ? a : b`.
 */
#define _APP(T, L)	do {						\
			  size_t app_size_ = (size_t)(L) + 1;		\
			  cur_node = next_node;				\
			  next_node = xmalloc(sizeof(*next_node));	\
			  next_node->next = cur_node;			\
			  cur_node->string =				\
			    memcpy(xmalloc(app_size_), (T), app_size_);	\
			  cur_node->tag =				\
			    find_symbol(cur_node->string,		\
					SYM_ENUM_CONST, 1)		\
			    ? SYM_ENUM_CONST : SYM_NORMAL;		\
			} while (0)

/* Append the token most recently matched by flex (yytext / yyleng).  */
#define APP		_APP(yytext, yyleng)
122
123
124/* The second stage lexer.  Here we incorporate knowledge of the state
125   of the parser to tailor the tokens that are returned.  */
126
/* yylex - second-stage lexer.

   Reads raw tokens from yylex1() (the flex scanner) and post-processes
   them according to the function-static `lexstate`:

     - FILENAME tokens are consumed here: they update cur_filename and
       cur_line and are never returned.
     - In ST_NORMAL, identifiers are checked against the reserved words
       and the typedef table, and some keywords or punctuators switch
       `lexstate` so that the tokens which follow are gathered into a
       single phrase.
     - In the phrase states tokens are appended with APP until the
       brackets balance, and then one *_PHRASE token is returned.

   Returns 0 as soon as yylex1() returns 0; otherwise returns a token code
   after setting yylval to &next_node->next, the link that _APP points at
   the most recently appended node.  */
127int
128yylex(void)
129{
  /* The state is static: whatever state one call leaves behind is the
     state in which the next call starts.  */
130  static enum {
131    ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_BRACKET, ST_BRACE,
132    ST_EXPRESSION, ST_TABLE_1, ST_TABLE_2, ST_TABLE_3, ST_TABLE_4,
133    ST_TABLE_5, ST_TABLE_6
134  } lexstate = ST_NOTSTARTED;
135
  /* Countdowns armed by struct/union/enum and decremented at "fini" for
     every token returned.  While non-zero, the first disables the typedef
     lookup for identifiers and the second stops an opening brace from
     starting a BRACE_PHRASE.  */
136  static int suppress_type_lookup, dont_want_brace_phrase;
  /* Pre-allocated empty node; _APP fills it in and allocates a new one.  */
137  static struct string_list *next_node;
 
138
  /* count is the bracket nesting depth.  It is an ordinary local, so it
     restarts at 0 on every call.  */
139  int token, count = 0;
140  struct string_list *cur_node;
141
  /* First call only: allocate the initial spare node.  */
142  if (lexstate == ST_NOTSTARTED)
143    {
144      next_node = xmalloc(sizeof(*next_node));
145      next_node->next = NULL;
146      lexstate = ST_NORMAL;
147    }
148
  /* Every path that has consumed a token without producing a result
     jumps back here for the next raw token.  */
149repeat:
150  token = yylex1();
151
152  if (token == 0)
153    return 0;
154  else if (token == FILENAME)
155    {
156      char *file, *e;
157
158      /* Save the filename and line number for later error messages.  */
159
160      if (cur_filename)
161	free(cur_filename);
162
      /* yytext holds the whole matched line.  The name lies between the
	 first two double quotes; the closing quote is overwritten with a
	 NUL so that the copy below is terminated.  */
163      file = strchr(yytext, '\"')+1;
164      e = strchr(file, '\"');
165      *e = '\0';
166      cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
      /* Skip the hash sign and the first blank; atoi() skips any further
	 leading blanks itself.  */
167      cur_line = atoi(yytext+2);
168
 
 
 
 
 
 
 
169      goto repeat;
170    }
171
172  switch (lexstate)
173    {
174    case ST_NORMAL:
175      switch (token)
176	{
177	case IDENT:
	  /* The text is appended before it is classified, so keyword text
	     ends up in the list too.  */
178	  APP;
179	  {
	    /* A null result means the identifier is not a reserved word.  */
180	    const struct resword *r = is_reserved_word(yytext, yyleng);
181	    if (r)
182	      {
183		switch (token = r->token)
184		  {
		  /* Keywords that open a parenthesised phrase: change state
		     and keep reading.  Nothing is returned until the phrase
		     is complete.  */
185		  case ATTRIBUTE_KEYW:
186		    lexstate = ST_ATTRIBUTE;
187		    count = 0;
188		    goto repeat;
189		  case ASM_KEYW:
190		    lexstate = ST_ASM;
191		    count = 0;
192		    goto repeat;
 
 
 
 
193
		  /* Both counters are decremented once per returned token,
		     starting with this keyword.  The identifier right after
		     the keyword is therefore not looked up as a typedef, and
		     an opening brace among the next two tokens is returned
		     as itself instead of starting a BRACE_PHRASE.  */
194		  case STRUCT_KEYW:
195		  case UNION_KEYW:
196		  case ENUM_KEYW:
197		    dont_want_brace_phrase = 3;
198		    suppress_type_lookup = 2;
199		    goto fini;
200
201		  case EXPORT_SYMBOL_KEYW:
202		      goto fini;
 
 
 
 
 
203		  }
204	      }
	    /* Reached for plain identifiers and for reserved words without
	       a case above: a name found in the typedef table is reported
	       as TYPE.  */
205	    if (!suppress_type_lookup)
206	      {
207		if (find_symbol(yytext, SYM_TYPEDEF, 1))
208		  token = TYPE;
209	      }
210	  }
211	  break;
212
213	case '[':
214	  APP;
215	  lexstate = ST_BRACKET;
	  /* Depth 1 accounts for the bracket just appended.  */
216	  count = 1;
217	  goto repeat;
218
219	case '{':
220	  APP;
221	  if (dont_want_brace_phrase)
222	    break;
223	  lexstate = ST_BRACE;
224	  count = 1;
225	  goto repeat;
226
	/* The token itself is returned now.  The expression is collected by
	   the next call, which starts in ST_EXPRESSION with count == 0.  */
227	case '=': case ':':
228	  APP;
229	  lexstate = ST_EXPRESSION;
230	  break;
231
	/* DOTS shares the default handling: append and return unchanged.  */
232	case DOTS:
233	default:
234	  APP;
235	  break;
236	}
237      break;
238
    /* Every token of the phrase, parentheses included, is appended.  The
       phrase ends at the closing parenthesis that brings count back to 0.  */
239    case ST_ATTRIBUTE:
240      APP;
241      switch (token)
242	{
243	case '(':
244	  ++count;
245	  goto repeat;
246	case ')':
247	  if (--count == 0)
248	    {
249	      lexstate = ST_NORMAL;
250	      token = ATTRIBUTE_PHRASE;
251	      break;
252	    }
253	  goto repeat;
254	default:
255	  goto repeat;
256	}
257      break;
258
    /* Same scheme as ST_ATTRIBUTE, producing ASM_PHRASE.  */
259    case ST_ASM:
260      APP;
261      switch (token)
262	{
263	case '(':
264	  ++count;
265	  goto repeat;
266	case ')':
267	  if (--count == 0)
268	    {
269	      lexstate = ST_NORMAL;
270	      token = ASM_PHRASE;
271	      break;
272	    }
273	  goto repeat;
274	default:
275	  goto repeat;
276	}
277      break;
278
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
    /* Entered from ST_NORMAL with count == 1; collects up to the matching
       closing bracket and returns BRACKET_PHRASE.  */
279    case ST_BRACKET:
280      APP;
281      switch (token)
282	{
283	case '[':
284	  ++count;
285	  goto repeat;
286	case ']':
287	  if (--count == 0)
288	    {
289	      lexstate = ST_NORMAL;
290	      token = BRACKET_PHRASE;
291	      break;
292	    }
293	  goto repeat;
294	default:
295	  goto repeat;
296	}
297      break;
298
    /* Entered from ST_NORMAL with count == 1; collects up to the matching
       closing brace and returns BRACE_PHRASE.  */
299    case ST_BRACE:
300      APP;
301      switch (token)
302	{
303	case '{':
304	  ++count;
305	  goto repeat;
306	case '}':
307	  if (--count == 0)
308	    {
309	      lexstate = ST_NORMAL;
310	      token = BRACE_PHRASE;
311	      break;
312	    }
313	  goto repeat;
314	default:
315	  goto repeat;
316	}
317      break;
318
    /* Collects tokens until a comma, a semicolon or a closing brace is met
       at nesting depth 0.  That terminator is not appended: it is pushed
       back to the scanner and EXPRESSION_PHRASE is returned.  */
319    case ST_EXPRESSION:
320      switch (token)
321	{
322	case '(': case '[': case '{':
323	  ++count;
324	  APP;
325	  goto repeat;
326	case '}':
327	  /* is this the last line of an enum declaration? */
328	  if (count == 0)
329	    {
330	      /* Put back the token we just read so's we can find it again
331		 after registering the expression.  */
332	      unput(token);
333
334	      lexstate = ST_NORMAL;
335	      token = EXPRESSION_PHRASE;
336	      break;
337	    }
338	  /* FALLTHRU */
339	case ')': case ']':
340	  --count;
341	  APP;
342	  goto repeat;
343	case ',': case ';':
344	  if (count == 0)
345	    {
346	      /* Put back the token we just read so's we can find it again
347		 after registering the expression.  */
348	      unput(token);
349
350	      lexstate = ST_NORMAL;
351	      token = EXPRESSION_PHRASE;
352	      break;
353	    }
354	  APP;
355	  goto repeat;
356	default:
357	  APP;
358	  goto repeat;
359	}
360      break;
361
    /* NOTE(review): lexstate is local to this function, and no statement
       here moves from ST_NORMAL (or any other non-table state) into a
       ST_TABLE_* state, so the table states below look unreachable -
       confirm before relying on them.  */
    /* ST_TABLE_1 and ST_TABLE_3 drop the token and read the next one.  */
362    case ST_TABLE_1:
363      goto repeat;
364
    /* An identifier spelled exactly X is returned as EXPORT_SYMBOL_KEYW
       and the state moves to ST_TABLE_5; any other token is handled by
       ST_TABLE_6.  */
365    case ST_TABLE_2:
366      if (token == IDENT && yyleng == 1 && yytext[0] == 'X')
367	{
368	  token = EXPORT_SYMBOL_KEYW;
369	  lexstate = ST_TABLE_5;
370	  APP;
371	  break;
372	}
373      lexstate = ST_TABLE_6;
374      /* FALLTHRU */
375
    /* Tokens are dropped without being appended while the nesting depth is
       tracked; a comma at depth 0 switches back to ST_TABLE_2.  */
376    case ST_TABLE_6:
377      switch (token)
378	{
379	case '{': case '[': case '(':
380	  ++count;
381	  break;
382	case '}': case ']': case ')':
383	  --count;
384	  break;
385	case ',':
386	  if (count == 0)
387	    lexstate = ST_TABLE_2;
388	  break;
389	};
390      goto repeat;
391
392    case ST_TABLE_3:
393      goto repeat;
394
    /* Drops tokens up to and including a semicolon, then resumes
       ST_NORMAL.  */
395    case ST_TABLE_4:
396      if (token == ';')
397	lexstate = ST_NORMAL;
398      goto repeat;
399
    /* Tokens are appended and returned one by one.  A comma is returned
       as a semicolon and moves the state back to ST_TABLE_2; the appended
       text is still the comma from yytext.  */
400    case ST_TABLE_5:
401      switch (token)
402	{
403	case ',':
404	  token = ';';
405	  lexstate = ST_TABLE_2;
406	  APP;
407	  break;
408	default:
409	  APP;
410	  break;
411	}
412      break;
413
    /* A state value without a case above: give up.  */
414    default:
415      exit(1);
416    }
  /* Common exit for every token that is returned to the caller.  */
417fini:
418
419  if (suppress_type_lookup > 0)
420    --suppress_type_lookup;
421  if (dont_want_brace_phrase > 0)
422    --dont_want_brace_phrase;
423
424  yylval = &next_node->next;
425
426  return token;
427}