ANSI C grammar, Lex specification
In 1985, Jeff Lee published this Lex specification together with a Yacc
grammar for the April 30, 1985 ANSI C draft.
Tom Stockfisch reposted
both to net.sources in 1987; that original, as mentioned in the answer
to question 17.25 of the comp.lang.c FAQ, can be ftp'ed from ftp.uu.net,
file usenet/net.sources/ansi.c.grammar.Z.
I intend to keep this version as close to the current C Standard grammar
as possible; please let me know if you discover discrepancies.
Jutta Degener, 1995
D			[0-9]
L			[a-zA-Z_]
H			[a-fA-F0-9]
E			[Ee][+-]?{D}+
FS			(f|F|l|L)
IS			(u|U|l|L)*

%{
/*
 * Scanner prologue.
 *
 * The definitions above name the character classes used by the rules:
 *   D   decimal digit             L   letter or underscore
 *   H   hexadecimal digit         E   exponent part of a floating constant
 *   FS  floating suffix           IS  integer suffix (any run of u/U/l/L)
 *
 * y.tab.h is expected to provide the token codes (AUTO, BREAK, ...)
 * that the rule actions return.
 *
 * comment() and check_type() are called from rule actions before their
 * definitions in the user-subroutines section, so they are declared here
 * alongside count() to avoid implicit function declarations.
 */
#include <stdio.h>
#include "y.tab.h"

void count();
int comment(void);
int check_type(void);
%}

%%
"/*"			{ comment(); }

"auto"			{ count(); return(AUTO); }
"break"			{ count(); return(BREAK); }
"case"			{ count(); return(CASE); }
"char"			{ count(); return(CHAR); }
"const"			{ count(); return(CONST); }
"continue"		{ count(); return(CONTINUE); }
"default"		{ count(); return(DEFAULT); }
"do"			{ count(); return(DO); }
"double"		{ count(); return(DOUBLE); }
"else"			{ count(); return(ELSE); }
"enum"			{ count(); return(ENUM); }
"extern"		{ count(); return(EXTERN); }
"float"			{ count(); return(FLOAT); }
"for"
"goto"
"if"
"int"
"long"
"register"
"return"
"short"
"signed"
"sizeof"
"static"
"struct"
"switch"
"typedef"
"union"
"unsigned"
"void"
"volatile"
"while"
{ count(); return(FOR); }
{ count(); return(GOTO); }
{ count(); return(IF); }
{ count(); return(INT); }
{ count(); return(LONG); }
{ count(); return(REGISTER); }
{ count(); return(RETURN); }
{ count(); return(SHORT); }
{ count(); return(SIGNED); }
{ count(); return(SIZEOF); }
{ count(); return(STATIC); }
{ count(); return(STRUCT); }
{ count(); return(SWITCH); }
{ count(); return(TYPEDEF); }
{ count(); return(UNION); }
{ count(); return(UNSIGNED); }
{ count(); return(VOID); }
{ count(); return(VOLATILE); }
{ count(); return(WHILE); }
{L}({L}|{D})*
{ count(); return(check_type()); }
0[xX]{H}+{IS}?
0{D}+{IS}?
{D}+{IS}?
L?'(\\.|[^\\'])+'
{ count(); return(CONSTANT); }
{ count(); return(CONSTANT); }
{ count(); return(CONSTANT); }
{ count(); return(CONSTANT); }
{D}+{E}{FS}?
{ count(); return(CONSTANT); }
{D}*"."{D}+({E})?{FS}? { count(); return(CONSTANT); }
{D}+"."{D}*({E})?{FS}? { count(); return(CONSTANT); }
L?\"(\\.|[^\\"])*\"
{ count(); return(STRING_LITERAL); }
"..."
">>="
"<<="
"+="
"-="
"*="
"/="
"%="
"&="
"^="
"|="
{ count(); return(ELLIPSIS); }
{ count(); return(RIGHT_ASSIGN); }
{ count(); return(LEFT_ASSIGN); }
{ count(); return(ADD_ASSIGN); }
{ count(); return(SUB_ASSIGN); }
{ count(); return(MUL_ASSIGN); }
{ count(); return(DIV_ASSIGN); }
{ count(); return(MOD_ASSIGN); }
{ count(); return(AND_ASSIGN); }
{ count(); return(XOR_ASSIGN); }
{ count(); return(OR_ASSIGN); }
">>"
"<<"
"++"
"--"
"->"
"&&"
"||"
"<="
">="
"=="
"!="
";"
("{"|"<%")
("}"|"%>")
","
":"
"="
"("
")"
("["|"<:")
("]"|":>")
"."
"&"
"!"
"~"
"-"
"+"
"*"
"/"
"%"
"<"
">"
"^"
"|"
"?"
{ count(); return(RIGHT_OP); }
{ count(); return(LEFT_OP); }
{ count(); return(INC_OP); }
{ count(); return(DEC_OP); }
{ count(); return(PTR_OP); }
{ count(); return(AND_OP); }
{ count(); return(OR_OP); }
{ count(); return(LE_OP); }
{ count(); return(GE_OP); }
{ count(); return(EQ_OP); }
{ count(); return(NE_OP); }
{ count(); return(';'); }
{ count(); return('{'); }
{ count(); return('}'); }
{ count(); return(','); }
{ count(); return(':'); }
{ count(); return('='); }
{ count(); return('('); }
{ count(); return(')'); }
{ count(); return('['); }
{ count(); return(']'); }
{ count(); return('.'); }
{ count(); return('&'); }
{ count(); return('!'); }
{ count(); return('~'); }
{ count(); return('-'); }
{ count(); return('+'); }
{ count(); return('*'); }
{ count(); return('/'); }
{ count(); return('%'); }
{ count(); return('<'); }
{ count(); return('>'); }
{ count(); return('^'); }
{ count(); return('|'); }
{ count(); return('?'); }
[ \t\v\n\f]
.
{ count(); }
{ /* ignore bad characters */ }
%%
/*
 * yywrap - end-of-input hook for the generated scanner.
 *
 * Always returns 1, which tells the scanner that no further input
 * is available.
 */
int yywrap(void)
{
	return 1;
}
/*
 * comment - consume the rest of a C comment after its opening delimiter
 * has been matched by the scanner.
 *
 * Characters inside the comment are written to stdout as they are read,
 * except that '*' characters are not echoed; when the closing delimiter
 * is found only its '/' is written.  That output is unchanged from the
 * previous implementation.
 * NOTE(review): dropping the '*' characters looks unintentional -- confirm
 * before relying on the echoed text.
 *
 * input() results are held in an int and both 0 and EOF are treated as
 * end of input, because lex implementations differ in which one input()
 * returns; an unterminated comment therefore ends the function instead
 * of looping.
 *
 * Returns 0 always; the value carries no meaning and exists only so the
 * function has a well-defined int return.
 */
int comment(void)
{
	int c;
	int next;

	for (;;) {
		/* Echo everything up to the next '*' or end of input. */
		while ((c = input()) != '*' && c != 0 && c != EOF)
			putchar(c);

		if (c != '*')
			return 0;	/* input ended inside the comment */

		next = input();
		if (next == '/') {
			putchar(next);
			return 0;
		}
		if (next == 0 || next == EOF)
			return 0;	/* input ended right after a '*' */

		/* Not the end of the comment: rescan the character, since it
		 * may itself be the '*' that starts the closing delimiter. */
		unput(next);
	}
}
/* Current output column; reset to 0 by every newline seen in a token. */
int column = 0;

/*
 * count - advance `column` over the text of the token just matched
 * (yytext) and then echo that token with ECHO.
 *
 * A newline resets the column to 0, a tab moves it to the next multiple
 * of 8, and any other character advances it by one.
 */
void count()
{
	const char *p = yytext;

	while (*p != '\0') {
		switch (*p) {
		case '\n':
			column = 0;
			break;
		case '\t':
			column += 8 - (column % 8);
			break;
		default:
			column++;
			break;
		}
		p++;
	}

	ECHO;
}
int check_type()
{
/*
* pseudo code --- this is what it should check
*
*
*
*
*
*/
/*
*
*/
}
if (yytext == type_name)
return(TYPE_NAME);
return(IDENTIFIER);
it actually will only return IDENTIFIER
return(IDENTIFIER);
ANSI C Yacc grammar
In 1985, Jeff Lee published his Yacc grammar (which is accompanied by a
matching Lex specification) for the April 30, 1985 draft version of the
ANSI C standard.
Tom Stockfisch reposted it to net.sources in 1987; that
original, as mentioned in the answer to question 17.25 of the comp.lang.c
FAQ, can be ftp'ed from ftp.uu.net, file
usenet/net.sources/ansi.c.grammar.Z.
Jutta Degener, 1995
/* Identifiers, literals, and the sizeof keyword. */
%token IDENTIFIER CONSTANT STRING_LITERAL SIZEOF
/* Multi-character operators used in expressions. */
%token PTR_OP INC_OP DEC_OP LEFT_OP RIGHT_OP LE_OP GE_OP EQ_OP NE_OP
%token AND_OP OR_OP MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN ADD_ASSIGN
%token SUB_ASSIGN LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN
%token XOR_ASSIGN OR_ASSIGN TYPE_NAME
/* Storage-class keywords. */
%token TYPEDEF EXTERN STATIC AUTO REGISTER
/* Type-specifier and type-qualifier keywords (the list continues on the next line). */
%token CHAR SHORT INT LONG SIGNED UNSIGNED FLOAT DOUBLE CONST VOLATILE
VOID
%token STRUCT UNION ENUM ELLIPSIS
/* Statement keywords. */
%token CASE DEFAULT IF ELSE SWITCH WHILE DO FOR GOTO CONTINUE BREAK RETURN
/* The grammar's start symbol. */
%start translation_unit
%%
/* An identifier, a constant, a string literal, or a parenthesized expression. */
primary_expression
: IDENTIFIER
| CONSTANT
| STRING_LITERAL
| '(' expression ')'
;
/*
 * A primary expression followed by any number of suffixes: subscript,
 * call (with or without arguments), member access via '.' or PTR_OP,
 * and postfix INC_OP / DEC_OP.  The rule is left-recursive.
 */
postfix_expression
: primary_expression
| postfix_expression '[' expression ']'
| postfix_expression '(' ')'
| postfix_expression '(' argument_expression_list ')'
| postfix_expression '.' IDENTIFIER
| postfix_expression PTR_OP IDENTIFIER
| postfix_expression INC_OP
| postfix_expression DEC_OP
;
/* One or more assignment_expressions separated by commas. */
argument_expression_list
: assignment_expression
| argument_expression_list ',' assignment_expression
;
/*
 * Prefix forms: INC_OP / DEC_OP and SIZEOF apply to a unary_expression,
 * a unary_operator applies to a cast_expression, and SIZEOF may also take
 * a parenthesized type_name.
 */
unary_expression
: postfix_expression
| INC_OP unary_expression
| DEC_OP unary_expression
| unary_operator cast_expression
| SIZEOF unary_expression
| SIZEOF '(' type_name ')'
;
/* The six single-character prefix operators. */
unary_operator
: '&'
| '*'
| '+'
| '-'
| '~'
| '!'
;
/* A unary_expression preceded by zero or more parenthesized type_name casts (right-recursive). */
cast_expression
: unary_expression
| '(' type_name ')' cast_expression
;
/*
 * Binary operator chain.  Each rule below is left-recursive and takes the
 * rule before it as its right operand, so the rules are listed from the
 * tightest-binding operators ('*', '/', '%') down to OR_OP.
 */
multiplicative_expression
: cast_expression
| multiplicative_expression '*' cast_expression
| multiplicative_expression '/' cast_expression
| multiplicative_expression '%' cast_expression
;
additive_expression
: multiplicative_expression
| additive_expression '+' multiplicative_expression
| additive_expression '-' multiplicative_expression
;
shift_expression
: additive_expression
| shift_expression LEFT_OP additive_expression
| shift_expression RIGHT_OP additive_expression
;
relational_expression
: shift_expression
| relational_expression '<' shift_expression
| relational_expression '>' shift_expression
| relational_expression LE_OP shift_expression
| relational_expression GE_OP shift_expression
;
equality_expression
: relational_expression
| equality_expression EQ_OP relational_expression
| equality_expression NE_OP relational_expression
;
/* Bitwise '&', then '^', then '|', each one level looser than the last. */
and_expression
: equality_expression
| and_expression '&' equality_expression
;
exclusive_or_expression
: and_expression
| exclusive_or_expression '^' and_expression
;
inclusive_or_expression
: exclusive_or_expression
| inclusive_or_expression '|' exclusive_or_expression
;
/* Logical AND_OP binds tighter than logical OR_OP. */
logical_and_expression
: inclusive_or_expression
| logical_and_expression AND_OP inclusive_or_expression
;
logical_or_expression
: logical_and_expression
| logical_or_expression OR_OP logical_and_expression
;
/*
 * The '?' ':' operator.  The middle operand is a full expression and the
 * last operand recurses into conditional_expression (right-recursive).
 */
conditional_expression
: logical_or_expression
| logical_or_expression '?' expression ':'
conditional_expression
;
/*
 * Assignment is right-recursive, and its left-hand side is restricted
 * to a unary_expression.
 */
assignment_expression
: conditional_expression
| unary_expression assignment_operator assignment_expression
;
/* Plain '=' and the ten compound assignment tokens. */
assignment_operator
: '='
| MUL_ASSIGN
| DIV_ASSIGN
| MOD_ASSIGN
| ADD_ASSIGN
| SUB_ASSIGN
| LEFT_ASSIGN
| RIGHT_ASSIGN
| AND_ASSIGN
| XOR_ASSIGN
| OR_ASSIGN
;
expression
: assignment_expression
| expression ',' assignment_expression