logo资料库

LEX与YACC实现C语言词法分析和语法分析.doc

第1页 / 共15页
第2页 / 共15页
第3页 / 共15页
第4页 / 共15页
第5页 / 共15页
第6页 / 共15页
第7页 / 共15页
第8页 / 共15页
资料共15页,剩余部分请下载后查看
ANSI C grammar, Lex specification
ANSI C Yacc grammar
ANSI C grammar, Lex specification

In 1985, Jeff Lee published this Lex specification together with a Yacc grammar for the April 30, 1985 ANSI C draft. Tom Stockfisch reposted both to net.sources in 1987; that original, as mentioned in the answer to question 17.25 of the comp.lang.c FAQ, can be ftp'ed from ftp.uu.net, file usenet/net.sources/ansi.c.grammar.Z.

I intend to keep this version as close to the current C Standard grammar as possible; please let me know if you discover discrepancies.

Jutta Degener, 1995

D			[0-9]
L			[a-zA-Z_]
H			[a-fA-F0-9]
E			[Ee][+-]?{D}+
FS			(f|F|l|L)
IS			(u|U|l|L)*

%{
#include <stdio.h>
#include "y.tab.h"

/*
 * Helpers defined in the user-code section below.  They are declared here
 * because the rule actions call them before their definitions appear.
 */
void count(void);
void comment(void);
int check_type(void);
%}

%%
"/*"			{ comment(); }

"auto"			{ count(); return(AUTO); }
"break"			{ count(); return(BREAK); }
"case"			{ count(); return(CASE); }
"char"			{ count(); return(CHAR); }
"const"			{ count(); return(CONST); }
"continue"		{ count(); return(CONTINUE); }
"default"		{ count(); return(DEFAULT); }
"do"			{ count(); return(DO); }
"double"		{ count(); return(DOUBLE); }
"else"			{ count(); return(ELSE); }
"enum"			{ count(); return(ENUM); }
"extern"		{ count(); return(EXTERN); }
"float"			{ count(); return(FLOAT); }
"for"			{ count(); return(FOR); }
"goto"			{ count(); return(GOTO); }
"if"			{ count(); return(IF); }
"int"			{ count(); return(INT); }
"long"			{ count(); return(LONG); }
"register"		{ count(); return(REGISTER); }
"return"		{ count(); return(RETURN); }
"short"			{ count(); return(SHORT); }
"signed"		{ count(); return(SIGNED); }
"sizeof"		{ count(); return(SIZEOF); }
"static"		{ count(); return(STATIC); }
"struct"		{ count(); return(STRUCT); }
"switch"		{ count(); return(SWITCH); }
"typedef"		{ count(); return(TYPEDEF); }
"union"			{ count(); return(UNION); }
"unsigned"		{ count(); return(UNSIGNED); }
"void"			{ count(); return(VOID); }
"volatile"		{ count(); return(VOLATILE); }
"while"			{ count(); return(WHILE); }

{L}({L}|{D})*		{ count(); return(check_type()); }

0[xX]{H}+{IS}?		{ count(); return(CONSTANT); }
0{D}+{IS}?		{ count(); return(CONSTANT); }
{D}+{IS}?		{ count(); return(CONSTANT); }
L?'(\\.|[^\\'])+'	{ count(); return(CONSTANT); }

{D}+{E}{FS}?		{ count(); return(CONSTANT); }
{D}*"."{D}+({E})?{FS}?	{ count(); return(CONSTANT); }
{D}+"."{D}*({E})?{FS}?	{ count(); return(CONSTANT); }

L?\"(\\.|[^\\"])*\"	{ count(); return(STRING_LITERAL); }

"..."			{ count(); return(ELLIPSIS); }
">>="			{ count(); return(RIGHT_ASSIGN); }
"<<="			{ count(); return(LEFT_ASSIGN); }
"+="			{ count(); return(ADD_ASSIGN); }
"-="			{ count(); return(SUB_ASSIGN); }
"*="			{ count(); return(MUL_ASSIGN); }
"/="			{ count(); return(DIV_ASSIGN); }
"%="			{ count(); return(MOD_ASSIGN); }
"&="			{ count(); return(AND_ASSIGN); }
"^="			{ count(); return(XOR_ASSIGN); }
"|="			{ count(); return(OR_ASSIGN); }
">>"			{ count(); return(RIGHT_OP); }
"<<"			{ count(); return(LEFT_OP); }
"++"			{ count(); return(INC_OP); }
"--"			{ count(); return(DEC_OP); }
"->"			{ count(); return(PTR_OP); }
"&&"			{ count(); return(AND_OP); }
"||"			{ count(); return(OR_OP); }
"<="			{ count(); return(LE_OP); }
">="			{ count(); return(GE_OP); }
"=="			{ count(); return(EQ_OP); }
"!="			{ count(); return(NE_OP); }
";"			{ count(); return(';'); }
("{"|"<%")		{ count(); return('{'); }
("}"|"%>")		{ count(); return('}'); }
","			{ count(); return(','); }
":"			{ count(); return(':'); }
"="			{ count(); return('='); }
"("			{ count(); return('('); }
")"			{ count(); return(')'); }
("["|"<:")		{ count(); return('['); }
("]"|":>")		{ count(); return(']'); }
"."			{ count(); return('.'); }
"&"			{ count(); return('&'); }
"!"			{ count(); return('!'); }
"~"			{ count(); return('~'); }
"-"			{ count(); return('-'); }
"+"			{ count(); return('+'); }
"*"			{ count(); return('*'); }
"/"			{ count(); return('/'); }
"%"			{ count(); return('%'); }
"<"			{ count(); return('<'); }
">"			{ count(); return('>'); }
"^"			{ count(); return('^'); }
"|"			{ count(); return('|'); }
"?"			{ count(); return('?'); }

[ \t\v\n\f]		{ count(); }
.			{ /* ignore bad characters */ }

%%

/*
 * End-of-input hook called by the generated scanner.  Always returns 1,
 * i.e. there is no further input file to switch to.
 */
int yywrap(void)
{
	return(1);
}

/*
 * Consume the body of a C comment.  Called from the "/*" rule, so the
 * opening delimiter has already been matched.
 *
 * Characters are read with input() until the closing delimiter is found.
 * Every character other than '*' is echoed with putchar(), and the final
 * '/' is echoed as well; the '*' characters themselves and the opening
 * delimiter are not echoed (behaviour kept from the original listing).
 *
 * The results of input() are kept in int rather than char, and both 0 and
 * EOF are treated as end of input: POSIX lex specifies 0, while flex
 * documents EOF.  With a char and a test against 0 only, an unterminated
 * comment would never leave the loop on a scanner that reports EOF.
 */
void comment(void)
{
	int c, c1;

	for (;;) {
		/* Echo everything up to the next '*' (or end of input). */
		while ((c = input()) != '*' && c != 0 && c != EOF)
			putchar(c);

		if (c != '*')
			return;		/* end of input inside the comment */

		c1 = input();
		if (c1 == '/') {
			putchar(c1);	/* closing delimiter found */
			return;
		}
		if (c1 == 0 || c1 == EOF)
			return;		/* end of input right after a '*' */

		/* Not the end yet: re-scan c1, it may itself be a '*'. */
		unput(c1);
	}
}

/* Column reached so far on the current input line; maintained by count(). */
int column = 0;

/*
 * Walk over the text of the token just matched (yytext) and update
 * `column`: a newline resets it to 0, a tab advances it to the next
 * multiple of 8, any other character advances it by one.  The token is
 * then echoed with ECHO.
 */
void count(void)
{
	int i;

	for (i = 0; yytext[i] != '\0'; i++) {
		if (yytext[i] == '\n')
			column = 0;
		else if (yytext[i] == '\t')
			column += 8 - (column % 8);
		else
			column++;
	}

	ECHO;
}

/*
 * Classify the identifier-shaped token just matched.  Returns the token
 * code handed to the parser; in this listing that is always IDENTIFIER.
 */
int check_type(void)
{
/*
* pseudo code --- this is what it should check
*
*	if (yytext == type_name)
*		return(TYPE_NAME);
*
*	return(IDENTIFIER);
*/

/*
*	it actually will only return IDENTIFIER
*/

	return(IDENTIFIER);
}

ANSI C Yacc grammar

In 1985, Jeff Lee published his Yacc grammar (which is accompanied by a matching Lex specification) for the April 30, 1985 draft version of the ANSI C standard. Tom Stockfisch reposted it to net.sources in 1987; that original, as mentioned in the answer to question 17.25 of the comp.lang.c FAQ, can be ftp'ed from ftp.uu.net, file usenet/net.sources/ansi.c.grammar.Z.

Jutta Degener, 1995

%token IDENTIFIER CONSTANT STRING_LITERAL SIZEOF
%token PTR_OP INC_OP DEC_OP LEFT_OP RIGHT_OP LE_OP GE_OP EQ_OP NE_OP
%token AND_OP OR_OP MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN ADD_ASSIGN
%token SUB_ASSIGN LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN
%token XOR_ASSIGN OR_ASSIGN TYPE_NAME

%token TYPEDEF EXTERN STATIC AUTO REGISTER
%token CHAR SHORT INT LONG SIGNED UNSIGNED FLOAT DOUBLE CONST VOLATILE VOID
%token STRUCT UNION ENUM ELLIPSIS

%token CASE DEFAULT IF ELSE SWITCH WHILE DO FOR GOTO CONTINUE BREAK RETURN

%start translation_unit
%%
/*
 * Expression grammar.  Each nonterminal below is defined in terms of the
 * one before it, so operators introduced later in the chain bind less
 * tightly than those introduced earlier.
 */

primary_expression
	: IDENTIFIER
	| CONSTANT
	| STRING_LITERAL
	| '(' expression ')'
	;

/* Subscript, call, member access and postfix ++/--. */
postfix_expression
	: primary_expression
	| postfix_expression '[' expression ']'
	| postfix_expression '(' ')'
	| postfix_expression '(' argument_expression_list ')'
	| postfix_expression '.' IDENTIFIER
	| postfix_expression PTR_OP IDENTIFIER
	| postfix_expression INC_OP
	| postfix_expression DEC_OP
	;

/* Call arguments are assignment_expressions, so a bare ',' separates them. */
argument_expression_list
	: assignment_expression
	| argument_expression_list ',' assignment_expression
	;

/* Prefix ++/--, the unary operators, and both forms of sizeof. */
unary_expression
	: postfix_expression
	| INC_OP unary_expression
	| DEC_OP unary_expression
	| unary_operator cast_expression
	| SIZEOF unary_expression
	| SIZEOF '(' type_name ')'
	;

unary_operator
	: '&'
	| '*'
	| '+'
	| '-'
	| '~'
	| '!'
	;

cast_expression
	: unary_expression
	| '(' type_name ')' cast_expression
	;

/* The binary-operator rules that follow are all left-recursive. */
multiplicative_expression
	: cast_expression
	| multiplicative_expression '*' cast_expression
	| multiplicative_expression '/' cast_expression
	| multiplicative_expression '%' cast_expression
	;

additive_expression
	: multiplicative_expression
	| additive_expression '+' multiplicative_expression
	| additive_expression '-' multiplicative_expression
	;

shift_expression
	: additive_expression
	| shift_expression LEFT_OP additive_expression
	| shift_expression RIGHT_OP additive_expression
	;

relational_expression
	: shift_expression
	| relational_expression '<' shift_expression
	| relational_expression '>' shift_expression
	| relational_expression LE_OP shift_expression
	| relational_expression GE_OP shift_expression
	;

equality_expression
	: relational_expression
	| equality_expression EQ_OP relational_expression
	| equality_expression NE_OP relational_expression
	;

and_expression
	: equality_expression
	| and_expression '&' equality_expression
	;

exclusive_or_expression
	: and_expression
	| exclusive_or_expression '^' and_expression
	;

inclusive_or_expression
	: exclusive_or_expression
	| inclusive_or_expression '|' exclusive_or_expression
	;

logical_and_expression
	: inclusive_or_expression
	| logical_and_expression AND_OP inclusive_or_expression
	;

logical_or_expression
	: logical_and_expression
	| logical_or_expression OR_OP logical_and_expression
	;

/* Right-recursive: the else-branch is itself a conditional_expression. */
conditional_expression
	: logical_or_expression
	| logical_or_expression '?' expression ':' conditional_expression
	;

/* Right-recursive: the right-hand side is itself an assignment_expression. */
assignment_expression
	: conditional_expression
	| unary_expression assignment_operator assignment_expression
	;

assignment_operator
	: '='
	| MUL_ASSIGN
	| DIV_ASSIGN
	| MOD_ASSIGN
	| ADD_ASSIGN
	| SUB_ASSIGN
	| LEFT_ASSIGN
	| RIGHT_ASSIGN
	| AND_ASSIGN
	| XOR_ASSIGN
	| OR_ASSIGN
	;

/* Comma operator. */
expression
	: assignment_expression
	| expression ',' assignment_expression
/* NOTE(review): the listing is cut off here in this excerpt; the rule's
 * terminating ';' and the remaining grammar rules are not visible. */
分享到:
收藏