/*parser.cc This is a recursive-descent parser for a language of well-formed formulae (WFF's) in the propositional calculus. It demonstrates techniques to be understood and used in the design of your PURPLE parser, part of the final project for computer science at the Duke University Talent Identification Program. Copyright (c) 1998 by Matthew Belmonte.*/ /*There is, in general, one parsing function for each nonterminal symbol in the grammar. Refer to the grammar (or syntax chart) to understand how these routines interact with each other. Some regions of the code are commented very lightly, because their structure and function follow from the grammar. -There are two important and related invariant conditions over all these parsing functions. On entry, the first token in the phrase about to be parsed is in 'token'. On exit, the token immediately following the last token in the expression just parsed is in 'token' (except in the case of parse_program, where, if all is well, there is nothing following the '.'). To maintain these invariants, it is necessary to keep track of tokens that delimit expressions and to eat them where appropriate. -In other words, each parsing function assumes that the input stream of tokens has been prepared for it, and it in turn prepares the input stream for whatever parsing function may be called next.*/ /*grammar for WFFs: Formula -> WFF end-token WFF -> WFF term-op Term | Term term-op -> implies-token | equiv-token Term -> Term arg-op Argument | Argument arg-op -> and-token | or-token Argument -> not-token Argument | left-p-token WFF right-p-token | identifier-token lexical definitions: end-token: . implies-token: => equiv-token: = and-token: AN or-token: OR not-token: ~ left-p-token: ( right-p-token: ) identifier-token: A | B | C | ... | Z Note that the two left-recursive productions WFF -> WFF term-op Term and Term -> Term arg-op Argument can be expressed as regular expressions WFF -> Term [term-op Term]* and Term -> Argument [arg-op Argument]*, respectively. The parsing functions for these symbols act according to such a translation. */ #include "scanner.h" #include "parser.h" #include #include Tree::Tree(Token op) { oper = op; left = 0; right = 0; } Tree::Tree(Token op, Tree *child) { oper = op; left = child; right = 0; } Tree::Tree(Token op, Tree *lchild, Tree *rchild) { oper = op; left = lchild; right = rchild; } static Token token; static Tree *parse_identifier() { Tree *ptr; if(token.type == IdentifierToken) { ptr = new Tree(token); /*create a new leaf node*/ token.next(); /*have the next token ready*/ return(ptr); } else { cout << "bad identifier "; /*report error*/ token.print(); /*print the offending token*/ cout << endl; exit(1); /*halt*/ } } static Tree *parse_wff(); static Tree *parse_argument() { Tree *arg; if(token.type == NotToken) { arg = new Tree(token); token.next(); arg->left = parse_argument(); } else if(token.type == LeftParenToken) { token.next(); /*eat '('*/ arg = parse_wff(); if(token.type != RightParenToken) { cout << "missing right parenthesis before "; token.print(); cout << endl; exit(1); } token.next(); /*eat ')'*/ } else arg = parse_identifier(); return(arg); } /*Note the similarity between the functions parse_term() and parse_wff(). This is natural because the grammar productions that implement terms and wffs are similar. It's possible to replace these two similar functions with a more general one that takes a few parameters. You might want to do something like this when you write your parser. Keep in mind that a function can take a pointer to another function as a parameter. So for example if a function parse_X is passed another parsing function, parse_Y, as a parameter, parse_X can apply parse_Y without knowing exactly what parse_Y does: Tree *parse_X(Tree *(*parse_Y)(), ... ) { ... t = (*parse_Y)(); ... } */ static Tree *parse_term() { Tree *left_tree, *tree; left_tree = parse_argument(); /*invariant: 'left_tree' is a parse tree that represents all the Arguments in the Term that are to the left of 'token'*/ while((token.type == AndToken) || (token.type == OrToken)) { tree = new Tree(token, left_tree); token.next(); tree->right = parse_argument(); left_tree = tree; } return(left_tree); } static Tree *parse_wff() { Tree *left_tree, *tree; left_tree = parse_term(); /*invariant: 'left_tree' is a parse tree that represents all the Terms in the WFF that are to the left of 'token'*/ while((token.type == ImpliesToken) || (token.type == EquivToken)) { tree = new Tree(token, left_tree); token.next(); tree->right = parse_argument(); left_tree = tree; } return(left_tree); } /*top-level parsing function*/ Tree *parse_formula() { Tree *formula; init_scanner(); token.next(); formula = parse_wff(); if(token.type != PeriodToken) { cout << "unexpected token at end of input: "; token.print(); exit(1); } return(formula); } /*print an indented subtree*/ static void print_subtree(Tree *t, int indent) { register int spaces; /*invariant: all nodes to the left of 't' have been printed*/ while(t != 0) { for(spaces = 0; spaces != indent; spaces++) cout << ' '; t->oper.print(); cout << endl; print_subtree(t->left, 2+indent); indent += 2; t = t->right; } } /*print the contents of a tree - useful for debugging*/ void print_tree(Tree *t) { print_subtree(t, 0); }