/*scanner.cc
  This is a lexical scanner for a language of well-formed formulae (WFF's) in
  the propositional calculus.  It demonstrates techniques to be understood and
  used in the design of your PURPLE scanner, part of the final project for
  computer science at the Duke University Talent Identification Program.
  Copyright (c) 1998 by Matthew Belmonte.*/

#include "scanner.h"

#include <cctype>   /*isspace(), isupper()*/
#include <cstdio>   /*getchar()*/
#include <cstdlib>  /*exit()*/
#include <iostream> /*cout, endl*/

/*One character of lookahead: the next character to be scanned.  At end of
  input getchar() returns EOF, which is narrowed to char here; no case in
  symbol_start() is intended to match it.*/
static char current_char;

/*all input comes through here - reads one character from standard input
  into current_char*/
void next_char()
{
  current_char = static_cast<char>(std::getchar());
}

/*default constructor - leaves the fields unset (unless scanner.h gives them
  default initialisers, which cannot be seen from this file); fill the token
  with next() before using it*/
Token::Token() {}

/*type-only constructor - a type but no value (identifier is zeroed)*/
Token::Token(TokenType t)
{
  type = t;
  identifier = static_cast<char>(0);
}

/*identifier constructor - the type is implicitly IdentifierToken, and id is
  the value of the identifier*/
Token::Token(char id)
{
  type = IdentifierToken;
  identifier = id;
}

/*sets the token type and gets the next character*/
void Token::make_token(TokenType t)
{
  type = t;
  next_char();
}

/*The straightforward way to decide what token to produce is to use these
  conditional ('if') statements.  This technique is fine for the case below
  of only two tokens, but for larger sets of possibilities it results in
  wasteful duplication of code.  A better way is to write a function that
  takes as its parameters a character to be looked for, the token type to be
  produced if the next character matches that character, and the token type
  to be produced if the next character fails to match.
  You may want to use the more sophisticated technique in your project.*/

/*processes a token that begins with an upper-case letter: the keyword AN,
  the keyword OR, or otherwise a one-letter identifier*/
void Token::letter_start()
{
  const char first = current_char;
  next_char(); /*look ahead to the next character*/

  if ((first == 'A') && (current_char == 'N'))
  {
    /*keyword: clear the identifier so the token has the same field values
      as one built with Token(TokenType)*/
    identifier = static_cast<char>(0);
    make_token(AndToken); /*also consumes the 'N'*/
  }
  else if ((first == 'O') && (current_char == 'R'))
  {
    identifier = static_cast<char>(0);
    make_token(OrToken); /*also consumes the 'R'*/
  }
  else
  {
    /*a plain identifier; the lookahead character is left unconsumed*/
    type = IdentifierToken;
    identifier = first;
  }
}

/*processes a token that begins with a non-alphanumeric character; prints a
  message and exits with status 1 if the character starts no known token*/
void Token::symbol_start()
{
  identifier = static_cast<char>(0); /*identifier field is not used*/
  switch (current_char)
  {
    case '~':
      make_token(NotToken);
      break;
    case '=':
      /*'=' alone is equivalence; "=>" is implication*/
      next_char();
      if (current_char == '>')
        make_token(ImpliesToken);
      else
        type = EquivToken; /*lookahead stays in current_char*/
      break;
    case '(':
      make_token(LeftParenToken);
      break;
    case ')':
      make_token(RightParenToken);
      break;
    case '.':
      type = PeriodToken; /*don't next_char() at end*/
      break;
    default:
      std::cout << "illegal character " << current_char << std::endl;
      std::exit(1);
  }
}

/*initialise the scanner by loading the first character*/
void init_scanner()
{
  next_char();
}

/*top-level scanner function - skips white space, then scans one token into
  this object*/
void Token::next()
{
  /*The <cctype> classifiers require a value representable as unsigned char
    (or EOF); passing a negative plain char is undefined behaviour, hence
    the casts.*/
  while (std::isspace(static_cast<unsigned char>(current_char)))
    next_char(); /*skip blanks*/

  if (std::isupper(static_cast<unsigned char>(current_char)))
    letter_start();
  else
    symbol_start(); /*symbol_start() will detect any illegal character*/
}

/*print the type and value of the token - useful in debugging; writes to
  standard output with no trailing newline*/
void Token::print()
{
  switch (type)
  {
    case IdentifierToken:
      std::cout << "Identifier " << identifier;
      break;
    case NotToken:
      std::cout << '~';
      break;
    case AndToken:
      std::cout << "AN";
      break;
    case OrToken:
      std::cout << "OR";
      break;
    case ImpliesToken:
      std::cout << "=>";
      break;
    case EquivToken:
      std::cout << '=';
      break;
    case LeftParenToken:
      std::cout << '(';
      break;
    case RightParenToken:
      std::cout << ')';
      break;
    case PeriodToken:
      std::cout << '.';
      break;
    default:
      std::cout << "undefined token " << static_cast<int>(type);
  }
}