/*   example recursive descent parser for regular expressions */

#include <stdio.h>
#include <ctype.h>

/* special tokens */
#define END       0
#define ID        1
#define UNKNOWN   2

/* node numbers */
#define S_N       0
#define E_N       1
#define E2_N      2
#define T_N       3
#define T2_N      4
#define F_N       5
#define F2_N      6
#define A_N       7

#define N_NODES   8

char *nodename[N_NODES] = { "S", "E", "E2", "T", "T2", "F", "F2", "A" };

/* op types */
#define NO_OP     0
#define BAR_OP    1
#define CONCAT_OP 2
#define STAR_OP   3
#define PAREN_OP  4
#define ERROR_OP  5

#define N_OPS     6

char opname[N_OPS] = { ' ', '|', '.', '*', 'P', '?' };

typedef struct treestruct
{
  int val;
  int op;
  struct treestruct * lft;
  struct treestruct * rgt;
} treenode, *tree;

int      yylex();
tree     new_node(int, int);
tree     S(), E(), E2(), T(), T2(), F(), F2(), A();
void     print_error(int), print_node(tree, int), preorder(tree, int);
char *   malloc(int);
int      match(int);

int yytext;
int token = UNKNOWN;
int error_count = 0;

main() {
  tree root;

  token = yylex();
  root = S();

  printf("\nparse tree (preorder)\n");
  preorder(root, 0);
}

tree S() {
  tree t = new_node(S_N, NO_OP);
  t->lft = E();
  if (!match(END))
    print_error(END);
  return t;
}

tree E() {
  tree t = new_node(E_N, NO_OP);
  t->lft = T();
  t->rgt = E2();
  return t;
}

tree E2() {
  tree t = new_node(E2_N, NO_OP);
    
  if (match('|'))
  {
    t->op = BAR_OP;
    t->lft = T();
    t->rgt = E2();
  }
  return t;
}

tree T() {
  tree t = new_node(T_N, NO_OP);
    
  t->lft = F();
  t->rgt = T2();
  return t;
}

tree T2() {
  tree t = new_node(T2_N, NO_OP);
  
  if (token == ID || token == '(')
  {
    t->op = CONCAT_OP;
    t->lft = F();
    t->rgt = T2();
  }
  return t;
}

tree F() {
  tree t = new_node(F_N, NO_OP);
  
  t->lft = A();
  t->rgt = F2();
  return t;
}

tree F2()
{
  tree t = new_node(F2_N, NO_OP);
  
  if (match('*'))
  {
    t->op = STAR_OP;
    t->lft = F2();
  }
  return t;
}

tree A() {
  tree t = new_node(A_N, NO_OP);
    
  if (match('('))
  {
    t->op = PAREN_OP;
    t->lft = E();
    if (!match(')'))
      print_error(')');
  }
  else 
  {
    t->op = yytext;
    if (!match(ID))
      print_error(ID);
  }
  return t;
}


int match(int tok) {
  if (tok == token)
  {
    token = yylex();
    return 1;
  }
  else
    return 0;
}

int yylex() {
  if (token == END)
    return END;

  yytext = getchar();
  
  if (yytext == EOF || yytext == '\n')
    return END;
  else if (yytext == '|' || yytext == '*' ||
           yytext == '(' || yytext == ')')
    return yytext;
  else if (isalpha(yytext))
    return ID;
  else if (isspace(yytext))
    return yylex();
  else
  {
    print_error(UNKNOWN);
    yytext = 'X';
    return ID;
  }
}

void print_error(int kind) {
  printf("\nsyntax error near %c:", yytext);
  if (kind == UNKNOWN)
    printf("unexpected character");
  else if (kind == END)
    printf("expected EOF");
  else if (kind == ')')
    printf("expected ')'");
  else if (kind == ID)
    printf("expected  identifier");
  printf("\n");
  ++error_count;
}

tree new_node(int nodetype, int optype) {
  tree t = (tree) (malloc(sizeof(treenode)));
  t->lft = t->rgt = NULL;
  t->val = nodetype;
  t->op = optype;
  return t;
}

void print_node(tree t, int level) {
  int i;
  for (i = 0; i < level; ++i)
    putchar(' ');
  
  printf("%s", nodename[t->val]);
  if (t->op < N_OPS)
    printf("(%c)", opname[t->op]);
  else
    printf("(%c)", t->op);
  
  printf("\n");
}


void preorder(tree t, int level) {
  if (t != NULL)
  {
    print_node(t, level);
    preorder(t->lft, level+1);
    preorder(t->rgt, level+1);
  }
}


