/*  LL parser for regular expression grammar - Doug Lea

    Original Grammar:
       E -> id | EE | E`|'E | E`*' | `(` E ')'

    Transformed For LL parser into
       S  -> E$
       E  -> T E2
       E2 -> `|' E3 
       E2 -> NIL
       E3 -> T E2
       T  -> F T2
       T2 -> F T2
       T2 -> NIL
       F  -> A F2
       F2 -> `*' F2
       F2 -> NIL
       A  -> id
       A  -> `(' A2
       A2 -> E ')'
*/

#include <stdio.h>
#include <ctype.h>

#define NNONTERM     10  /* Number of NonTerminals */
#define NTOKENS       6  /* Number of Tokens */
#define NSYMBOLS     18  /* Tokens + NonTerminals + NIL, ERR */

/* symbols declared as simple enums */
typedef enum symbolenum
{

/* NonTerminals */
/*          first         Follows             */
  S,     /* {id, ( }      {$}                 */
  E,     /* {id, ( }      {$, )}              */
  E2,    /* {|, NIL}      {$, )}              */
  E3,    /* {id, (}       {$, )}              */
  T,     /* {id, (}       {$, ), |}           */
  T2,    /* {id, (, NIL}  {$, ), |}           */
  F,     /* {id, (}       {$, ), |, (, id}    */
  F2,    /* {*, NIL}      {$, ), |, (, id}    */
  A,     /* {id, ( }      {$, ), |, (, id, *} */
  A2,    /* {id, ( }      {$, ), |, (, id, *} */

/* (pseudosymbols) */  
  NIL, ERR,               

/* tokens */
  ID, LP, RP, BAR, STAR, END
}
symbol;

char *psym[NSYMBOLS] = /* Printable versions for tracing */
{ "S  ", "E  ", "E2 ", "E3 ", "T  ", "T2 ", "F  ", "F2 ", "A  ",
  "A2 ", "NIL", "ERR", "id ", "(  ", ")  ", "|  ", "*  ", "$  "
};

/* Little utilities to determine which symbols are tokens */

inline int is_token(symbol s) {  return s >= ID && s <= END; }
inline int token_to_table_index(symbol s) {  return s - ID; }


/* Productions */ 

typedef symbol prod[2]; /* All productions of rhs length 2 */

                           /* First(rhs)   Follows(rhs)     */
prod Sa =   {E,    END};   /* {id, (}      NA               */
prod Ea =   {T,    E2};    /* {id, (}      NA               */
prod E2a =  {BAR,  E3};    /* {|}          NA               */
prod E3a =  {T,    E2};    /* {id, (}      NA               */
prod E2b =  {NIL,  NIL};   /* {}           {$, )}           */
prod Ta =   {F,    T2};    /* {id, (}      NA               */
prod T2a =  {F,    T2};    /* {id, (}      NA               */
prod T2b =  {NIL,  NIL};   /* {}           {$, ), |}        */
prod Fa =   {A,    F2};    /* {id, (}      NA               */
prod F2a =  {STAR, F2};    /* {*}          NA               */
prod F2b =  {NIL,  NIL};   /* {}           {$, ), |, id, (} */
prod Aa =   {ID,   NIL};   /* {id}         NA               */
prod Ab =   {LP,   A2};    /* {(}          NA               */
prod A2a =  {E,    RP};    /* {id, (}      NA               */

prod _I_ =  {ERR, ERR};    /* Dummy production for error entries */

symbol *table [NNONTERM][NTOKENS] = /* parse table */
{
/*           id   '('    ')'   '|'   '*'    $      */  
/* S    */ { Sa,   Sa,   _I_,  _I_,  _I_,  _I_  },
/* E    */ { Ea,   Ea,   _I_,  _I_,  _I_,  _I_  },
/* E2   */ { _I_,  _I_,  E2b,  E2a,  _I_,  E2b  },
/* E3   */ { E3a,  E3a,  _I_,  _I_,  _I_,  _I_  },
/* T    */ { Ta,   Ta,   _I_,  _I_,  _I_,  _I_  },
/* T2   */ { T2a,  T2a,  T2b,  T2b,  _I_,  T2b  },
/* F    */ { Fa,   Fa,   _I_,  _I_,  _I_,  _I_  },
/* F2   */ { F2b,  F2b,  F2b,  F2b,  F2a,  F2b  },
/* A    */ { Aa,   Ab,   _I_,  _I_,  _I_,  _I_  },
/* A2   */ { A2a,  A2a,  _I_,  _I_,  _I_,  _I_  }
};

/* Parse stack implemented via simple bounded array */

#define STACK_SIZE 1000
class ParseStack {
  int s[STACK_SIZE];
  int sp;

  ParseStack :sp(0) {}
  void   push(symbol x) { if (x != NIL) s[sp++] = x; }
  symbol pop()          { return s[--sp]; }
  bool   empty()        { return sp == 0; }
};



/* Rudimentary lexical analysis */ 

symbol token = NIL;
char  lval;

void advance() {
  if (token == END) return;
  lval = getchar();
  if (lval == EOF || lval == '\n') { lval = '$'; token = END; }
  else if (lval == '|')            token = BAR;
  else if (lval == '*')            token = STAR;
  else if (lval == '(')            token = LP;
  else if (lval == ')')            token = RP;
  else if (isalpha(lval))          token = ID;
  else if (isspace(lval))          advance();
  else                             { printf("error at %c",lval); advance(); }
}

class Lex {
  char yytext;
  char token;

  void next() { // very simple scanner to advance token
    if (token == END) return;
    else if (!cin.get(yytext))               token = END; 
    else if (yytext == '\n')                 token = END;
    else if (yytext == '|')                  token = BAR;
    else if (yytext == '*')                  token = STAR;
    else if (yytext == '(')                  token = LP;
    else if (yytext == ')')                  token = RP;
    else if (isalpha(yytext))                token = ID;
    else if (isspace(yytext))                next();
    else { cout << "Unexpected character: " << yytext << endl; next(); }
  }

public:
  Lex() :token(0), yytext(0) { next(); }

  bool check(char tok) { return tok == token;  }

  bool match(char tok) { 
    if (tok == token) { next(); return 1; } else return 0;
};


int verbose = 1;  /* Set to 0 for quiet parse */

/* The parser */

main() {
  symbol s = NIL;

  initstack();
  push(S);
  advance();

  while (!empty() && s != ERR)
  {
    symbol s = pop();
    if (verbose) printf("[%s, %s]: ", psym[s], psym[token]);

    if (is_token(s)) 
    {
      if (token == s)
      {
        if (verbose) printf(" <match>\n");
        advance();
      }
      else
      {
        printf("Error: expected %s but found %s\n", psym[s], psym[token]);
        break;
      }
    }
    else
    {
      push(table[s][token_to_table_index(token)][1]);
      push(table[s][token_to_table_index(token)][0]);

      if (verbose)
      {
        int i;
        printf("{ ");
        for (i = 0; i < stack.sp; ++i) printf("%s", psym[stack.s[i]]);
        printf("}\n");
      }
    }
  }
}
