
/* cclex.c - lexical analyzer for compiler */

#include <stdio.h>
#ifndef M6502
/* every time I think some C weenie someplace might have opted to do
   something in a regular fashion, I get disappointed... */
#include <errno.h>
#endif
#include "cc65.h"
#include "cclex.h"

/* ch() - the character currently under the scan pointer; does not consume it */
#define ch()	(*lptr)

/*
 * Token handlers referenced by the stab[] dispatch table below.  Each one
 * is entered with lptr on the first character of the token and is expected
 * to set nxttok (and nxtval where the token carries a value).
 */
int amp();
int bang();
int bar();
int caret();
int dollar();
int equal();
int ident();
int langle();
int minus();
int number();
int percent();
int plus();
int pstr();
int qstr();
int rangle();
int slash();
int star();
int symbol();
#ifndef M6502
/* handler for names starting with '_'; only built when M6502 is not defined */
int under();
#endif
int unkn();

/*
 * stab[] - dispatch table indexed by the character code (0..127) of the
 * first character of a token.
 *
 * A slot holds either a handler function, or - for tokens that are a single
 * character - the token code itself stored in place of a pointer.  gettok()
 * tells the two apart by testing the bits above the low byte of the slot
 * value, so every token code stored here has to fit in the low byte.
 */
int (*stab[128]) () =
{
  /* 0x00 - 0x1F: control characters */
  unkn, unkn, unkn, unkn, unkn, unkn, unkn, unkn,
  unkn, unkn, unkn, unkn, unkn, unkn, unkn, unkn,
  unkn, unkn, unkn, unkn, unkn, unkn, unkn, unkn,
  unkn, unkn, unkn, unkn, unkn, unkn, unkn, unkn,
  unkn,	/* <blank>; gettok() skips blanks before it indexes the table */
  bang,			/* ! */
  qstr,			/* " */
  unkn,			/* # */
  dollar,		/* $ */
  percent,		/* % */
  amp,			/* & */
  pstr,			/* 'character const' */
  LPAREN,		/* ( */
  RPAREN,		/* ) */
  star,			/* * */
  plus,			/* + */
  COMMA,		/* , */
  minus,		/* - */
  DOT,			/* . */
  slash,		/* / */
  number, number, number, number, number,	/* 0 - 4 */
  number, number, number, number, number,	/* 5 - 9 */
  COLON,		/* : */
  SEMI,			/* ; */
  langle,		/* < */
  equal,		/* = */
  rangle,		/* > */
  QUEST,		/* ? */
  unkn,			/* @ */
  ident, ident, ident, ident, ident, ident, ident, ident,	/* A - H */
  ident, ident, ident, ident, ident, ident, ident, ident,	/* I - P */
  ident, ident, ident, ident, ident, ident, ident, ident,	/* Q - X */
  ident, ident,		/* Y - Z */
  LBRACK,		/* [ */
  unkn,			/* \ */
  RBRACK,		/* ] */
  caret,		/* ^ */
#ifndef M6502
/* only include this in cross compiler */
  under,		/* _ */
#else
/* save space by not treating underbars special in native compiler */
  ident,		/* _ */
#endif
  unkn,	/* ` */
  symbol, symbol, symbol, symbol, symbol, symbol, symbol, symbol,	/* a - h */
  symbol, symbol, symbol, symbol, symbol, symbol, symbol, symbol,	/* i - p */
  symbol, symbol, symbol, symbol, symbol, symbol, symbol, symbol,	/* q - x */
  symbol, symbol,	/* y - z */
  LCURLY,		/* { */
  bar,			/* | */
  RCURLY,		/* } */
  COMP,			/* ~ */
  unkn			/* <DEL> */
};


/*
  gettok()
  Make the next token the current token, then read a new next token
  from the input stream.
*/
static char * pp_line = ";;";
static char * old_lptr;

/*
 * asm_unkludge()
 * Step the "#asm" kludge counter that gettok() arms when it sees a #asm
 * line.  Returns 1 while a forced semicolon is still owed (the scan pointer
 * is then aimed at pp_line), otherwise 0.
 *
 * When the counter steps down to exactly 1 the saved scan pointer is
 * restored and doasm() and kill() are run before returning 0.
 * NOTE(review): this function leaves asm_kludge at 1 on that path; whether
 * doasm()/kill() reset it cannot be seen from here - confirm.
 */
static int asm_unkludge()
{
  /* kludge not armed: nothing to do */
  if (asm_kludge <= 0)
    return (0);

  /* still counting down: keep scanning the stand-in line */
  if (--asm_kludge != 1)
    {
      lptr = pp_line;
      return (1);
    }

  /* reached 1: go back to the real line and process the asm block */
  lptr = old_lptr;
  doasm();
  kill();
  return (0);
}

/*
 * gettok()
 * Shift the look-ahead token (nxttok/nxtval) into curtok/curval, then scan
 * a new look-ahead token from the input.
 *
 * While the #asm kludge is counting down, curtok is forced to SEMI and no
 * new token is scanned.  At end of input nxttok becomes CEOF.
 *
 * Otherwise the first character of the token selects a stab[] slot: a slot
 * whose value has bits set above the low byte is a handler and is called;
 * any other slot is the token code for a one-character token, which is
 * consumed here.
 */
gettok()
{
int (*f) ();
int c;

#ifdef M6502
  chkbrk();			/* check for user hitting break key */
#endif
  curtok = nxttok;
  curval = nxtval;
  if (asm_unkludge())
    {
      curtok = SEMI;
      return;
    }
  /*
   * pick up next token from input stream: refill the line buffer whenever
   * it is exhausted, and skip blanks.
   */
  while (1)
    {
      while (ch() == 0)
	{
	  if (readline() == 0)
	    {
	      nxttok = CEOF;
	      return;
	    }
	  preprocess();
/* kludge added by jrd.  if see a #asm directive coming up,
   force a semicolon token, to terminate previous stmt.  This'll
   lose if you do it in the middle of something like a for stmt.
   big deal... */
	  if (matchstr(lptr, "#asm")) /* see a #asm dir? */
	    {
	      asm_kludge = 3;	/* force 2 semicolons */
	      old_lptr = lptr;
	      lptr = pp_line;	/* scan ";;" instead of the #asm line */
	      break;
	    }
	}
      if (ch() == ' ')
	++lptr;
      else
	break;
    }

  /*
   * stab[] has only 128 slots.  Take the character as an unsigned byte so
   * a char with the high bit set can neither go negative nor run off the
   * end of the table; such characters share the <DEL> slot, which
   * dispatches to unkn().
   */
  c = ch() & 0xFF;
  if (c > 127)
    c = 127;

  f = stab[c];
#ifdef M6502
  if ((nxttok = (int) f) & 0xFF00)
#else
  if ((nxttok = (int) f) & 0xFFFFFF00)
#endif
    {
      /* call through the table entry itself, not through a pointer rebuilt
         from an int, so the address survives when pointers are wider than
         int */
      (*f) ();
    }
  else
    {
      /* one-character token: nxttok already holds its code */
      ++lptr;
    }
}

#ifndef M6502
/* only grok special symbols if not native compiler */
/*
 * under()
 * Handler for names that start with '_'.  Recognises two special names:
 *   __FILE__  -> SCONST; the string in fin is copied into litq and nxtval
 *                is the offset in litq where the copy starts
 *   __LINE__  -> ICONST with value ln
 * Any other name is entered with addsym() and returned as IDENT.
 */
under()
{
char * src;
char token[NAMESIZE];

  symname(token);

  /* the second-character test is a cheap filter before the strcmp()s */
  if (token[1] == '_' && strcmp(token, "__FILE__") == 0)
    {
      nxtval = litptr;
      for (src = fin; *src; ++src)
	litq[litptr++] = *src;
      litq[litptr++] = '\0';
      nxttok = SCONST;
    }
  else if (token[1] == '_' && strcmp(token, "__LINE__") == 0)
    {
      nxttok = ICONST;
      nxtval = ln;
    }
  else
    {
      nxtval = addsym(token);
      nxttok = IDENT;
    }
}

#endif

/*
 * symbol()
 * Handler for names starting with a lower-case letter.  The name is first
 * looked up with Search(); a non-zero result is used directly as the token.
 * Otherwise the name is entered with addsym() and returned as IDENT.
 */
symbol()
{
char token[NAMESIZE];

  symname(token);
  nxttok = Search(token);
  if (nxttok == 0)
    {
      /* Search() found nothing: plain identifier */
      nxtval = addsym(token);
      nxttok = IDENT;
    }
}

/*
 * ident()
 * Handler for names that are always plain identifiers (stab[] routes the
 * upper-case letters here, and '_' when M6502 is defined).  Unlike symbol()
 * no Search() lookup is made.
 */
ident()
{
char token[NAMESIZE];

  symname(token);		/* collect the name from the input line */
  nxtval = addsym(token);	/* token value is whatever addsym() returns */
  nxttok = IDENT;
}

/*
 * rettok( tok )
 * Helper for the operator handlers: consume the character under the scan
 * pointer and make tok the look-ahead token.
 */
rettok(tok)
int tok;
{
  ++lptr;
  nxttok = tok;
}

/*
 * dollar()
 * Handler for '$', which introduces two-character alternate spellings:
 *   $(  -> LCURLY
 *   $)  -> RCURLY
 *   $-  -> COMP
 * Anything else after the '$' is reported through unkn(); the scan pointer
 * has already moved past the '$' at that point.
 *
 * The old "#ifdef old_cruft" fragment was removed: it began with a dangling
 * `else` inside the switch, so it could never have compiled if enabled.
 */
dollar()
{
  switch (*++lptr)
    {
    case '(':
      rettok(LCURLY);
      break;
    case ')':
      rettok(RCURLY);
      break;
    case '-':
      rettok(COMP);
      break;
    default:
      unkn();
    }
}

/*
 * plus()
 * Handler for '+':  "++" -> INC,  "+=" -> PASGN,  otherwise PLUS.
 * The scan pointer is left just past the token.
 */
plus()
{
int next;

  next = *++lptr;		/* character following the '+' */
  if (next == '+')
    rettok(INC);
  else if (next == '=')
    rettok(PASGN);
  else
    nxttok = PLUS;
}

/*
 * minus()
 * Handler for '-':  "--" -> DEC,  "-=" -> SASGN,  "->" -> PREF,
 * otherwise MINUS.  The scan pointer is left just past the token.
 */
minus()
{
int next;

  next = *++lptr;		/* character following the '-' */
  if (next == '-')
    rettok(DEC);
  else if (next == '=')
    rettok(SASGN);
  else if (next == '>')
    rettok(PREF);
  else
    nxttok = MINUS;
}

/*
 * star()
 * Handler for '*':  "*=" -> MASGN,  otherwise STAR.
 */
star()
{
  switch (*++lptr)		/* character following the '*' */
    {
    case '=':
      rettok(MASGN);
      break;
    default:
      nxttok = STAR;
    }
}

/*
 * slash()
 * Handler for '/':  "/=" -> DASGN,  otherwise DIV.
 */
slash()
{
  ++lptr;			/* step past the '/' */
  if (*lptr != '=')
    nxttok = DIV;
  else
    rettok(DASGN);
}

/*
 * amp()
 * Handler for '&':  "&&" -> DAMP,  "&=" -> AASGN,  otherwise AMP.
 */
amp()
{
int next;

  next = *++lptr;		/* character following the '&' */
  if (next == '&')
    rettok(DAMP);
  else if (next == '=')
    rettok(AASGN);
  else
    nxttok = AMP;
}

/*
 * bar()
 * Handler for '|':  "||" -> DBAR,  "|=" -> OASGN,  otherwise BAR.
 */
bar()
{
int next;

  next = *++lptr;		/* character following the '|' */
  if (next == '|')
    rettok(DBAR);
  else if (next == '=')
    rettok(OASGN);
  else
    nxttok = BAR;
}

/*
 * bang()
 * Handler for '!':  "!=" -> NE,  otherwise BANG.
 */
bang()
{
  switch (*++lptr)		/* character following the '!' */
    {
    case '=':
      rettok(NE);
      break;
    default:
      nxttok = BANG;
    }
}

/*
 * equal()
 * Handler for '=':  "==" -> EQ,  otherwise ASGN.
 */
equal()
{
  ++lptr;			/* step past the first '=' */
  if (*lptr != '=')
    nxttok = ASGN;
  else
    rettok(EQ);
}

/*
 * langle()
 * Handler for '<':
 *   "<="  -> LE
 *   "<<=" -> SLASGN
 *   "<<"  -> ASL
 *   "<"   -> LT
 */
langle()
{
int next;

  next = *++lptr;		/* character following the '<' */
  if (next == '=')
    {
      rettok(LE);
    }
  else if (next == '<')
    {
      ++lptr;			/* look past the second '<' */
      if (*lptr == '=')
	rettok(SLASGN);
      else
	nxttok = ASL;
    }
  else
    {
      nxttok = LT;
    }
}

/*
 * rangle()
 * Handler for '>':
 *   ">="  -> GE
 *   ">>=" -> SRASGN
 *   ">>"  -> ASR
 *   ">"   -> GT
 */
rangle()
{
int next;

  next = *++lptr;		/* character following the '>' */
  if (next == '=')
    {
      rettok(GE);
    }
  else if (next == '>')
    {
      ++lptr;			/* look past the second '>' */
      if (*lptr == '=')
	rettok(SRASGN);
      else
	nxttok = ASR;
    }
  else
    {
      nxttok = GT;
    }
}

/*
 * caret()
 * Handler for '^':  "^=" -> XOASGN,  otherwise XOR.
 */
caret()
{
  switch (*++lptr)		/* character following the '^' */
    {
    case '=':
      rettok(XOASGN);
      break;
    default:
      nxttok = XOR;
    }
}

/*
 * percent()
 * Handler for '%':  "%=" -> MOASGN,  otherwise MOD.
 */
percent()
{
  ++lptr;			/* step past the '%' */
  if (*lptr != '=')
    nxttok = MOD;
  else
    rettok(MOASGN);
}

/*
  issym( s )
  If the current input character satisfies is_alpha(), read the name
  starting there into s (via symname) and return 1.  Otherwise return 0
  and leave the input untouched.
*/
issym(s)
char * s;
{
  if (!is_alpha(ch()))
    return (0);

  symname(s);
  return (1);
}

/*
  symname( s )
  Copy the name at the scan pointer into s and advance past it.  The
  first character is taken unconditionally; the name then continues for
  as long as the next character satisfies is_alpha() or isdigit().
  At most NAMEMAX characters are stored - any further characters of the
  name are consumed but dropped - and s is always NUL terminated, so s
  must have room for NAMEMAX + 1 bytes.  No value is returned.
*/
symname(s)
char * s;
{
int len;

  len = 0;
  for (;;)
    {
      if (len == NAMEMAX)
	{
	  /* buffer already full: swallow the character */
	  ++lptr;
	}
      else
	{
	  *s++ = * lptr++;
	  ++len;
	}
      if (!is_alpha(ch()) && !isdigit(ch()))
	break;
    }
  *s = '\0';
}

/*
 * number()
 * Handler for integer constants.  A leading "0x"/"0X" selects base 16, any
 * other leading '0' selects base 8, everything else is base 10.  Sets
 * nxtval to the value and nxttok to ICONST; the scan pointer is left on the
 * first character that is not a digit of the selected base.
 *
 * Only digits valid in the selected base are accepted, so an 8 or 9 ends an
 * octal constant rather than being folded into it ("019" used to yield 17).
 * No overflow check is made on the accumulated value.
 */
number()
{
int base;
int digit;
char c;
int k;

  k = 0;
  base = 10;

  if (ch() == '0')
    {
      /*
       * gobble 0 and examine next char
       */
      c = *++lptr;
      if (toupper(c) == 'X')
	{
	  base = 16;
	  ++lptr; /* gobble "x" */
	}
      else
	{
	  base = 8;
	}
    }

  while (1)
    {
      /* upper-casing first lets one range test cover a-f and A-F */
      c = toupper(ch());
      if (isdigit(c))
	digit = c - '0';
      else if (base == 16 && c >= 'A' && c <= 'F')
	digit = c - 'A' + 10;
      else
	break;			/* not a digit in any base */
      if (digit >= base)
	break;			/* e.g. 8 or 9 in an octal constant */
      k = k * base + digit;
      ++lptr;			/* gobble char */
    }

  nxtval = k;
  nxttok = ICONST;
}

/*
  pstr()
  Parse a character constant.  The opening quote is skipped, then
  characters are fetched with cgch() until it yields a quote (39) or 0.
  Each character goes through parsech() for escape handling.  The running
  value keeps only its low byte, shifted up by 8, before the new character
  is added.  Sets nxtval to the value and nxttok to CCONST.
*/
pstr()
{
int value;
int c;

  value = 0;
  ++lptr;			/* skip opening quote */
  for (;;)
    {
      c = cgch();
      if (c == 39 || c == '\0')
	break;			/* closing quote or end of line */
      value = ((value & 0xFF) << 8) + parsech(c);
    }
  nxttok = CCONST;
  nxtval = value;
}

/*
  qstr()
  Parse a quoted string.  nxttok becomes SCONST and nxtval the offset in
  litq at which the string starts.  Characters are copied into litq
  (through parsech() for escapes) until the closing quote; a 0 is stored
  after the last one.
  Reaching the end of the line first is reported with Error() and ends
  the string there.  fatal() is called if litptr has reached 512 when
  another character needs storing.
*/
qstr()
{
  nxttok = SCONST;
  nxtval = litptr;
  ++lptr;			/* skip opening quote */

  for (;;)
    {
      if (ch() == '"')
	break;
      if (ch() == 0)
	{
	  Error("new-line in string constant");
	  break;
	}
      if (litptr >= 512)
	fatal("string space exhausted");
      litq[litptr++] = parsech(gch());
    }

  cgch();			/* step over the closing quote */
  litq[litptr++] = 0;
}

/*
 * parsech( chr )
 * Translate one character of a character or string constant.  chr is the
 * character already fetched by the caller.  Anything other than a
 * backslash (92) is returned unchanged.
 *
 * After a backslash the next input character selects the result:
 *   b -> 126,  f -> 125 (CLEAR),  g -> 253 (Bell),
 *   n -> 155 (EOL),  t -> 127 (TAB)
 *   one to three octal digits -> their value
 *   anything else -> that character itself
 * The characters that make up the escape are consumed.
 */
parsech(chr)
int chr;
{
int code;
int ndigits;
int value;

  /* pass through unless backslash */
  if (chr != 92)
    return (chr);

  code = ch();

  if (code >= '0' && code <= '7')
    {
      /* octal escape: first digit is here, up to two more may follow */
      value = code - '0';
      gch();
      for (ndigits = 1; ndigits <= 2; ++ndigits)
	{
	  code = ch();
	  if (code < '0' || code > '7')
	    break;
	  value = (value << 3) + code - '0';
	  gch();
	}
      return (value);
    }

  if (code == 'b')
    code = 126;
  else if (code == 'f')
    code = 125;			/* CLEAR */
  else if (code == 'g')
    code = 253;			/* Bell */
  else if (code == 'n')
    code = 155;			/* EOL */
  else if (code == 't')
    code = 127;			/* TAB */

  cgch();			/* skip code char */
  return (code);
}

/*
 * unkn()
 * Handler for characters that cannot start a token: reports the problem
 * through fatal().  It neither consumes the character nor sets nxttok.
 */
unkn()
{
  fatal("unknown character");
}
