This might not be a good idea. The problem was that many spurious registers would be added to the Bits data structures during compilation (and would always remain 0). However, the above modification de-uniformizes the handling of temps and regs, which makes the code longer and not really nicer. Also, additional Bits structures are required to track the registers independently. Overall, this might be a bad idea to revert.
619 lines
9.7 KiB
C
619 lines
9.7 KiB
C
/* really crude parser
 */
|
|
#include "lisc.h"
|
|
#include <ctype.h>
|
|
|
|
/* Capacities of the parser's static tables: tmp[] holds at most NTmp
 * temporaries and con[] at most NCon constants.
 */
enum { NTmp = 256, NCon = 256 };
|
|
|
|
/* Scratch buffer for the instructions of the block being parsed;
 * curi points at the next free slot of insb.
 */
Ins insb[NIns];
Ins *curi;
|
|
|
|
OpDesc opdesc[OLast] = {
|
|
/* NAME ARTY C */
|
|
[OAdd] = { "add", 2, T },
|
|
[OSub] = { "sub", 2, F },
|
|
[ODiv] = { "div", 2, U },
|
|
[ORem] = { "rem", 2, U },
|
|
[OStore] = { "store", 2, U },
|
|
[OLoad] = { "load", 1, U },
|
|
[ONop] = { "nop", 0, U },
|
|
[OCopy] = { "copy", 1, U },
|
|
[OSwap] = { "swap", 2, T },
|
|
[OSign] = { "sign", 1, U },
|
|
[OXDiv] = { "xdiv", 1, U },
|
|
};
|
|
|
|
/* Parser state threaded through parseline(): it records what the
 * previous line was, which restricts what the next line may be.
 */
typedef enum {
	PXXX,	/* zero value, placeholder */
	PLbl,	/* a label or the end of file must follow */
	PPhi,	/* after a label or a phi: phis are still allowed */
	PIns,	/* after a regular instruction */
	PEnd,	/* end of input was reached */
} PState;
|
|
|
|
/* Tokens produced by lex(). */
typedef enum {
	TXXX,		/* no token (empty lookahead, keyword table sentinel) */

	/* keywords */
	TCopy, TAdd, TSub, TDiv, TRem,	/* instructions */
	TPhi,
	TJmp, TJez, TRet,		/* jumps */
	TW, TL,				/* classes "w" and "l" */

	/* tokens carrying a value in tokval */
	TNum,		/* integer literal */
	TTmp,		/* %name */
	TLbl,		/* @name */
	TAddr,		/* $name */

	/* punctuation and layout */
	TEq,
	TComma,
	TLParen,
	TRParen,
	TNL,
	TEOF,
} Token;
|
|
|
|
|
|
static FILE *inf;
|
|
static Token thead;
|
|
static struct {
|
|
int64_t num;
|
|
char *str;
|
|
} tokval;
|
|
static int lnum;
|
|
|
|
static Tmp tmp[NTmp];
|
|
static Con con[NCon];
|
|
static int ntmp;
|
|
static int ncon;
|
|
static Phi **plink;
|
|
static Blk *bmap[NBlk+1];
|
|
static Blk *curb;
|
|
static Blk **blink;
|
|
static int nblk;
|
|
|
|
|
|
/* Return n bytes of zero-filled memory.  A request for 0 bytes yields
 * a null pointer; the program is aborted if the allocation fails.
 */
void *
alloc(size_t n)
{
	void *mem;

	/* todo, export in util.c */
	if (n != 0) {
		mem = calloc(1, n);
		if (mem == 0)
			abort();
		return mem;
	}
	return 0;
}
|
|
|
|
/* Write the message s and a newline to stderr, then abort. */
void
diag(char *s)
{
	/* todo, export in util.c */
	fprintf(stderr, "%s\n", s);
	abort();
}
|
|
|
|
static void
|
|
err(char *s)
|
|
{
|
|
char buf[100];
|
|
|
|
snprintf(buf, sizeof buf, "parse: %s (line %d)", s, lnum);
|
|
diag(buf);
|
|
}
|
|
|
|
static Token
|
|
lex()
|
|
{
|
|
static struct {
|
|
char *str;
|
|
Token tok;
|
|
} tmap[] = {
|
|
{ "copy", TCopy },
|
|
{ "add", TAdd },
|
|
{ "sub", TSub },
|
|
{ "div", TDiv },
|
|
{ "rem", TRem },
|
|
{ "phi", TPhi },
|
|
{ "jmp", TJmp },
|
|
{ "jez", TJez },
|
|
{ "ret", TRet },
|
|
{ "w", TW },
|
|
{ "l", TL },
|
|
{ 0, TXXX }
|
|
};
|
|
static char tok[NString];
|
|
int c, i, sgn;
|
|
Token t;
|
|
|
|
do
|
|
c = fgetc(inf);
|
|
while (isblank(c));
|
|
switch (c) {
|
|
case EOF:
|
|
return TEOF;
|
|
case ',':
|
|
return TComma;
|
|
case '(':
|
|
return TLParen;
|
|
case ')':
|
|
return TRParen;
|
|
case '=':
|
|
return TEq;
|
|
case '%':
|
|
t = TTmp;
|
|
c = fgetc(inf);
|
|
goto Alpha;
|
|
case '@':
|
|
t = TLbl;
|
|
c = fgetc(inf);
|
|
goto Alpha;
|
|
case '$':
|
|
t = TAddr;
|
|
c = fgetc(inf);
|
|
goto Alpha;
|
|
case '#':
|
|
while (fgetc(inf) != '\n')
|
|
;
|
|
case '\n':
|
|
lnum++;
|
|
return TNL;
|
|
}
|
|
if (isdigit(c) || c == '-') {
|
|
if (c == '-') {
|
|
tokval.num = 0;
|
|
sgn = -1;
|
|
} else {
|
|
tokval.num = c - '0';
|
|
sgn = 1;
|
|
}
|
|
for (;;) {
|
|
c = fgetc(inf);
|
|
if (!isdigit(c))
|
|
break;
|
|
tokval.num *= 10;
|
|
tokval.num += c - '0';
|
|
}
|
|
ungetc(c, inf);
|
|
tokval.num *= sgn;
|
|
return TNum;
|
|
}
|
|
t = TXXX;
|
|
Alpha:
|
|
if (!isalpha(c))
|
|
err("lexing failure");
|
|
i = 0;
|
|
do {
|
|
if (i >= NString-1)
|
|
err("identifier too long");
|
|
tok[i++] = c;
|
|
c = fgetc(inf);
|
|
} while (isalpha(c) || isdigit(c));
|
|
tok[i] = 0;
|
|
ungetc(c, inf);
|
|
if (t != TXXX) {
|
|
tokval.str = tok;
|
|
return t;
|
|
}
|
|
for (i=0; tmap[i].str; i++)
|
|
if (strcmp(tok, tmap[i].str) == 0)
|
|
return tmap[i].tok;
|
|
err("unknown keyword");
|
|
return -1;
|
|
}
|
|
|
|
static Token
|
|
peek()
|
|
{
|
|
if (thead == TXXX)
|
|
thead = lex();
|
|
return thead;
|
|
}
|
|
|
|
static Token
|
|
next()
|
|
{
|
|
Token t;
|
|
|
|
t = peek();
|
|
thead = TXXX;
|
|
return t;
|
|
}
|
|
|
|
Blk *
|
|
blocka()
|
|
{
|
|
static Blk zblock;
|
|
Blk *b;
|
|
|
|
b = alloc(sizeof *b);
|
|
*b = zblock;
|
|
b->id = nblk++;
|
|
return b;
|
|
}
|
|
|
|
static Ref
|
|
tmpref(char *v)
|
|
{
|
|
int t;
|
|
|
|
for (t=0; t<ntmp; t++)
|
|
if (strcmp(v, tmp[t].name) == 0)
|
|
return TMP(t);
|
|
if (ntmp++ >= NTmp)
|
|
err("too many temporaries");
|
|
strcpy(tmp[t].name, v);
|
|
return TMP(t);
|
|
}
|
|
|
|
static Ref
|
|
parseref()
|
|
{
|
|
Con c;
|
|
int i;
|
|
|
|
switch (next()) {
|
|
case TTmp:
|
|
return tmpref(tokval.str);
|
|
case TNum:
|
|
c = (Con){.type = CNum, .val = tokval.num};
|
|
strcpy(c.label, "");
|
|
if (0) {
|
|
case TAddr:
|
|
c = (Con){.type = CAddr, .val = 0};
|
|
strcpy(c.label, tokval.str);
|
|
}
|
|
for (i=0; i<ncon; i++)
|
|
if (con[i].type == c.type
|
|
&& con[i].val == c.val
|
|
&& strcmp(con[i].label, c.label) == 0)
|
|
return CON(i);
|
|
if (ncon++ >= NCon)
|
|
err("too many constants");
|
|
con[i] = c;
|
|
return CON(i);
|
|
default:
|
|
return R;
|
|
}
|
|
}
|
|
|
|
static Blk *
|
|
findblk(char *name)
|
|
{
|
|
int i;
|
|
|
|
assert(name[0]);
|
|
for (i=0; i<NBlk; i++)
|
|
if (!bmap[i] || strcmp(bmap[i]->name, name) == 0)
|
|
break;
|
|
if (i == NBlk)
|
|
err("too many blocks");
|
|
if (!bmap[i]) {
|
|
bmap[i] = blocka();
|
|
strcpy(bmap[i]->name, name);
|
|
}
|
|
return bmap[i];
|
|
}
|
|
|
|
static void
|
|
expect(Token t)
|
|
{
|
|
static char *names[] = {
|
|
[TLbl] = "label",
|
|
[TComma] = "comma",
|
|
[TEq] = "=",
|
|
[TNL] = "newline",
|
|
[TEOF] = 0,
|
|
};
|
|
char buf[128], *s1, *s2;
|
|
Token t1;
|
|
|
|
t1 = next();
|
|
if (t == t1)
|
|
return;
|
|
s1 = names[t] ? names[t] : "??";
|
|
s2 = names[t1] ? names[t1] : "??";
|
|
snprintf(buf, sizeof buf,
|
|
"%s expected (got %s instead)", s1, s2);
|
|
err(buf);
|
|
}
|
|
|
|
static void
|
|
closeblk()
|
|
{
|
|
curb->nins = curi - insb;
|
|
curb->ins = alloc(curb->nins * sizeof(Ins));
|
|
memcpy(curb->ins, insb, curb->nins * sizeof(Ins));
|
|
blink = &curb->link;
|
|
curi = insb;
|
|
}
|
|
|
|
static PState
|
|
parseline(PState ps)
|
|
{
|
|
Ref arg[NPred] = {R};
|
|
Blk *blk[NPred];
|
|
Phi *phi;
|
|
Ref r;
|
|
Token t;
|
|
Blk *b;
|
|
int op, i;
|
|
|
|
do
|
|
t = next();
|
|
while (t == TNL);
|
|
if (ps == PLbl && t != TLbl && t != TEOF)
|
|
err("label or end of file expected");
|
|
switch (t) {
|
|
default:
|
|
err("label, instruction or jump expected");
|
|
case TEOF:
|
|
return PEnd;
|
|
case TTmp:
|
|
break;
|
|
case TLbl:
|
|
b = findblk(tokval.str);
|
|
if (b->jmp.type != JXXX)
|
|
err("multiple definitions of block");
|
|
if (curb && curb->jmp.type == JXXX) {
|
|
closeblk();
|
|
curb->jmp.type = JJmp;
|
|
curb->s1 = b;
|
|
}
|
|
*blink = b;
|
|
curb = b;
|
|
plink = &curb->phi;
|
|
expect(TNL);
|
|
return PPhi;
|
|
case TRet:
|
|
curb->jmp.type = JRet;
|
|
goto Close;
|
|
case TJmp:
|
|
curb->jmp.type = JJmp;
|
|
goto Jump;
|
|
case TJez:
|
|
curb->jmp.type = JJez;
|
|
r = parseref();
|
|
if (req(r, R))
|
|
err("invalid argument for jez jump");
|
|
curb->jmp.arg = r;
|
|
expect(TComma);
|
|
Jump:
|
|
expect(TLbl);
|
|
curb->s1 = findblk(tokval.str);
|
|
if (curb->jmp.type != JJmp) {
|
|
expect(TComma);
|
|
expect(TLbl);
|
|
curb->s2 = findblk(tokval.str);
|
|
}
|
|
Close:
|
|
expect(TNL);
|
|
closeblk();
|
|
return PLbl;
|
|
}
|
|
r = tmpref(tokval.str);
|
|
expect(TEq);
|
|
switch (next()) {
|
|
case TW:
|
|
tmp[r.val].type = TWord;
|
|
break;
|
|
case TL:
|
|
tmp[r.val].type = TLong;
|
|
break;
|
|
default:
|
|
err("class expected after =");
|
|
}
|
|
switch (next()) {
|
|
case TCopy:
|
|
op = OCopy;
|
|
break;
|
|
case TAdd:
|
|
op = OAdd;
|
|
break;
|
|
case TSub:
|
|
op = OSub;
|
|
break;
|
|
case TDiv:
|
|
op = ODiv;
|
|
break;
|
|
case TRem:
|
|
op = ORem;
|
|
break;
|
|
case TPhi:
|
|
if (ps != PPhi)
|
|
err("unexpected phi instruction");
|
|
op = -1;
|
|
break;
|
|
default:
|
|
err("invalid instruction");
|
|
}
|
|
i = 0;
|
|
if (peek() != TNL)
|
|
for (;;) {
|
|
if (i == NPred)
|
|
err("too many arguments");
|
|
if (op == -1) {
|
|
expect(TLbl);
|
|
blk[i] = findblk(tokval.str);
|
|
}
|
|
arg[i] = parseref();
|
|
if (req(arg[i], R))
|
|
err("invalid instruction argument");
|
|
i++;
|
|
t = peek();
|
|
if (t == TNL)
|
|
break;
|
|
if (t != TComma)
|
|
err("comma or end of line expected");
|
|
next();
|
|
}
|
|
next();
|
|
if (op != -1 && i != opdesc[op].arity)
|
|
err("invalid arity");
|
|
if (op != -1) {
|
|
if (curi - insb >= NIns)
|
|
err("too many instructions in block");
|
|
curi->op = op;
|
|
curi->to = r;
|
|
curi->arg[0] = arg[0];
|
|
curi->arg[1] = arg[1];
|
|
curi++;
|
|
return PIns;
|
|
} else {
|
|
phi = alloc(sizeof *phi);
|
|
phi->to = r;
|
|
memcpy(phi->arg, arg, i * sizeof arg[0]);
|
|
memcpy(phi->blk, blk, i * sizeof blk[0]);
|
|
phi->narg = i;
|
|
*plink = phi;
|
|
plink = &phi->link;
|
|
return PPhi;
|
|
}
|
|
}
|
|
|
|
Fn *
|
|
parsefn(FILE *f)
|
|
{
|
|
int i;
|
|
PState ps;
|
|
Fn *fn;
|
|
|
|
inf = f;
|
|
for (i=0; i<NBlk; i++)
|
|
bmap[i] = 0;
|
|
for (i=0; i<NTmp; i++)
|
|
tmp[i] = (Tmp){.name = ""};
|
|
ntmp = 1;
|
|
ncon = 0;
|
|
curi = insb;
|
|
curb = 0;
|
|
lnum = 1;
|
|
nblk = 0;
|
|
fn = alloc(sizeof *fn);
|
|
blink = &fn->start;
|
|
ps = PLbl;
|
|
do
|
|
ps = parseline(ps);
|
|
while (ps != PEnd);
|
|
if (!curb)
|
|
err("empty file");
|
|
if (curb->jmp.type == JXXX)
|
|
err("last block misses jump");
|
|
fn->tmp = alloc(ntmp * sizeof tmp[0]);
|
|
memcpy(fn->tmp, tmp, ntmp * sizeof tmp[0]);
|
|
fn->con = alloc(ncon * sizeof con[0]);
|
|
memcpy(fn->con, con, ncon * sizeof con[0]);
|
|
fn->ntmp = ntmp;
|
|
fn->ncon = ncon;
|
|
fn->nblk = nblk;
|
|
fn->rpo = 0;
|
|
return fn;
|
|
}
|
|
|
|
static char *
|
|
printref(Ref r, Fn *fn, FILE *f)
|
|
{
|
|
static char *ttoa[] = {
|
|
[TUndef] = "?",
|
|
[TWord] = "w",
|
|
[TLong] = "l",
|
|
};
|
|
|
|
switch (r.type) {
|
|
case RTmp:
|
|
fprintf(f, "%%%s", fn->tmp[r.val].name);
|
|
return ttoa[fn->tmp[r.val].type];
|
|
case RCon:
|
|
switch (fn->con[r.val].type) {
|
|
case CAddr:
|
|
fprintf(f, "$%s", fn->con[r.val].label);
|
|
if (fn->con[r.val].val)
|
|
fprintf(f, "%+"PRIi64, fn->con[r.val].val);
|
|
break;
|
|
case CNum:
|
|
fprintf(f, "%"PRIi64, fn->con[r.val].val);
|
|
break;
|
|
case CUndef:
|
|
diag("printref: invalid constant");
|
|
}
|
|
break;
|
|
case RSlot:
|
|
fprintf(f, "$%d", r.val);
|
|
break;
|
|
case RReg:
|
|
fprintf(f, "???");
|
|
break;
|
|
}
|
|
return "";
|
|
}
|
|
|
|
void
|
|
printfn(Fn *fn, FILE *f)
|
|
{
|
|
Blk *b;
|
|
Phi *p;
|
|
Ins *i;
|
|
uint n;
|
|
char *cl;
|
|
|
|
for (b=fn->start; b; b=b->link) {
|
|
fprintf(f, "@%s\n", b->name);
|
|
for (p=b->phi; p; p=p->link) {
|
|
fprintf(f, "\t");
|
|
cl = printref(p->to, fn, f);
|
|
fprintf(f, " =%s phi ", cl);
|
|
assert(p->narg);
|
|
for (n=0;; n++) {
|
|
fprintf(f, "@%s ", p->blk[n]->name);
|
|
printref(p->arg[n], fn, f);
|
|
if (n == p->narg-1) {
|
|
fprintf(f, "\n");
|
|
break;
|
|
} else
|
|
fprintf(f, ", ");
|
|
}
|
|
}
|
|
for (i=b->ins; i-b->ins < b->nins; i++) {
|
|
fprintf(f, "\t");
|
|
if (!req(i->to, R)) {
|
|
cl = printref(i->to, fn, f);
|
|
fprintf(f, " =%s ", cl);
|
|
}
|
|
assert(opdesc[i->op].name);
|
|
fprintf(f, "%s", opdesc[i->op].name);
|
|
n = opdesc[i->op].arity;
|
|
if (n > 0) {
|
|
fprintf(f, " ");
|
|
printref(i->arg[0], fn, f);
|
|
}
|
|
if (n > 1) {
|
|
fprintf(f, ", ");
|
|
printref(i->arg[1], fn, f);
|
|
}
|
|
fprintf(f, "\n");
|
|
}
|
|
switch (b->jmp.type) {
|
|
case JRet:
|
|
fprintf(f, "\tret\n");
|
|
break;
|
|
case JJmp:
|
|
if (b->s1 != b->link)
|
|
fprintf(f, "\tjmp @%s\n", b->s1->name);
|
|
break;
|
|
case JJez:
|
|
fprintf(f, "\tjez ");
|
|
printref(b->jmp.arg, fn, f);
|
|
fprintf(f, ", @%s, @%s\n", b->s1->name, b->s2->name);
|
|
break;
|
|
}
|
|
}
|
|
}
|