The phi fixing mechanism can use emit(), so we need to set curi before performing the rewrite. Otherwise, we are writing at random places in the instruction buffer (not so bad because it is bounds checked), but then we lose the instructions written (bad)!
969 lines
18 KiB
C
969 lines
18 KiB
C
#include "lisc.h"
|
|
#include <limits.h>
|
|
|
|
/* For x86_64, do the following:
|
|
*
|
|
* - lower calls
|
|
* - check that constants are used only in
|
|
* places allowed
|
|
* - ensure immediates always fit in 32b
|
|
* - explicit machine register constraints
|
|
* on instructions like division.
|
|
* - implement fast locals (the streak of
|
|
* constant allocX in the first basic block)
|
|
* - recognize complex addressing modes
|
|
*
|
|
* Invariant: the use counts that are used
|
|
* in sel() must be sound. This
|
|
* is not so trivial, maybe the
|
|
* dce should be moved out...
|
|
*/
|
|
|
|
typedef struct ANum ANum;
|
|
typedef struct AClass AClass;
|
|
|
|
struct ANum {
|
|
char n, l, r;
|
|
Ins *i;
|
|
Ref mem;
|
|
};
|
|
|
|
static void amatch(Addr *, Ref, ANum *, Fn *, int);
|
|
|
|
static int
|
|
fcmptoi(int fc)
|
|
{
|
|
switch (fc) {
|
|
default: diag("isel: fcmptoi defaulted");
|
|
case FCle: return ICule;
|
|
case FClt: return ICult;
|
|
case FCgt: return ICugt;
|
|
case FCge: return ICuge;
|
|
case FCne: return ICne;
|
|
case FCeq: return ICeq;
|
|
case FCo: return ICXnp;
|
|
case FCuo: return ICXp;
|
|
}
|
|
}
|
|
|
|
static int
|
|
iscmp(int op, int *pk, int *pc)
|
|
{
|
|
int k, c;
|
|
|
|
if (OCmpw <= op && op <= OCmpw1) {
|
|
c = op - OCmpw;
|
|
k = Kw;
|
|
}
|
|
else if (OCmpl <= op && op <= OCmpl1) {
|
|
c = op - OCmpl;
|
|
k = Kl;
|
|
}
|
|
else if (OCmps <= op && op <= OCmps1) {
|
|
c = fcmptoi(op - OCmps);
|
|
k = Ks;
|
|
}
|
|
else if (OCmpd <= op && op <= OCmpd1) {
|
|
c = fcmptoi(op - OCmpd);
|
|
k = Kd;
|
|
}
|
|
else
|
|
return 0;
|
|
if (pk)
|
|
*pk = k;
|
|
if (pc)
|
|
*pc = c;
|
|
return 1;
|
|
}
|
|
|
|
static int
|
|
noimm(Ref r, Fn *fn)
|
|
{
|
|
int64_t val;
|
|
|
|
assert(rtype(r) == RCon);
|
|
switch (fn->con[r.val].type) {
|
|
default:
|
|
diag("isel: invalid constant");
|
|
case CAddr:
|
|
/* we only support the 'small'
|
|
* code model of the ABI, this
|
|
* means that we can always
|
|
* address data with 32bits
|
|
*/
|
|
return 0;
|
|
case CBits:
|
|
val = fn->con[r.val].bits.i;
|
|
return (val < INT32_MIN || val > INT32_MAX);
|
|
}
|
|
}
|
|
|
|
static int
|
|
rslot(Ref r, Fn *fn)
|
|
{
|
|
if (rtype(r) != RTmp)
|
|
return -1;
|
|
return fn->tmp[r.val].slot;
|
|
}
|
|
|
|
static int
|
|
argcls(Ins *i, int n)
|
|
{
|
|
switch (i->op) {
|
|
case OStores:
|
|
return n == 0 ? Ks : Kl;
|
|
case OStored:
|
|
return n == 0 ? Kd : Kl;
|
|
case OStoreb:
|
|
case OStoreh:
|
|
case OStorew:
|
|
return n == 0 ? Kw : Kl;
|
|
case OStorel:
|
|
return Kl;
|
|
default:
|
|
if (OCmpw <= i->op && i->op <= OCmpd1)
|
|
diag("isel: invalid call to argcls");
|
|
if (isload(i->op))
|
|
return Kl;
|
|
if (isext(i->op))
|
|
return Kw;
|
|
return i->cls;
|
|
}
|
|
}
|
|
|
|
static void
|
|
fixarg(Ref *r, int k, int phi, Fn *fn)
|
|
{
|
|
Addr a;
|
|
Ref r0, r1;
|
|
int s, n;
|
|
|
|
r1 = r0 = *r;
|
|
s = rslot(r0, fn);
|
|
if (KBASE(k) == 1 && rtype(r0) == RCon) {
|
|
/* load floating points from memory
|
|
* slots, they can't be used as
|
|
* immediates
|
|
*/
|
|
r1 = MEM(fn->nmem);
|
|
vgrow(&fn->mem, ++fn->nmem);
|
|
memset(&a, 0, sizeof a);
|
|
a.offset.type = CAddr;
|
|
n = stashfp(fn->con[r0.val].bits.i, KWIDE(k));
|
|
sprintf(a.offset.label, ".Lfp%d", n);
|
|
fn->mem[fn->nmem-1] = a;
|
|
}
|
|
else if (!phi && rtype(r0) == RCon && noimm(r0, fn)) {
|
|
/* load constants that do not fit in
|
|
* a 32bit signed integer into a
|
|
* long temporary
|
|
*/
|
|
r1 = newtmp("isel", fn);
|
|
emit(OCopy, Kl, r1, r0, R);
|
|
}
|
|
else if (s != -1) {
|
|
/* load fast locals' addresses into
|
|
* temporaries right before the
|
|
* instruction
|
|
*/
|
|
r1 = newtmp("isel", fn);
|
|
emit(OAddr, Kl, r1, SLOT(s), R);
|
|
}
|
|
*r = r1;
|
|
}
|
|
|
|
static void
|
|
chuse(Ref r, int du, Fn *fn)
|
|
{
|
|
if (rtype(r) == RTmp)
|
|
fn->tmp[r.val].nuse += du;
|
|
}
|
|
|
|
static void
|
|
seladdr(Ref *r, ANum *an, Fn *fn)
|
|
{
|
|
Addr a;
|
|
Ref r0, r1;
|
|
|
|
r0 = *r;
|
|
if (rtype(r0) == RTmp) {
|
|
chuse(r0, -1, fn);
|
|
r1 = an[r0.val].mem;
|
|
if (req(r1, R)) {
|
|
amatch(&a, r0, an, fn, 1);
|
|
vgrow(&fn->mem, ++fn->nmem);
|
|
fn->mem[fn->nmem-1] = a;
|
|
r1 = MEM(fn->nmem-1);
|
|
chuse(a.base, +1, fn);
|
|
chuse(a.index, +1, fn);
|
|
if (rtype(a.base) != RTmp)
|
|
if (rtype(a.index) != RTmp)
|
|
an[r0.val].mem = r1;
|
|
}
|
|
*r = r1;
|
|
}
|
|
}
|
|
|
|
static void
|
|
selcmp(Ref arg[2], int k, Fn *fn)
|
|
{
|
|
Ref r;
|
|
|
|
if (rtype(arg[0]) == RCon) {
|
|
r = arg[1];
|
|
arg[1] = arg[0];
|
|
arg[0] = r;
|
|
}
|
|
assert(rtype(arg[0]) != RCon);
|
|
emit(OXCmp, k, R, arg[1], arg[0]);
|
|
fixarg(&curi->arg[0], k, 0, fn);
|
|
}
|
|
|
|
static void
|
|
sel(Ins i, ANum *an, Fn *fn)
|
|
{
|
|
Ref r0, r1;
|
|
int x, k, kc;
|
|
int64_t val;
|
|
Ins *i0;
|
|
|
|
if (rtype(i.to) == RTmp)
|
|
if (!isreg(i.to) && !isreg(i.arg[0]) && !isreg(i.arg[1]))
|
|
if (fn->tmp[i.to.val].nuse == 0) {
|
|
chuse(i.arg[0], -1, fn);
|
|
chuse(i.arg[1], -1, fn);
|
|
return;
|
|
}
|
|
i0 = curi;
|
|
k = i.cls;
|
|
switch (i.op) {
|
|
case ODiv:
|
|
case ORem:
|
|
if (i.op == ODiv)
|
|
r0 = TMP(RAX), r1 = TMP(RDX);
|
|
else
|
|
r0 = TMP(RDX), r1 = TMP(RAX);
|
|
emit(OCopy, k, i.to, r0, R);
|
|
emit(OCopy, k, R, r1, R);
|
|
if (rtype(i.arg[1]) == RCon) {
|
|
/* immediates not allowed for
|
|
* divisions in x86
|
|
*/
|
|
r0 = newtmp("isel", fn);
|
|
} else
|
|
r0 = i.arg[1];
|
|
emit(OXDiv, k, R, r0, R);
|
|
emit(OSign, k, TMP(RDX), TMP(RAX), R);
|
|
emit(OCopy, k, TMP(RAX), i.arg[0], R);
|
|
if (rtype(i.arg[1]) == RCon)
|
|
emit(OCopy, k, r0, i.arg[1], R);
|
|
break;
|
|
case ONop:
|
|
break;
|
|
case OStored:
|
|
if (rtype(i.arg[0]) == RCon)
|
|
i.op = OStorel;
|
|
case OStores:
|
|
if (rtype(i.arg[0]) == RCon)
|
|
i.op = OStorew;
|
|
case OStorel:
|
|
case OStorew:
|
|
case OStoreh:
|
|
case OStoreb:
|
|
seladdr(&i.arg[1], an, fn);
|
|
goto Emit;
|
|
case_OLoad:
|
|
seladdr(&i.arg[0], an, fn);
|
|
goto Emit;
|
|
case OXPush:
|
|
case OCall:
|
|
case OSAlloc:
|
|
case OCopy:
|
|
case OAdd:
|
|
case OSub:
|
|
case OMul:
|
|
case OAnd:
|
|
case OXTest:
|
|
case_OExt:
|
|
Emit:
|
|
emiti(i);
|
|
fixarg(&curi->arg[0], argcls(curi, 0), 0, fn);
|
|
fixarg(&curi->arg[1], argcls(curi, 1), 0, fn);
|
|
break;
|
|
case OAlloc:
|
|
case OAlloc+1:
|
|
case OAlloc+2: /* == OAlloc1 */
|
|
/* we need to make sure
|
|
* the stack remains aligned
|
|
* (rsp = 0) mod 16
|
|
*/
|
|
if (rtype(i.arg[0]) == RCon) {
|
|
assert(fn->con[i.arg[0].val].type == CBits);
|
|
val = fn->con[i.arg[0].val].bits.i;
|
|
val = (val + 15) & ~INT64_C(15);
|
|
if (val < 0 || val > INT32_MAX)
|
|
diag("isel: alloc too large");
|
|
emit(OAlloc, Kl, i.to, getcon(val, fn), R);
|
|
} else {
|
|
/* r0 = (i.arg[0] + 15) & -16 */
|
|
r0 = newtmp("isel", fn);
|
|
r1 = newtmp("isel", fn);
|
|
emit(OSAlloc, Kl, i.to, r0, R);
|
|
emit(OAnd, 1, r0, r1, getcon(-16, fn));
|
|
emit(OAdd, 1, r1, i.arg[0], getcon(15, fn));
|
|
}
|
|
break;
|
|
default:
|
|
if (isext(i.op))
|
|
goto case_OExt;
|
|
if (isload(i.op))
|
|
goto case_OLoad;
|
|
if (iscmp(i.op, &kc, &x)) {
|
|
if (rtype(i.arg[0]) == RCon)
|
|
x = icmpop(x);
|
|
emit(OXSet+x, k, i.to, R, R);
|
|
selcmp(i.arg, kc, fn);
|
|
break;
|
|
}
|
|
diag("isel: non-exhaustive implementation");
|
|
}
|
|
|
|
while (i0 > curi && --i0)
|
|
if (rslot(i0->arg[0], fn) != -1
|
|
|| rslot(i0->arg[1], fn) != -1)
|
|
diag("isel: usupported address argument");
|
|
}
|
|
|
|
/* Scan the instructions of [i0, i) backwards
 * and return the first one that sets the
 * flags (a comparison, an add, a sub or an
 * and).  Copies, stores, loads and extensions
 * are skipped; any other instruction stops
 * the search and 0 is returned, as when the
 * beginning of the range is reached.
 */
static Ins *
flagi(Ins *i0, Ins *i)
{
	int op;

	while (i > i0) {
		i--;
		op = i->op;
		/* flag-setting */
		if (op == OAdd || op == OSub || op == OAnd)
			return i;
		/* flag-transparent */
		if (op == OCopy
		|| op == OStored || op == OStores
		|| op == OStorel || op == OStorew
		|| op == OStoreh || op == OStoreb)
			continue;
		if (iscmp(op, 0, 0))
			return i;
		if (isext(op) || isload(op))
			continue;
		return 0;
	}
	return 0;
}
|
|
|
|
static void
|
|
seljmp(Blk *b, Fn *fn)
|
|
{
|
|
Ref r;
|
|
int c, k;
|
|
Ins *fi;
|
|
|
|
switch (b->jmp.type) {
|
|
default:
|
|
return;
|
|
case JRetc:
|
|
assert(!"retc todo");
|
|
case JRetw:
|
|
case JRetl:
|
|
case JRets:
|
|
case JRetd:
|
|
k = b->jmp.type - JRetw;
|
|
b->jmp.type = JRet0;
|
|
r = b->jmp.arg;
|
|
b->jmp.arg = R;
|
|
if (KBASE(k) == 0)
|
|
emit(OCopy, k, TMP(RAX), r, R);
|
|
else
|
|
emit(OCopy, k, TMP(XMM0), r, R);
|
|
return;
|
|
case JJnz:;
|
|
}
|
|
r = b->jmp.arg;
|
|
b->jmp.arg = R;
|
|
assert(!req(r, R));
|
|
if (rtype(r) == RCon) {
|
|
b->jmp.type = JJmp;
|
|
if (req(r, CON_Z))
|
|
b->s1 = b->s2;
|
|
b->s2 = 0;
|
|
return;
|
|
}
|
|
fi = flagi(b->ins, &b->ins[b->nins]);
|
|
if (fi && req(fi->to, r)) {
|
|
if (iscmp(fi->op, &k, &c)) {
|
|
if (rtype(fi->arg[0]) == RCon)
|
|
c = icmpop(c);
|
|
b->jmp.type = JXJc + c;
|
|
if (fn->tmp[r.val].nuse == 1) {
|
|
assert(fn->tmp[r.val].ndef == 1);
|
|
selcmp(fi->arg, k, fn);
|
|
*fi = (Ins){.op = ONop};
|
|
}
|
|
return;
|
|
}
|
|
if (fi->op == OAnd && fn->tmp[r.val].nuse == 1
|
|
&& (rtype(fi->arg[0]) == RTmp ||
|
|
rtype(fi->arg[1]) == RTmp)) {
|
|
fi->op = OXTest;
|
|
fi->to = R;
|
|
b->jmp.type = JXJc + ICne;
|
|
if (rtype(fi->arg[1]) == RCon) {
|
|
r = fi->arg[1];
|
|
fi->arg[1] = fi->arg[0];
|
|
fi->arg[0] = r;
|
|
}
|
|
return;
|
|
}
|
|
if (fn->tmp[r.val].nuse > 1) {
|
|
b->jmp.type = JXJc + ICne;
|
|
return;
|
|
}
|
|
}
|
|
selcmp((Ref[2]){r, CON_Z}, Kw, fn); /* todo, add long branch if non-zero */
|
|
b->jmp.type = JXJc + ICne;
|
|
}
|
|
|
|
struct AClass {
|
|
int inmem;
|
|
int align;
|
|
uint size;
|
|
int cls[2];
|
|
};
|
|
|
|
static void
|
|
aclass(AClass *a, Typ *t)
|
|
{
|
|
int e, s, n, cls;
|
|
uint sz, al;
|
|
|
|
sz = t->size;
|
|
al = 1u << t->align;
|
|
|
|
/* the ABI requires sizes to be rounded
|
|
* up to the nearest multiple of 8, moreover
|
|
* it makes it easy load and store structures
|
|
* in registers
|
|
*/
|
|
if (al < 8)
|
|
al = 8;
|
|
sz = (sz + al-1) & -al;
|
|
|
|
a->size = sz;
|
|
a->align = t->align;
|
|
|
|
if (t->dark || sz > 16) {
|
|
/* large or unaligned structures are
|
|
* required to be passed in memory
|
|
*/
|
|
a->inmem = 1;
|
|
return;
|
|
}
|
|
|
|
for (e=0, s=0; e<2; e++) {
|
|
cls = -1;
|
|
for (n=0; n<8 && t->seg[s].len; s++) {
|
|
if (t->seg[s].flt) {
|
|
if (cls == -1)
|
|
cls = Kd;
|
|
} else
|
|
cls = Kl;
|
|
n += t->seg[s].len;
|
|
}
|
|
assert(n <= 8);
|
|
a->cls[e] = cls;
|
|
}
|
|
}
|
|
|
|
static int
|
|
classify(Ins *i0, Ins *i1, AClass *ac, int op)
|
|
{
|
|
int nint, ni, nsse, ns, n, *pn;
|
|
AClass *a;
|
|
Ins *i;
|
|
|
|
nint = 6;
|
|
nsse = 8;
|
|
for (i=i0, a=ac; i<i1; i++, a++) {
|
|
if (i->op == op) {
|
|
if (KBASE(i->cls) == 0)
|
|
pn = &nint;
|
|
else
|
|
pn = &nsse;
|
|
if (*pn > 0) {
|
|
--*pn;
|
|
a->inmem = 0;
|
|
} else
|
|
a->inmem = 2;
|
|
a->align = 3;
|
|
a->size = 8;
|
|
a->cls[0] = i->cls;
|
|
} else {
|
|
n = i->arg[0].val & AMask;
|
|
aclass(a, &typ[n]);
|
|
if (a->inmem)
|
|
continue;
|
|
ni = ns = 0;
|
|
for (n=0; n<2; n++)
|
|
if (a->cls[n] == 0)
|
|
ni++;
|
|
else
|
|
ns++;
|
|
if (nint > ni && nsse > ns) {
|
|
nint -= ni;
|
|
nsse -= ns;
|
|
} else
|
|
a->inmem = 1;
|
|
}
|
|
}
|
|
|
|
return ((6-nint) << 4) | ((8-nsse) << 8);
|
|
}
|
|
|
|
int rsave[/* NRSave */] = {
|
|
RDI, RSI, RDX, RCX, R8, R9, R10, R11, RAX,
|
|
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
|
|
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14
|
|
};
|
|
typedef char make_sure_rsave_has_correct_size[
|
|
sizeof rsave == NRSave * sizeof(int) ? 1 : -1
|
|
];
|
|
int rclob[NRClob] = {RBX, R12, R13, R14, R15};
|
|
|
|
ulong
|
|
calldef(Ins i, int p[2])
|
|
{
|
|
ulong b;
|
|
int ni, nf;
|
|
|
|
b = 0;
|
|
ni = i.arg[1].val & 3;
|
|
nf = (i.arg[1].val >> 2) & 3;
|
|
if (ni >= 1)
|
|
b |= BIT(RAX);
|
|
if (ni >= 2)
|
|
b |= BIT(RDX);
|
|
if (nf >= 1)
|
|
b |= BIT(XMM0);
|
|
if (nf >= 2)
|
|
b |= BIT(XMM1);
|
|
if (p) {
|
|
p[0] = ni;
|
|
p[1] = nf;
|
|
}
|
|
return b;
|
|
}
|
|
|
|
ulong
|
|
calluse(Ins i, int p[2])
|
|
{
|
|
ulong b;
|
|
int j, ni, nf;
|
|
|
|
b = 0;
|
|
ni = (i.arg[1].val >> 4) & 15;
|
|
nf = (i.arg[1].val >> 8) & 15;
|
|
for (j=0; j<ni; j++)
|
|
b |= BIT(rsave[j]);
|
|
for (j=0; j<nf; j++)
|
|
b |= BIT(XMM0+j);
|
|
if (p) {
|
|
p[0] = ni;
|
|
p[1] = nf;
|
|
}
|
|
return b;
|
|
}
|
|
|
|
static Ref
|
|
rarg(int ty, int *ni, int *ns)
|
|
{
|
|
if (KBASE(ty) == 0)
|
|
return TMP(rsave[(*ni)++]);
|
|
else
|
|
return TMP(XMM0 + (*ns)++);
|
|
}
|
|
|
|
static void
|
|
selcall(Fn *fn, Ins *i0, Ins *i1)
|
|
{
|
|
Ins *i;
|
|
AClass *ac, *a;
|
|
int ca, ni, ns;
|
|
uint stk, sz;
|
|
Ref r, r1, r2;
|
|
|
|
ac = alloc((i1-i0) * sizeof ac[0]);
|
|
ca = classify(i0, i1, ac, OArg);
|
|
|
|
for (stk=0, a=&ac[i1-i0]; a>ac;)
|
|
if ((--a)->inmem) {
|
|
assert(a->align <= 4);
|
|
stk += a->size;
|
|
if (a->align == 4) /* todo, bigger alignments */
|
|
stk += stk & 15;
|
|
}
|
|
stk += stk & 15;
|
|
|
|
if (!req(i1->arg[1], R))
|
|
diag("struct-returning function not implemented");
|
|
if (stk) {
|
|
r = getcon(-(int64_t)stk, fn);
|
|
emit(OSAlloc, Kl, R, r, R);
|
|
}
|
|
emit(OCopy, i1->cls, i1->to, TMP(RAX), R);
|
|
emit(OCall, i1->cls, R, i1->arg[0], CALL(1 | ca));
|
|
|
|
for (i=i0, a=ac, ni=ns=0; i<i1; i++, a++) {
|
|
if (a->inmem)
|
|
continue;
|
|
r1 = rarg(a->cls[0], &ni, &ns);
|
|
if (i->op == OArgc) {
|
|
if (a->size > 8) {
|
|
r2 = rarg(a->cls[1], &ni, &ns);
|
|
r = newtmp("isel", fn);
|
|
emit(OLoad, a->cls[1], r2, r, R);
|
|
emit(OAdd, Kl, r, i->arg[1], getcon(8, fn));
|
|
}
|
|
emit(OLoad, a->cls[0], r1, i->arg[1], R);
|
|
} else
|
|
emit(OCopy, i->cls, r1, i->arg[0], R);
|
|
}
|
|
for (i=i0, a=ac; i<i1; i++, a++) {
|
|
if (!a->inmem)
|
|
continue;
|
|
sz = a->size;
|
|
if (a->align == 4)
|
|
sz += (stk-sz) & 15;
|
|
stk -= sz;
|
|
if (i->op == OArgc) {
|
|
assert(!"argc todo 1");
|
|
} else {
|
|
emit(OXPush, Kl, R, i->arg[0], R);
|
|
}
|
|
}
|
|
if (stk) {
|
|
assert(stk == 8);
|
|
emit(OXPush, Kl, R, CON_Z, R);
|
|
}
|
|
}
|
|
|
|
static void
|
|
selpar(Fn *fn, Ins *i0, Ins *i1)
|
|
{
|
|
AClass *ac, *a;
|
|
Ins *i;
|
|
int ni, ns, stk, al;
|
|
Ref r, r1;
|
|
|
|
ac = alloc((i1-i0) * sizeof ac[0]);
|
|
classify(i0, i1, ac, OPar);
|
|
|
|
curi = insb;
|
|
ni = ns = 0;
|
|
assert(NAlign == 3);
|
|
stk = -2;
|
|
for (i=i0, a=ac; i<i1; i++, a++) {
|
|
switch (a->inmem) {
|
|
case 1:
|
|
assert(!"argc todo 2");
|
|
continue;
|
|
case 2:
|
|
stk -= 2;
|
|
*curi++ = (Ins){OLoad, i->to, {SLOT(stk)}, i->cls};
|
|
continue;
|
|
}
|
|
r1 = rarg(a->cls[0], &ni, &ns);
|
|
if (i->op == OParc) {
|
|
r = newtmp("isel", fn);
|
|
*curi++ = (Ins){OCopy, r, {r1}, Kl};
|
|
a->cls[0] = r.val;
|
|
if (a->size > 8) {
|
|
r1 = rarg(a->cls[1], &ni, &ns);
|
|
r = newtmp("isel", fn);
|
|
*curi++ = (Ins){OCopy, r, {r1}, Kl};
|
|
a->cls[1] = r.val;
|
|
}
|
|
} else
|
|
*curi++ = (Ins){OCopy, i->to, {r1}, i->cls};
|
|
}
|
|
for (i=i0, a=ac; i<i1; i++, a++) {
|
|
if (i->op != OParc || a->inmem)
|
|
continue;
|
|
assert(NAlign == 3);
|
|
for (al=0; a->align >> (al+2); al++)
|
|
;
|
|
r = TMP(a->cls[0]);
|
|
r1 = i->to;
|
|
*curi++ = (Ins){OAlloc+al, r1, {getcon(a->size, fn)}, Kl};
|
|
*curi++ = (Ins){OStorel, R, {r, r1}, 0};
|
|
if (a->size > 8) {
|
|
r = newtmp("isel", fn);
|
|
*curi++ = (Ins){OAdd, r, {r1, getcon(8, fn)}, Kl};
|
|
r1 = TMP(a->cls[1]);
|
|
*curi++ = (Ins){OStorel, R, {r1, r}, 0};
|
|
}
|
|
}
|
|
}
|
|
|
|
static int
|
|
aref(Ref r, ANum *ai)
|
|
{
|
|
switch (rtype(r)) {
|
|
default:
|
|
diag("isel: aref defaulted");
|
|
case RCon:
|
|
return 2;
|
|
case RTmp:
|
|
return ai[r.val].n;
|
|
}
|
|
}
|
|
|
|
static int
|
|
ascale(Ref r, Con *con)
|
|
{
|
|
int64_t n;
|
|
|
|
if (rtype(r) != RCon)
|
|
return 0;
|
|
if (con[r.val].type != CBits)
|
|
return 0;
|
|
n = con[r.val].bits.i;
|
|
return n == 1 || n == 2 || n == 4 || n == 8;
|
|
}
|
|
|
|
static void
|
|
anumber(ANum *ai, Blk *b, Con *con)
|
|
{
|
|
/* This should be made obsolete by a proper
|
|
* reassoc pass.
|
|
*
|
|
* Rules:
|
|
*
|
|
* RTmp(_) -> 0 tmp
|
|
* ( RTmp(_) -> 1 slot )
|
|
* RCon(_) -> 2 con
|
|
* 0 * 2 -> 3 s * i (when constant is 1,2,4,8)
|
|
*/
|
|
static char add[10][10] = {
|
|
[2] [2] = 2, /* folding */
|
|
[2] [5] = 5, [5] [2] = 5,
|
|
[2] [6] = 6, [6] [2] = 6,
|
|
[2] [7] = 7, [7] [2] = 7,
|
|
[0] [0] = 4, /* 4: b + s * i */
|
|
[0] [3] = 4, [3] [0] = 4,
|
|
[2] [3] = 5, [3] [2] = 5, /* 5: o + s * i */
|
|
[0] [2] = 6, [2] [0] = 6, /* 6: o + b */
|
|
[2] [4] = 7, [4] [2] = 7, /* 7: o + b + s * i */
|
|
[0] [5] = 7, [5] [0] = 7,
|
|
[6] [3] = 7, [3] [6] = 7,
|
|
|
|
};
|
|
int a, a1, a2, n1, n2, t1, t2;
|
|
Ins *i;
|
|
|
|
for (i=b->ins; i-b->ins < b->nins; i++) {
|
|
if (rtype(i->to) == RTmp)
|
|
ai[i->to.val].i = i;
|
|
if (i->op != OAdd && i->op != OMul)
|
|
continue;
|
|
a1 = aref(i->arg[0], ai);
|
|
a2 = aref(i->arg[1], ai);
|
|
t1 = a1 != 1 && a1 != 2;
|
|
t2 = a2 != 1 && a2 != 2;
|
|
if (i->op == OAdd) {
|
|
a = add[n1 = a1][n2 = a2];
|
|
if (t1 && a < add[0][a2])
|
|
a = add[n1 = 0][n2 = a2];
|
|
if (t2 && a < add[a1][0])
|
|
a = add[n1 = a1][n2 = 0];
|
|
if (t1 && t2 && a < add[0][0])
|
|
a = add[n1 = 0][n2 = 0];
|
|
} else {
|
|
n1 = n2 = a = 0;
|
|
if (ascale(i->arg[0], con) && t2)
|
|
a = 3, n1 = 2, n2 = 0;
|
|
if (t1 && ascale(i->arg[1], con))
|
|
a = 3, n1 = 0, n2 = 2;
|
|
}
|
|
ai[i->to.val].n = a;
|
|
ai[i->to.val].l = n1;
|
|
ai[i->to.val].r = n2;
|
|
}
|
|
}
|
|
|
|
static void
|
|
amatch(Addr *a, Ref r, ANum *ai, Fn *fn, int top)
|
|
{
|
|
Ins *i;
|
|
int nl, nr, t, s;
|
|
Ref al, ar;
|
|
|
|
if (top)
|
|
memset(a, 0, sizeof *a);
|
|
if (rtype(r) == RCon) {
|
|
addcon(&a->offset, &fn->con[r.val]);
|
|
return;
|
|
}
|
|
assert(rtype(r) == RTmp);
|
|
i = ai[r.val].i;
|
|
nl = ai[r.val].l;
|
|
nr = ai[r.val].r;
|
|
if (i) {
|
|
if (nl > nr) {
|
|
al = i->arg[1];
|
|
ar = i->arg[0];
|
|
t = nl, nl = nr, nr = t;
|
|
} else {
|
|
al = i->arg[0];
|
|
ar = i->arg[1];
|
|
}
|
|
}
|
|
switch (ai[r.val].n) {
|
|
default:
|
|
diag("isel: amatch defaulted");
|
|
case 3: /* s * i */
|
|
if (!top) {
|
|
a->index = al;
|
|
a->scale = fn->con[ar.val].bits.i;
|
|
} else
|
|
a->base = r;
|
|
break;
|
|
case 4: /* b + s * i */
|
|
switch (nr) {
|
|
case 0:
|
|
if (fn->tmp[ar.val].slot != -1) {
|
|
al = i->arg[1];
|
|
ar = i->arg[0];
|
|
}
|
|
a->index = ar;
|
|
a->scale = 1;
|
|
break;
|
|
case 3:
|
|
amatch(a, ar, ai, fn, 0);
|
|
break;
|
|
}
|
|
r = al;
|
|
case 0:
|
|
s = fn->tmp[r.val].slot;
|
|
if (s != -1)
|
|
r = SLOT(s);
|
|
a->base = r;
|
|
break;
|
|
case 2: /* constants */
|
|
case 5: /* o + s * i */
|
|
case 6: /* o + b */
|
|
case 7: /* o + b + s * i */
|
|
amatch(a, ar, ai, fn, 0);
|
|
amatch(a, al, ai, fn, 0);
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* instruction selection
|
|
* requires use counts (as given by parsing)
|
|
*/
|
|
void
|
|
isel(Fn *fn)
|
|
{
|
|
Blk *b, **sb;
|
|
Ins *i, *i0, *ip;
|
|
Phi *p;
|
|
uint a;
|
|
int n, m, al;
|
|
int64_t sz;
|
|
ANum *ainfo;
|
|
|
|
for (n=0; n<fn->ntmp; n++)
|
|
fn->tmp[n].slot = -1;
|
|
fn->slot = 0;
|
|
|
|
/* lower arguments */
|
|
for (b=fn->start, i=b->ins; i-b->ins < b->nins; i++)
|
|
if (i->op != OPar && i->op != OParc)
|
|
break;
|
|
selpar(fn, b->ins, i);
|
|
n = b->nins - (i - b->ins) + (curi - insb);
|
|
i0 = alloc(n * sizeof(Ins));
|
|
ip = icpy(ip = i0, insb, curi - insb);
|
|
ip = icpy(ip, i, &b->ins[b->nins] - i);
|
|
b->nins = n;
|
|
b->ins = i0;
|
|
|
|
/* lower function calls */
|
|
for (b=fn->start; b; b=b->link) {
|
|
curi = &insb[NIns];
|
|
for (i=&b->ins[b->nins]; i!=b->ins;) {
|
|
if ((--i)->op == OCall) {
|
|
for (i0=i; i0>b->ins; i0--)
|
|
if ((i0-1)->op != OArg)
|
|
if ((i0-1)->op != OArgc)
|
|
break;
|
|
selcall(fn, i0, i);
|
|
i = i0;
|
|
continue;
|
|
}
|
|
assert(i->op != OArg && i->op != OArgc);
|
|
emiti(*i);
|
|
}
|
|
b->nins = &insb[NIns] - curi;
|
|
idup(&b->ins, curi, b->nins);
|
|
}
|
|
|
|
if (debug['A']) {
|
|
fprintf(stderr, "\n> After call lowering:\n");
|
|
printfn(fn, stderr);
|
|
}
|
|
|
|
/* assign slots to fast allocs */
|
|
b = fn->start;
|
|
assert(NAlign == 3 && "change n=4 and sz /= 4 below");
|
|
for (al=OAlloc, n=4; al<=OAlloc1; al++, n*=2)
|
|
for (i=b->ins; i-b->ins < b->nins; i++)
|
|
if (i->op == al) {
|
|
if (rtype(i->arg[0]) != RCon)
|
|
break;
|
|
sz = fn->con[i->arg[0].val].bits.i;
|
|
if (sz < 0 || sz >= INT_MAX-3)
|
|
diag("isel: invalid alloc size");
|
|
sz = (sz + n-1) & -n;
|
|
sz /= 4;
|
|
fn->tmp[i->to.val].slot = fn->slot;
|
|
fn->slot += sz;
|
|
*i = (Ins){.op = ONop};
|
|
}
|
|
|
|
/* process basic blocks */
|
|
n = fn->ntmp;
|
|
ainfo = emalloc(n * sizeof ainfo[0]);
|
|
for (b=fn->start; b; b=b->link) {
|
|
curi = &insb[NIns];
|
|
for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++)
|
|
for (p=(*sb)->phi; p; p=p->link) {
|
|
for (a=0; p->blk[a] != b; a++)
|
|
assert(a+1 < p->narg);
|
|
fixarg(&p->arg[a], p->cls, 1, fn);
|
|
}
|
|
for (m=0; m<n; m++)
|
|
ainfo[m] = (ANum){.n = 0, .i = 0};
|
|
anumber(ainfo, b, fn->con);
|
|
seljmp(b, fn);
|
|
for (i=&b->ins[b->nins]; i!=b->ins;)
|
|
sel(*--i, ainfo, fn);
|
|
b->nins = &insb[NIns] - curi;
|
|
idup(&b->ins, curi, b->nins);
|
|
}
|
|
free(ainfo);
|
|
|
|
if (debug['I']) {
|
|
fprintf(stderr, "\n> After instruction selection:\n");
|
|
printfn(fn, stderr);
|
|
}
|
|
}
|