libqbe/lisc/isel.c
Quentin Carbonneaux 72988e6aa8 oops, phi nodes rewrite for fast locals was trashed
The phi fixing mechanism can use emit(), so we need to
set curi before performing the rewrite.  Otherwise, we are
writing at random places in the instruction buffer (not so
bad because it is bounds checked), but then we lose the
instructions written (bad)!
2016-02-24 09:40:18 -05:00

969 lines
18 KiB
C

#include "lisc.h"
#include <limits.h>
/* For x86_64, do the following:
*
* - lower calls
* - check that constants are used only in
* places allowed
* - ensure immediates always fit in 32b
* - explicit machine register constraints
* on instructions like division.
* - implement fast locals (the streak of
* constant allocX in the first basic block)
* - recognize complex addressing modes
*
* Invariant: the use counts that are used
* in sel() must be sound. This
* is not so trivial, maybe the
* dce should be moved out...
*/
/* Short names for the two helper records private to this file. */
typedef struct ANum ANum;
typedef struct AClass AClass;
/* Per-temporary addressing information: filled in by anumber(),
 * read by amatch() and seladdr().
 */
struct ANum {
/* n: address-pattern number of the temporary (see anumber());
 * l, r: numbers retained for the left and right operands of
 * the instruction defining it
 */
char n, l, r;
/* instruction defining the temporary in the current block, 0 if none */
Ins *i;
/* memory reference cached by seladdr(); R when nothing is cached */
Ref mem;
};
/* amatch() is recursive and is used by seladdr() before its definition */
static void amatch(Addr *, Ref, ANum *, Fn *, int);
/* Map a floating-point comparison code (FC*) to the integer
 * condition code (IC*) used to test its outcome.  An unknown
 * code is reported through diag().
 */
static int
fcmptoi(int fc)
{
	int ic;

	switch (fc) {
	case FCeq:
		ic = ICeq;
		break;
	case FCne:
		ic = ICne;
		break;
	case FClt:
		ic = ICult;
		break;
	case FCgt:
		ic = ICugt;
		break;
	case FCge:
		ic = ICuge;
		break;
	case FCo:
		ic = ICXnp;
		break;
	case FCuo:
		ic = ICXp;
		break;
	default:
		diag("isel: fcmptoi defaulted");
		/* fallthrough */
	case FCle:
		ic = ICule;
		break;
	}
	return ic;
}
/* Tell whether op is a comparison.  When it is, return 1 and
 * store the operand class in *pk and the integer condition
 * code in *pc (either pointer may be null).  Return 0 for any
 * other opcode, leaving *pk and *pc untouched.
 */
static int
iscmp(int op, int *pk, int *pc)
{
	int cls, cond;

	if (op >= OCmpw && op <= OCmpw1) {
		cls = Kw;
		cond = op - OCmpw;
	} else if (op >= OCmpl && op <= OCmpl1) {
		cls = Kl;
		cond = op - OCmpl;
	} else if (op >= OCmps && op <= OCmps1) {
		/* floating-point codes are translated to integer ones */
		cls = Ks;
		cond = fcmptoi(op - OCmps);
	} else if (op >= OCmpd && op <= OCmpd1) {
		cls = Kd;
		cond = fcmptoi(op - OCmpd);
	} else {
		return 0;
	}
	if (pk != 0)
		*pk = cls;
	if (pc != 0)
		*pc = cond;
	return 1;
}
/* Return non-zero when the constant r cannot be used as an
 * instruction immediate, i.e. when it is a bit pattern that
 * does not fit in a signed 32-bit integer.  r must be an RCon.
 */
static int
noimm(Ref r, Fn *fn)
{
	Con *c;
	int64_t bits;

	assert(rtype(r) == RCon);
	c = &fn->con[r.val];
	if (c->type == CBits) {
		bits = c->bits.i;
		return !(INT32_MIN <= bits && bits <= INT32_MAX);
	}
	if (c->type != CAddr)
		diag("isel: invalid constant");
	/* we only support the 'small' code model
	 * of the ABI, this means that data can
	 * always be addressed with 32 bits
	 */
	return 0;
}
/* Return the slot recorded for r when it is a temporary,
 * -1 for any other kind of reference.
 */
static int
rslot(Ref r, Fn *fn)
{
	return rtype(r) == RTmp ? fn->tmp[r.val].slot : -1;
}
/* Return the class expected for argument number n of the
 * instruction i.  Must not be called on comparisons.
 */
static int
argcls(Ins *i, int n)
{
	int op, first;

	op = i->op;
	first = n == 0;
	switch (op) {
	case OStorel:
		return Kl;
	case OStoreb:
	case OStoreh:
	case OStorew:
		/* stored value first, then the address */
		return first ? Kw : Kl;
	case OStores:
		return first ? Ks : Kl;
	case OStored:
		return first ? Kd : Kl;
	}
	if (op >= OCmpw && op <= OCmpd1)
		diag("isel: invalid call to argcls");
	if (isload(op))
		return Kl;
	if (isext(op))
		return Kw;
	return i->cls;
}
/* Legalize the argument *r, used with class k.
 *
 * - a constant used with a floating-point class is stashed
 *   with stashfp() and replaced by a memory reference to the
 *   ".Lfp<n>" label;
 * - outside of phis (phi == 0), a constant rejected by noimm()
 *   is copied into a fresh long temporary;
 * - a temporary that has a slot is replaced by a fresh
 *   temporary receiving the slot address.
 *
 * The last two cases emit an instruction, so curi moves.
 */
static void
fixarg(Ref *r, int k, int phi, Fn *fn)
{
	Addr fpmem;
	Ref arg, tmp;
	int slot, fpid, iscon;

	arg = *r;
	slot = rslot(arg, fn);
	iscon = rtype(arg) == RCon;
	if (iscon && KBASE(k) == 1) {
		/* floating points cannot be immediates,
		 * load them from memory instead
		 */
		tmp = MEM(fn->nmem);
		vgrow(&fn->mem, ++fn->nmem);
		memset(&fpmem, 0, sizeof fpmem);
		fpmem.offset.type = CAddr;
		fpid = stashfp(fn->con[arg.val].bits.i, KWIDE(k));
		sprintf(fpmem.offset.label, ".Lfp%d", fpid);
		fn->mem[fn->nmem-1] = fpmem;
		*r = tmp;
		return;
	}
	if (iscon && !phi && noimm(arg, fn)) {
		/* the constant does not fit in a 32bit
		 * signed integer, go through a long
		 * temporary
		 */
		tmp = newtmp("isel", fn);
		emit(OCopy, Kl, tmp, arg, R);
		*r = tmp;
		return;
	}
	if (slot != -1) {
		/* the address of a fast local is
		 * materialized right before the
		 * instruction
		 */
		tmp = newtmp("isel", fn);
		emit(OAddr, Kl, tmp, SLOT(slot), R);
		*r = tmp;
	}
}
/* Add du to the use count of r; references that are not
 * temporaries are ignored.
 */
static void
chuse(Ref r, int du, Fn *fn)
{
	if (rtype(r) != RTmp)
		return;
	fn->tmp[r.val].nuse += du;
}
/* Replace the address temporary *r by a memory reference
 * built with amatch().  Use counts are kept in sync: the
 * temporary loses one use, the base and index of the new
 * address gain one.  The memory reference is cached in the
 * ANum entry only when neither its base nor its index is a
 * temporary.  Non-temporary references are left alone.
 */
static void
seladdr(Ref *r, ANum *an, Fn *fn)
{
	Addr addr;
	Ref tmp, mem;
	ANum *info;

	tmp = *r;
	if (rtype(tmp) != RTmp)
		return;
	chuse(tmp, -1, fn);
	info = &an[tmp.val];
	mem = info->mem;
	if (req(mem, R)) {
		/* nothing cached, match a fresh address */
		amatch(&addr, tmp, an, fn, 1);
		vgrow(&fn->mem, ++fn->nmem);
		fn->mem[fn->nmem-1] = addr;
		mem = MEM(fn->nmem-1);
		chuse(addr.base, +1, fn);
		chuse(addr.index, +1, fn);
		if (rtype(addr.base) != RTmp && rtype(addr.index) != RTmp)
			info->mem = mem;
	}
	*r = mem;
}
/* Emit the flag-setting comparison of arg[0] and arg[1] in
 * class k.
 *
 * A constant first operand is exchanged with the second one
 * (arg[] is modified in place); callers are responsible for
 * adjusting the condition code accordingly.  After the swap
 * the first operand must not be a constant.
 *
 * Both operands of the emitted OXCmp are legalized with
 * fixarg().  The operand array is captured right after the
 * emit() because fixarg() may itself emit instructions and
 * move curi; going through curi again would then address the
 * wrong instruction.
 */
static void
selcmp(Ref arg[2], int k, Fn *fn)
{
	Ref r, *iarg;

	if (rtype(arg[0]) == RCon) {
		r = arg[1];
		arg[1] = arg[0];
		arg[0] = r;
	}
	assert(rtype(arg[0]) != RCon);
	emit(OXCmp, k, R, arg[1], arg[0]);
	iarg = curi->arg;
	fixarg(&iarg[0], k, 0, fn);
	/* the second operand can be a fast local too */
	fixarg(&iarg[1], k, 0, fn);
}
/* Select machine instructions for the IL instruction i.
 *
 * i is received by value since it is edited locally before
 * being emitted; an is the addressing information computed by
 * anumber() for the current block.  Output goes through
 * emit()/emiti(), which move curi.
 *
 * An instruction defining a temporary without uses, and that
 * involves no machine register, is dropped; the use counts of
 * its arguments are decremented.
 *
 * Before returning, every instruction emitted by this call is
 * checked: none may still have a fast local as argument.
 */
static void
sel(Ins i, ANum *an, Fn *fn)
{
	Ref r0, r1, *iarg;
	int x, k, kc;
	int64_t val;
	Ins *i0;

	if (rtype(i.to) == RTmp)
	if (!isreg(i.to) && !isreg(i.arg[0]) && !isreg(i.arg[1]))
	if (fn->tmp[i.to.val].nuse == 0) {
		chuse(i.arg[0], -1, fn);
		chuse(i.arg[1], -1, fn);
		return;
	}
	i0 = curi;
	k = i.cls;
	switch (i.op) {
	case ODiv:
	case ORem:
		/* the quotient is read from RAX and the
		 * remainder from RDX; the other register
		 * of the pair is copied to R
		 */
		if (i.op == ODiv)
			r0 = TMP(RAX), r1 = TMP(RDX);
		else
			r0 = TMP(RDX), r1 = TMP(RAX);
		emit(OCopy, k, i.to, r0, R);
		emit(OCopy, k, R, r1, R);
		if (rtype(i.arg[1]) == RCon) {
			/* immediates not allowed for
			 * divisions in x86
			 */
			r0 = newtmp("isel", fn);
		} else
			r0 = i.arg[1];
		emit(OXDiv, k, R, r0, R);
		emit(OSign, k, TMP(RDX), TMP(RAX), R);
		emit(OCopy, k, TMP(RAX), i.arg[0], R);
		if (rtype(i.arg[1]) == RCon)
			emit(OCopy, k, r0, i.arg[1], R);
		break;
	case ONop:
		break;
	case OStored:
	case OStores:
	case OStorel:
	case OStorew:
	case OStoreh:
	case OStoreb:
		/* floating-point constants are stored with
		 * the integer store of the same width; the
		 * two rewrites must be exclusive, otherwise
		 * a double constant ends up in a 32bit store
		 */
		if (rtype(i.arg[0]) == RCon) {
			if (i.op == OStored)
				i.op = OStorel;
			else if (i.op == OStores)
				i.op = OStorew;
		}
		seladdr(&i.arg[1], an, fn);
		goto Emit;
	case_OLoad:
		seladdr(&i.arg[0], an, fn);
		goto Emit;
	case OXPush:
	case OCall:
	case OSAlloc:
	case OCopy:
	case OAdd:
	case OSub:
	case OMul:
	case OAnd:
	case OXTest:
	case_OExt:
	Emit:
		emiti(i);
		/* fixarg() may emit and hence move curi, so
		 * keep a pointer to the arguments of the
		 * instruction that was just emitted; i still
		 * describes it for argcls()
		 */
		iarg = curi->arg;
		fixarg(&iarg[0], argcls(&i, 0), 0, fn);
		fixarg(&iarg[1], argcls(&i, 1), 0, fn);
		break;
	case OAlloc:
	case OAlloc+1:
	case OAlloc+2: /* == OAlloc1 */
		/* we need to make sure
		 * the stack remains aligned
		 * (rsp = 0) mod 16
		 */
		if (rtype(i.arg[0]) == RCon) {
			assert(fn->con[i.arg[0].val].type == CBits);
			val = fn->con[i.arg[0].val].bits.i;
			val = (val + 15) & ~INT64_C(15);
			if (val < 0 || val > INT32_MAX)
				diag("isel: alloc too large");
			emit(OAlloc, Kl, i.to, getcon(val, fn), R);
		} else {
			/* r0 = (i.arg[0] + 15) & -16 */
			r0 = newtmp("isel", fn);
			r1 = newtmp("isel", fn);
			emit(OSAlloc, Kl, i.to, r0, R);
			/* NOTE(review): the literal class 1 is
			 * presumably Kl, confirm against lisc.h
			 */
			emit(OAnd, 1, r0, r1, getcon(-16, fn));
			emit(OAdd, 1, r1, i.arg[0], getcon(15, fn));
		}
		break;
	default:
		if (isext(i.op))
			goto case_OExt;
		if (isload(i.op))
			goto case_OLoad;
		if (iscmp(i.op, &kc, &x)) {
			/* selcmp() swaps a constant first
			 * operand, mirror the condition
			 */
			if (rtype(i.arg[0]) == RCon)
				x = icmpop(x);
			emit(OXSet+x, k, i.to, R, R);
			selcmp(i.arg, kc, fn);
			break;
		}
		diag("isel: non-exhaustive implementation");
	}
	/* walk over everything emitted by this call */
	while (i0 > curi && --i0)
		if (rslot(i0->arg[0], fn) != -1
		|| rslot(i0->arg[1], fn) != -1)
			diag("isel: usupported address argument");
}
/* Scan the instructions of [i0, i) from the last one to the
 * first one and return the closest instruction treated as
 * flag-setting (comparison, OAdd, OSub, OAnd).  Copies,
 * stores, extensions and loads are skipped; any other
 * instruction stops the search.  Return 0 when nothing is
 * found.
 */
static Ins *
flagi(Ins *i0, Ins *i)
{
	Ins *p;

	for (p = i; p > i0;) {
		--p;
		switch (p->op) {
		case OAdd: /* flag-setting */
		case OSub:
		case OAnd:
			return p;
		case OCopy: /* flag-transparent */
		case OStored:
		case OStores:
		case OStorel:
		case OStorew:
		case OStoreh:
		case OStoreb:
			continue;
		}
		if (iscmp(p->op, 0, 0))
			return p;
		if (!isext(p->op) && !isload(p->op))
			return 0;
	}
	return 0;
}
/* Lower the jump that ends block b.
 *
 * - Typed returns (JRetw, JRetl, JRets, JRetd) become JRet0 and
 *   a copy of the returned value into RAX or XMM0 is emitted.
 * - A JJnz on a constant becomes an unconditional JJmp.
 * - Any other JJnz becomes a conditional jump JXJc+c, reusing
 *   the instruction found by flagi() when it defines the
 *   tested value, and comparing the value against zero
 *   otherwise.
 *
 * Other jump types are left untouched.
 */
static void
seljmp(Blk *b, Fn *fn)
{
Ref r;
int c, k;
Ins *fi;
switch (b->jmp.type) {
default:
return;
case JRetc:
assert(!"retc todo");
case JRetw:
case JRetl:
case JRets:
case JRetd:
/* the class of the returned value is derived from the jump
 * type (presumably JRetw..JRetd follow the order of the
 * classes, confirm against lisc.h)
 */
k = b->jmp.type - JRetw;
b->jmp.type = JRet0;
r = b->jmp.arg;
b->jmp.arg = R;
if (KBASE(k) == 0)
emit(OCopy, k, TMP(RAX), r, R);
else
emit(OCopy, k, TMP(XMM0), r, R);
return;
case JJnz:;
}
r = b->jmp.arg;
b->jmp.arg = R;
assert(!req(r, R));
if (rtype(r) == RCon) {
/* the outcome is known: keep a single successor, which is
 * s2 when the constant is zero and s1 otherwise
 */
b->jmp.type = JJmp;
if (req(r, CON_Z))
b->s1 = b->s2;
b->s2 = 0;
return;
}
fi = flagi(b->ins, &b->ins[b->nins]);
if (fi && req(fi->to, r)) {
if (iscmp(fi->op, &k, &c)) {
/* selcmp() swaps a constant first operand, the condition
 * has to be mirrored in that case
 */
if (rtype(fi->arg[0]) == RCon)
c = icmpop(c);
b->jmp.type = JXJc + c;
if (fn->tmp[r.val].nuse == 1) {
/* the jump is the only use of the comparison result:
 * emit the compare here and erase the instruction
 */
assert(fn->tmp[r.val].ndef == 1);
selcmp(fi->arg, k, fn);
*fi = (Ins){.op = ONop};
}
return;
}
if (fi->op == OAnd && fn->tmp[r.val].nuse == 1
&& (rtype(fi->arg[0]) == RTmp ||
rtype(fi->arg[1]) == RTmp)) {
/* the result of the 'and' is only tested by this jump:
 * turn it into an OXTest without destination
 */
fi->op = OXTest;
fi->to = R;
b->jmp.type = JXJc + ICne;
if (rtype(fi->arg[1]) == RCon) {
/* move the constant to the first argument */
r = fi->arg[1];
fi->arg[1] = fi->arg[0];
fi->arg[0] = r;
}
return;
}
if (fn->tmp[r.val].nuse > 1) {
/* the defining instruction is kept and treated as
 * flag-setting by flagi(); no extra compare is emitted
 */
b->jmp.type = JXJc + ICne;
return;
}
}
/* general case: compare the value against zero */
selcmp((Ref[2]){r, CON_Z}, Kw, fn); /* todo, add long branch if non-zero */
b->jmp.type = JXJc + ICne;
}
/* Classification of one argument or parameter, computed by
 * classify() and aclass().
 */
struct AClass {
/* 0: passed in registers; 1: aggregate passed in memory (set by
 * aclass() and classify()); 2: scalar passed on the stack because
 * registers ran out (set by classify())
 */
int inmem;
/* alignment as a power-of-two exponent (aclass() computes 1u << align) */
int align;
/* size in bytes, rounded up by aclass(); 8 for scalars */
uint size;
/* class of each 8-byte piece, -1 when aclass() found no segment for
 * it; selpar() later reuses these fields to hold temporary numbers
 */
int cls[2];
};
/* Classify the aggregate type t for argument passing and store
 * the result in a.
 *
 * a->size receives the size of t rounded up to a multiple of
 * its alignment (at least 8), a->align the alignment exponent
 * of t.  Dark types and types larger than 16 bytes are flagged
 * as passed in memory (a->inmem = 1) and a->cls is left
 * untouched.  Otherwise a->inmem is 0 and a->cls[e] receives
 * the class of the e-th 8-byte piece: Kl as soon as one of its
 * segments is not floating point, Kd when all of them are, and
 * -1 when the piece has no segment.
 */
static void
aclass(AClass *a, Typ *t)
{
	int e, s, n, cls;
	uint sz, al;

	sz = t->size;
	al = 1u << t->align;
	/* the ABI requires sizes to be rounded
	 * up to the nearest multiple of 8, moreover
	 * it makes it easy load and store structures
	 * in registers
	 */
	if (al < 8)
		al = 8;
	sz = (sz + al-1) & -al;
	a->size = sz;
	a->align = t->align;
	if (t->dark || sz > 16) {
		/* large or unaligned structures are
		 * required to be passed in memory
		 */
		a->inmem = 1;
		return;
	}
	/* do not rely on the caller handing out
	 * zeroed memory, classify() tests this
	 * field right after the call
	 */
	a->inmem = 0;
	for (e=0, s=0; e<2; e++) {
		cls = -1;
		/* consume segments until 8 bytes are covered
		 * or the segment list ends (len == 0)
		 */
		for (n=0; n<8 && t->seg[s].len; s++) {
			if (t->seg[s].flt) {
				if (cls == -1)
					cls = Kd;
			} else
				cls = Kl;
			n += t->seg[s].len;
		}
		assert(n <= 8);
		a->cls[e] = cls;
	}
}
/* Classify the arguments or parameters [i0, i1) into ac, one
 * AClass per instruction.  op is the opcode of scalar entries
 * (OArg or OPar); every other instruction is handled as an
 * aggregate whose type index is stored in its first argument.
 *
 * Six integer and eight sse registers are available.  A scalar
 * that finds no register left is marked inmem = 2; an
 * aggregate that does not fit entirely in the remaining
 * registers is marked inmem = 1.
 *
 * The result holds the number of integer registers used in
 * bits 4 and up, and the number of sse registers used in bits
 * 8 and up, which is the layout decoded by calluse().
 */
static int
classify(Ins *i0, Ins *i1, AClass *ac, int op)
{
	int nint, ni, nsse, ns, n, *pn;
	AClass *a;
	Ins *i;

	nint = 6;
	nsse = 8;
	for (i=i0, a=ac; i<i1; i++, a++) {
		if (i->op == op) {
			if (KBASE(i->cls) == 0)
				pn = &nint;
			else
				pn = &nsse;
			if (*pn > 0) {
				--*pn;
				a->inmem = 0;
			} else
				a->inmem = 2;
			a->align = 3;
			a->size = 8;
			a->cls[0] = i->cls;
		} else {
			n = i->arg[0].val & AMask;
			aclass(a, &typ[n]);
			if (a->inmem)
				continue;
			ni = ns = 0;
			/* integer pieces are recognized with
			 * KBASE(), like scalars above; a piece
			 * without class (-1) is still counted
			 * as sse, as it always was
			 */
			for (n=0; n<2; n++)
				if (a->cls[n] >= 0 && KBASE(a->cls[n]) == 0)
					ni++;
				else
					ns++;
			/* an aggregate that needs exactly the
			 * registers left still fits
			 */
			if (nint >= ni && nsse >= ns) {
				nint -= ni;
				nsse -= ns;
			} else
				a->inmem = 1;
		}
	}
	return ((6-nint) << 4) | ((8-nsse) << 8);
}
/* Register table; calluse() and rarg() take integer argument
 * registers from its first entries, in this order.
 * NOTE(review): the name suggests these are the caller-saved
 * registers, confirm against the register allocator.
 */
int rsave[/* NRSave */] = {
RDI, RSI, RDX, RCX, R8, R9, R10, R11, RAX,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14
};
/* compile-time check: the array type below has a negative size,
 * and fails to compile, unless rsave has exactly NRSave entries
 */
typedef char make_sure_rsave_has_correct_size[
sizeof rsave == NRSave * sizeof(int) ? 1 : -1
];
/* NOTE(review): presumably the callee-saved registers, confirm */
int rclob[NRClob] = {RBX, R12, R13, R14, R15};
/* Return the set of registers defined by the call i, decoded
 * from its second argument: bits 0-1 count the integer
 * registers (RAX, then RDX), bits 2-3 the sse registers
 * (XMM0, then XMM1).  When p is not null the two counts are
 * stored in p[0] and p[1].
 */
ulong
calldef(Ins i, int p[2])
{
	ulong defs;
	int nint, nsse;

	nint = i.arg[1].val & 3;
	nsse = (i.arg[1].val >> 2) & 3;
	defs = 0;
	if (nint >= 1) {
		defs |= BIT(RAX);
		if (nint >= 2)
			defs |= BIT(RDX);
	}
	if (nsse >= 1) {
		defs |= BIT(XMM0);
		if (nsse >= 2)
			defs |= BIT(XMM1);
	}
	if (p != 0) {
		p[0] = nint;
		p[1] = nsse;
	}
	return defs;
}
/* Return the set of registers read by the call i, decoded from
 * its second argument: bits 4-7 count the integer argument
 * registers (the first entries of rsave), bits 8-11 the sse
 * argument registers (XMM0 and up).  When p is not null the
 * two counts are stored in p[0] and p[1].
 */
ulong
calluse(Ins i, int p[2])
{
	ulong uses;
	int j, nint, nsse;

	nint = (i.arg[1].val >> 4) & 15;
	nsse = (i.arg[1].val >> 8) & 15;
	uses = 0;
	for (j = nint; j-- > 0;)
		uses |= BIT(rsave[j]);
	for (j = nsse; j-- > 0;)
		uses |= BIT(XMM0+j);
	if (p != 0) {
		p[0] = nint;
		p[1] = nsse;
	}
	return uses;
}
/* Return the next argument register for a value of class ty
 * and advance the matching counter: *ni indexes rsave for
 * integer classes, *ns counts from XMM0 for the others.
 */
static Ref
rarg(int ty, int *ni, int *ns)
{
	int reg;

	if (KBASE(ty) == 0)
		reg = rsave[(*ni)++];
	else
		reg = XMM0 + (*ns)++;
	return TMP(reg);
}
/* Lower one call.  [i0, i1) are the OArg/OArgc instructions of
 * the call and i1 is the OCall itself.
 *
 * Everything is produced with emit().  NOTE(review): emit() is
 * defined elsewhere; isel() sets curi to the end of insb before
 * calling this function, so the instructions emitted first
 * presumably execute last.  The comments below read the code
 * with that assumption.
 */
static void
selcall(Fn *fn, Ins *i0, Ins *i1)
{
Ins *i;
AClass *ac, *a;
int ca, ni, ns;
uint stk, sz;
Ref r, r1, r2;
ac = alloc((i1-i0) * sizeof ac[0]);
ca = classify(i0, i1, ac, OArg);
/* compute the stack space taken by the arguments passed in
 * memory, walking them from the last to the first
 */
for (stk=0, a=&ac[i1-i0]; a>ac;)
if ((--a)->inmem) {
assert(a->align <= 4);
stk += a->size;
if (a->align == 4) /* todo, bigger alignments */
stk += stk & 15;
}
/* sizes are multiples of 8, so this rounds stk up to a
 * multiple of 16
 */
stk += stk & 15;
if (!req(i1->arg[1], R))
diag("struct-returning function not implemented");
if (stk) {
/* give the argument area back after the call */
r = getcon(-(int64_t)stk, fn);
emit(OSAlloc, Kl, R, r, R);
}
/* the result is read from RAX; the low bit of the CALL
 * argument is the integer return count read by calldef(), the
 * bits from classify() are the register counts read by
 * calluse()
 */
emit(OCopy, i1->cls, i1->to, TMP(RAX), R);
emit(OCall, i1->cls, R, i1->arg[0], CALL(1 | ca));
/* arguments passed in registers */
for (i=i0, a=ac, ni=ns=0; i<i1; i++, a++) {
if (a->inmem)
continue;
r1 = rarg(a->cls[0], &ni, &ns);
if (i->op == OArgc) {
/* aggregate: load its 8-byte pieces from the address
 * held in the second argument
 */
if (a->size > 8) {
r2 = rarg(a->cls[1], &ni, &ns);
r = newtmp("isel", fn);
emit(OLoad, a->cls[1], r2, r, R);
emit(OAdd, Kl, r, i->arg[1], getcon(8, fn));
}
emit(OLoad, a->cls[0], r1, i->arg[1], R);
} else
emit(OCopy, i->cls, r1, i->arg[0], R);
}
/* arguments passed in memory; stk is decremented by the
 * space of each of them, what remains is padding
 */
for (i=i0, a=ac; i<i1; i++, a++) {
if (!a->inmem)
continue;
sz = a->size;
if (a->align == 4)
sz += (stk-sz) & 15;
stk -= sz;
if (i->op == OArgc) {
assert(!"argc todo 1");
} else {
emit(OXPush, Kl, R, i->arg[0], R);
}
}
if (stk) {
/* one extra push of zero for the 8 bytes of padding */
assert(stk == 8);
emit(OXPush, Kl, R, CON_Z, R);
}
}
/* Lower the parameters [i0, i1) of the function.
 *
 * Unlike the rest of the file, the instructions are written
 * forward from the start of insb (curi = insb; *curi++ = ...).
 * On return, curi points one past the last instruction written;
 * isel() copies insb[0 .. curi) in front of the first block.
 */
static void
selpar(Fn *fn, Ins *i0, Ins *i1)
{
AClass *ac, *a;
Ins *i;
int ni, ns, stk, al;
Ref r, r1;
ac = alloc((i1-i0) * sizeof ac[0]);
classify(i0, i1, ac, OPar);
curi = insb;
ni = ns = 0;
assert(NAlign == 3);
stk = -2;
/* first pass: fetch every parameter from its register or
 * from its stack slot
 */
for (i=i0, a=ac; i<i1; i++, a++) {
switch (a->inmem) {
case 1:
assert(!"argc todo 2");
continue;
case 2:
/* scalar passed on the stack: slots -4, -6, ... are used
 * for successive parameters
 */
stk -= 2;
*curi++ = (Ins){OLoad, i->to, {SLOT(stk)}, i->cls};
continue;
}
r1 = rarg(a->cls[0], &ni, &ns);
if (i->op == OParc) {
/* aggregate in registers: save each register into a fresh
 * temporary and remember its number in a->cls[] for the
 * second pass
 */
r = newtmp("isel", fn);
*curi++ = (Ins){OCopy, r, {r1}, Kl};
a->cls[0] = r.val;
if (a->size > 8) {
r1 = rarg(a->cls[1], &ni, &ns);
r = newtmp("isel", fn);
*curi++ = (Ins){OCopy, r, {r1}, Kl};
a->cls[1] = r.val;
}
} else
*curi++ = (Ins){OCopy, i->to, {r1}, i->cls};
}
/* second pass: allocate stack memory for each aggregate
 * received in registers and store the saved pieces into it
 */
for (i=i0, a=ac; i<i1; i++, a++) {
if (i->op != OParc || a->inmem)
continue;
assert(NAlign == 3);
/* pick the alloc opcode OAlloc+al from the alignment exponent */
for (al=0; a->align >> (al+2); al++)
;
r = TMP(a->cls[0]);
r1 = i->to;
*curi++ = (Ins){OAlloc+al, r1, {getcon(a->size, fn)}, Kl};
*curi++ = (Ins){OStorel, R, {r, r1}, 0};
if (a->size > 8) {
/* second piece goes 8 bytes further */
r = newtmp("isel", fn);
*curi++ = (Ins){OAdd, r, {r1, getcon(8, fn)}, Kl};
r1 = TMP(a->cls[1]);
*curi++ = (Ins){OStorel, R, {r1, r}, 0};
}
}
}
/* Return the address-pattern number of r: the number stored
 * in ai for a temporary, 2 for a constant.  Other kinds of
 * references are reported through diag().
 */
static int
aref(Ref r, ANum *ai)
{
	int t;

	t = rtype(r);
	if (t == RTmp)
		return ai[r.val].n;
	if (t != RCon)
		diag("isel: aref defaulted");
	return 2;
}
/* Return 1 when r is a CBits constant equal to 1, 2, 4 or 8,
 * that is, when it is usable as the scale of an address;
 * return 0 otherwise.
 */
static int
ascale(Ref r, Con *con)
{
	if (rtype(r) == RCon && con[r.val].type == CBits)
		switch (con[r.val].bits.i) {
		case 1:
		case 2:
		case 4:
		case 8:
			return 1;
		}
	return 0;
}
/* Number the temporaries defined in block b according to the
 * shape of the address they compute, and record in ai the
 * instruction defining each of them.  The numbers are read
 * back by amatch().
 */
static void
anumber(ANum *ai, Blk *b, Con *con)
{
/* This should be made obsolete by a proper
* reassoc pass.
*
* Rules:
*
* RTmp(_) -> 0 tmp
* ( RTmp(_) -> 1 slot )
* RCon(_) -> 2 con
* 0 * 2 -> 3 s * i (when constant is 1,2,4,8)
*/
/* add[x][y] is the number given to the sum of operands
 * numbered x and y; combinations left out are 0
 */
static char add[10][10] = {
[2] [2] = 2, /* folding */
[2] [5] = 5, [5] [2] = 5,
[2] [6] = 6, [6] [2] = 6,
[2] [7] = 7, [7] [2] = 7,
[0] [0] = 4, /* 4: b + s * i */
[0] [3] = 4, [3] [0] = 4,
[2] [3] = 5, [3] [2] = 5, /* 5: o + s * i */
[0] [2] = 6, [2] [0] = 6, /* 6: o + b */
[2] [4] = 7, [4] [2] = 7, /* 7: o + b + s * i */
[0] [5] = 7, [5] [0] = 7,
[6] [3] = 7, [3] [6] = 7,
};
int a, a1, a2, n1, n2, t1, t2;
Ins *i;
for (i=b->ins; i-b->ins < b->nins; i++) {
if (rtype(i->to) == RTmp)
ai[i->to.val].i = i;
if (i->op != OAdd && i->op != OMul)
continue;
a1 = aref(i->arg[0], ai);
a2 = aref(i->arg[1], ai);
/* t1, t2: the operand can also be taken as a plain
 * temporary (number 0); false for numbers 1 and 2
 */
t1 = a1 != 1 && a1 != 2;
t2 = a2 != 1 && a2 != 2;
if (i->op == OAdd) {
/* try the operands with their own numbers, then with
 * one or both of them demoted to 0, and keep the
 * combination with the largest table entry; n1 and n2
 * remember the numbers retained for each operand
 */
a = add[n1 = a1][n2 = a2];
if (t1 && a < add[0][a2])
a = add[n1 = 0][n2 = a2];
if (t2 && a < add[a1][0])
a = add[n1 = a1][n2 = 0];
if (t1 && t2 && a < add[0][0])
a = add[n1 = 0][n2 = 0];
} else {
/* multiplication: number 3 when one operand is a valid
 * scale and the other one can be taken as a temporary
 */
n1 = n2 = a = 0;
if (ascale(i->arg[0], con) && t2)
a = 3, n1 = 2, n2 = 0;
if (t1 && ascale(i->arg[1], con))
a = 3, n1 = 0, n2 = 2;
}
ai[i->to.val].n = a;
ai[i->to.val].l = n1;
ai[i->to.val].r = n2;
}
}
/* Build in *a the address computed by r, following the numbers
 * assigned by anumber().  top is non-zero for the outermost
 * call, which clears *a first; recursive calls pass 0 and add
 * to the address under construction.
 */
static void
amatch(Addr *a, Ref r, ANum *ai, Fn *fn, int top)
{
Ins *i;
int nl, nr, t, s;
Ref al, ar;
if (top)
memset(a, 0, sizeof *a);
if (rtype(r) == RCon) {
/* constants are accumulated into the offset */
addcon(&a->offset, &fn->con[r.val]);
return;
}
assert(rtype(r) == RTmp);
i = ai[r.val].i;
nl = ai[r.val].l;
nr = ai[r.val].r;
if (i) {
/* order the operands of the defining instruction so
 * that al is the one with the smaller number (nl <= nr);
 * al and ar stay unset when there is no instruction
 */
if (nl > nr) {
al = i->arg[1];
ar = i->arg[0];
t = nl, nl = nr, nr = t;
} else {
al = i->arg[0];
ar = i->arg[1];
}
}
switch (ai[r.val].n) {
default:
diag("isel: amatch defaulted");
case 3: /* s * i */
if (!top) {
/* part of a larger address: al is the index and the
 * constant ar is the scale
 */
a->index = al;
a->scale = fn->con[ar.val].bits.i;
} else
/* a multiplication alone is simply used as base */
a->base = r;
break;
case 4: /* b + s * i */
switch (nr) {
case 0:
/* sum of two temporaries: when ar is a fast local,
 * exchange the operands so that it becomes the base
 */
if (fn->tmp[ar.val].slot != -1) {
al = i->arg[1];
ar = i->arg[0];
}
a->index = ar;
a->scale = 1;
break;
case 3:
amatch(a, ar, ai, fn, 0);
break;
}
/* al is the base, handled by the next case (fallthrough) */
r = al;
case 0:
/* plain temporary: it is the base, replaced by its
 * stack slot when it is a fast local
 */
s = fn->tmp[r.val].slot;
if (s != -1)
r = SLOT(s);
a->base = r;
break;
case 2: /* constants */
case 5: /* o + s * i */
case 6: /* o + b */
case 7: /* o + b + s * i */
amatch(a, ar, ai, fn, 0);
amatch(a, al, ai, fn, 0);
break;
}
}
/* instruction selection
* requires use counts (as given by parsing)
*
* Runs, in order: parameter lowering, call lowering, slot
* assignment for the fast locals of the first block, and
* selection of the jump and instructions of every block.
* The instruction arrays of the blocks are replaced.
*/
void
isel(Fn *fn)
{
Blk *b, **sb;
Ins *i, *i0, *ip;
Phi *p;
uint a;
int n, m, al;
int64_t sz;
ANum *ainfo;
/* no temporary has a slot yet */
for (n=0; n<fn->ntmp; n++)
fn->tmp[n].slot = -1;
fn->slot = 0;
/* lower arguments */
/* i stops at the first instruction of the start block that
 * is not a parameter
 */
for (b=fn->start, i=b->ins; i-b->ins < b->nins; i++)
if (i->op != OPar && i->op != OParc)
break;
selpar(fn, b->ins, i);
/* new start block: what selpar() wrote at the beginning of
 * insb, followed by the remaining instructions
 */
n = b->nins - (i - b->ins) + (curi - insb);
i0 = alloc(n * sizeof(Ins));
ip = icpy(ip = i0, insb, curi - insb);
ip = icpy(ip, i, &b->ins[b->nins] - i);
b->nins = n;
b->ins = i0;
/* lower function calls */
for (b=fn->start; b; b=b->link) {
/* each block is rebuilt from the end of insb, walking
 * its instructions from the last to the first
 */
curi = &insb[NIns];
for (i=&b->ins[b->nins]; i!=b->ins;) {
if ((--i)->op == OCall) {
/* i0 goes back to the first argument of the call */
for (i0=i; i0>b->ins; i0--)
if ((i0-1)->op != OArg)
if ((i0-1)->op != OArgc)
break;
selcall(fn, i0, i);
i = i0;
continue;
}
assert(i->op != OArg && i->op != OArgc);
emiti(*i);
}
b->nins = &insb[NIns] - curi;
idup(&b->ins, curi, b->nins);
}
if (debug['A']) {
fprintf(stderr, "\n> After call lowering:\n");
printfn(fn, stderr);
}
/* assign slots to fast allocs */
b = fn->start;
assert(NAlign == 3 && "change n=4 and sz /= 4 below");
/* one pass per alloc opcode, n being the alignment in bytes
 * (4, 8, 16); slots are counted in units of 4 bytes
 */
for (al=OAlloc, n=4; al<=OAlloc1; al++, n*=2)
for (i=b->ins; i-b->ins < b->nins; i++)
if (i->op == al) {
/* an alloc of non-constant size ends the pass for
 * this alloc opcode
 */
if (rtype(i->arg[0]) != RCon)
break;
sz = fn->con[i->arg[0].val].bits.i;
if (sz < 0 || sz >= INT_MAX-3)
diag("isel: invalid alloc size");
sz = (sz + n-1) & -n;
sz /= 4;
fn->tmp[i->to.val].slot = fn->slot;
fn->slot += sz;
*i = (Ins){.op = ONop};
}
/* process basic blocks */
n = fn->ntmp;
ainfo = emalloc(n * sizeof ainfo[0]);
for (b=fn->start; b; b=b->link) {
/* curi must be set before the phi arguments are fixed
 * because fixarg() can emit instructions
 */
curi = &insb[NIns];
/* fix the arguments that b contributes to the phis of
 * its successors
 */
for (sb=(Blk*[3]){b->s1, b->s2, 0}; *sb; sb++)
for (p=(*sb)->phi; p; p=p->link) {
for (a=0; p->blk[a] != b; a++)
assert(a+1 < p->narg);
fixarg(&p->arg[a], p->cls, 1, fn);
}
/* the addressing information is per block, reset it */
for (m=0; m<n; m++)
ainfo[m] = (ANum){.n = 0, .i = 0};
anumber(ainfo, b, fn->con);
/* the jump first, then the instructions from the last to
 * the first
 */
seljmp(b, fn);
for (i=&b->ins[b->nins]; i!=b->ins;)
sel(*--i, ainfo, fn);
b->nins = &insb[NIns] - curi;
idup(&b->ins, curi, b->nins);
}
free(ainfo);
if (debug['I']) {
fprintf(stderr, "\n> After instruction selection:\n");
printfn(fn, stderr);
}
}