libqbe/amd64/sysv.c
Quentin Carbonneaux 49a4593c33 prepare for multi-target
This big diff does multiple changes to allow
the addition of new targets to qbe.  The
changes are listed below in decreasing order
of impact.

1. Add a new Target structure.

To add support for a given target, one has to
implement all the members of the Target
structure.  All the source files where changed
to use this interface where needed.

2. Single out amd64-specific code.

In this commit, the amd64 target T_amd64_sysv
is the only target available, it is implemented
in the amd64/ directory.  All the non-static
items in this directory are prefixed with either
amd64_ or amd64_sysv (for items that are
specific to the System V ABI).

3. Centralize Ops information.

There is now a file 'ops.h' that must be used to
store all the available operations together with
their metadata.  The various targets will only
select what they need; but it is beneficial that
there is only *one* place to change to add a new
instruction.

One good side effect of this change is that any
operation 'xyz' in the IL now as a corresponding
'Oxyz' in the code.

4. Misc fixes.

One notable change is that instruction selection
now generates generic comparison operations and
the lowering to the target's comparisons is done
in the emitter.

GAS directives for data are the same for many
targets, so data emission was extracted in a
file 'gas.c'.

5. Modularize the Makefile.

The Makefile now has a list of C files that
are target-independent (SRC), and one list
of C files per target.  Each target can also
use its own 'all.h' header (for example to
define registers).
2017-04-08 21:56:20 -04:00

701 lines
14 KiB
C

#include "all.h"
typedef struct AClass AClass;
typedef struct RAlloc RAlloc;
struct AClass {
int inmem;
int align;
uint size;
int cls[2];
Ref ref[2];
};
struct RAlloc {
Ins i;
RAlloc *link;
};
static void
classify(AClass *a, Typ *t, int *pn, int *pe)
{
Seg *seg;
int n, s, *cls;
for (n=0; n<t->nunion; n++) {
seg = t->seg[n];
for (s=0; *pe<2; (*pe)++) {
cls = &a->cls[*pe];
for (; *pn<8; s++) {
switch (seg[s].type) {
case SEnd:
goto Done;
case SPad:
/* don't change anything */
break;
case SFlt:
if (*cls == Kx)
*cls = Kd;
break;
case SInt:
*cls = Kl;
break;
case STyp:
classify(a, &typ[seg[s].len], pn, pe);
continue;
}
*pn += seg[s].len;
}
Done:
assert(*pn <= 8);
*pn = 0;
}
}
}
static void
typclass(AClass *a, Typ *t)
{
int e, n;
uint sz, al;
sz = t->size;
al = 1u << t->align;
/* the ABI requires sizes to be rounded
* up to the nearest multiple of 8, moreover
* it makes it easy load and store structures
* in registers
*/
if (al < 8)
al = 8;
sz = (sz + al-1) & -al;
a->size = sz;
a->align = t->align;
if (t->dark || sz > 16 || sz == 0) {
/* large or unaligned structures are
* required to be passed in memory
*/
a->inmem = 1;
return;
}
a->cls[0] = Kx;
a->cls[1] = Kx;
a->inmem = 0;
n = 0;
e = 0;
classify(a, t, &n, &e);
}
static int
retr(Ref reg[2], AClass *aret)
{
static int retreg[2][2] = {{RAX, RDX}, {XMM0, XMM0+1}};
int n, k, ca, nr[2];
nr[0] = nr[1] = 0;
ca = 0;
for (n=0; (uint)n*8<aret->size; n++) {
k = KBASE(aret->cls[n]);
reg[n] = TMP(retreg[k][nr[k]++]);
ca += 1 << (2 * k);
}
return ca;
}
static void
selret(Blk *b, Fn *fn)
{
int j, k, ca;
Ref r, r0, reg[2];
AClass aret;
j = b->jmp.type;
if (!isret(j) || j == Jret0)
return;
r0 = b->jmp.arg;
b->jmp.type = Jret0;
if (j == Jretc) {
typclass(&aret, &typ[fn->retty]);
if (aret.inmem) {
assert(rtype(fn->retr) == RTmp);
emit(Ocopy, Kl, TMP(RAX), fn->retr, R);
blit(fn->retr, 0, r0, aret.size, fn);
ca = 1;
} else {
ca = retr(reg, &aret);
if (aret.size > 8) {
r = newtmp("abi", Kl, fn);
emit(Oload, Kl, reg[1], r, R);
emit(Oadd, Kl, r, r0, getcon(8, fn));
}
emit(Oload, Kl, reg[0], r0, R);
}
} else {
k = j - Jretw;
if (KBASE(k) == 0) {
emit(Ocopy, k, TMP(RAX), r0, R);
ca = 1;
} else {
emit(Ocopy, k, TMP(XMM0), r0, R);
ca = 1 << 2;
}
}
b->jmp.arg = CALL(ca);
}
static int
argsclass(Ins *i0, Ins *i1, AClass *ac, int op, AClass *aret, Ref *env)
{
int nint, ni, nsse, ns, n, *pn;
AClass *a;
Ins *i;
if (aret && aret->inmem)
nint = 5; /* hidden argument */
else
nint = 6;
nsse = 8;
for (i=i0, a=ac; i<i1; i++, a++)
switch (i->op - op + Oarg) {
case Oarg:
if (KBASE(i->cls) == 0)
pn = &nint;
else
pn = &nsse;
if (*pn > 0) {
--*pn;
a->inmem = 0;
} else
a->inmem = 2;
a->align = 3;
a->size = 8;
a->cls[0] = i->cls;
break;
case Oargc:
n = i->arg[0].val;
typclass(a, &typ[n]);
if (a->inmem)
continue;
ni = ns = 0;
for (n=0; (uint)n*8<a->size; n++)
if (KBASE(a->cls[n]) == 0)
ni++;
else
ns++;
if (nint >= ni && nsse >= ns) {
nint -= ni;
nsse -= ns;
} else
a->inmem = 1;
break;
case Oarge:
if (op == Opar)
*env = i->to;
else
*env = i->arg[0];
break;
}
return ((6-nint) << 4) | ((8-nsse) << 8);
}
int amd64_sysv_rsave[] = {
RDI, RSI, RDX, RCX, R8, R9, R10, R11, RAX,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, -1
};
int amd64_sysv_rclob[] = {RBX, R12, R13, R14, R15, -1};
MAKESURE(sysv_arrays_ok,
sizeof amd64_sysv_rsave == (NGPS+NFPS+1) * sizeof(int) &&
sizeof amd64_sysv_rclob == (NCLR+1) * sizeof(int)
);
/* layout of call's second argument (RCall)
*
* 29 12 8 4 3 0
* |0...00|x|xxxx|xxxx|xx|xx| range
* | | | | ` gp regs returned (0..2)
* | | | ` sse regs returned (0..2)
* | | ` gp regs passed (0..6)
* | ` sse regs passed (0..8)
* ` 1 if rax is used to pass data (0..1)
*/
bits
amd64_sysv_retregs(Ref r, int p[2])
{
bits b;
int ni, nf;
assert(rtype(r) == RCall);
b = 0;
ni = r.val & 3;
nf = (r.val >> 2) & 3;
if (ni >= 1)
b |= BIT(RAX);
if (ni >= 2)
b |= BIT(RDX);
if (nf >= 1)
b |= BIT(XMM0);
if (nf >= 2)
b |= BIT(XMM1);
if (p) {
p[0] = ni;
p[1] = nf;
}
return b;
}
bits
amd64_sysv_argregs(Ref r, int p[2])
{
bits b;
int j, ni, nf, ra;
assert(rtype(r) == RCall);
b = 0;
ni = (r.val >> 4) & 15;
nf = (r.val >> 8) & 15;
ra = (r.val >> 12) & 1;
for (j=0; j<ni; j++)
b |= BIT(amd64_sysv_rsave[j]);
for (j=0; j<nf; j++)
b |= BIT(XMM0+j);
if (p) {
p[0] = ni + ra;
p[1] = nf;
}
return b | (ra ? BIT(RAX) : 0);
}
static Ref
rarg(int ty, int *ni, int *ns)
{
if (KBASE(ty) == 0)
return TMP(amd64_sysv_rsave[(*ni)++]);
else
return TMP(XMM0 + (*ns)++);
}
static void
selcall(Fn *fn, Ins *i0, Ins *i1, RAlloc **rap)
{
Ins *i;
AClass *ac, *a, aret;
int ca, ni, ns, al, varc, envc;
uint stk, off;
Ref r, r1, r2, reg[2], env;
RAlloc *ra;
env = R;
ac = alloc((i1-i0) * sizeof ac[0]);
if (!req(i1->arg[1], R)) {
assert(rtype(i1->arg[1]) == RType);
typclass(&aret, &typ[i1->arg[1].val]);
ca = argsclass(i0, i1, ac, Oarg, &aret, &env);
} else
ca = argsclass(i0, i1, ac, Oarg, 0, &env);
for (stk=0, a=&ac[i1-i0]; a>ac;)
if ((--a)->inmem) {
if (a->align > 4)
err("sysv abi requires alignments of 16 or less");
stk += a->size;
if (a->align == 4)
stk += stk & 15;
}
stk += stk & 15;
if (stk) {
r = getcon(-(int64_t)stk, fn);
emit(Osalloc, Kl, R, r, R);
}
if (!req(i1->arg[1], R)) {
if (aret.inmem) {
/* get the return location from eax
* it saves one callee-save reg */
r1 = newtmp("abi", Kl, fn);
emit(Ocopy, Kl, i1->to, TMP(RAX), R);
ca += 1;
} else {
if (aret.size > 8) {
r = newtmp("abi", Kl, fn);
aret.ref[1] = newtmp("abi", aret.cls[1], fn);
emit(Ostorel, 0, R, aret.ref[1], r);
emit(Oadd, Kl, r, i1->to, getcon(8, fn));
}
aret.ref[0] = newtmp("abi", aret.cls[0], fn);
emit(Ostorel, 0, R, aret.ref[0], i1->to);
ca += retr(reg, &aret);
if (aret.size > 8)
emit(Ocopy, aret.cls[1], aret.ref[1], reg[1], R);
emit(Ocopy, aret.cls[0], aret.ref[0], reg[0], R);
r1 = i1->to;
}
/* allocate return pad */
ra = alloc(sizeof *ra);
/* specific to NAlign == 3 */
al = aret.align >= 2 ? aret.align - 2 : 0;
ra->i = (Ins){Oalloc+al, r1, {getcon(aret.size, fn)}, Kl};
ra->link = (*rap);
*rap = ra;
} else {
ra = 0;
if (KBASE(i1->cls) == 0) {
emit(Ocopy, i1->cls, i1->to, TMP(RAX), R);
ca += 1;
} else {
emit(Ocopy, i1->cls, i1->to, TMP(XMM0), R);
ca += 1 << 2;
}
}
envc = !req(R, env);
varc = i1->op == Ovacall;
if (varc && envc)
err("sysv abi does not support variadic env calls");
ca |= (varc | envc) << 12;
emit(Ocall, i1->cls, R, i1->arg[0], CALL(ca));
if (envc)
emit(Ocopy, Kl, TMP(RAX), env, R);
if (varc)
emit(Ocopy, Kw, TMP(RAX), getcon((ca >> 8) & 15, fn), R);
ni = ns = 0;
if (ra && aret.inmem)
emit(Ocopy, Kl, rarg(Kl, &ni, &ns), ra->i.to, R); /* pass hidden argument */
for (i=i0, a=ac; i<i1; i++, a++) {
if (a->inmem)
continue;
r1 = rarg(a->cls[0], &ni, &ns);
if (i->op == Oargc) {
if (a->size > 8) {
r2 = rarg(a->cls[1], &ni, &ns);
r = newtmp("abi", Kl, fn);
emit(Oload, a->cls[1], r2, r, R);
emit(Oadd, Kl, r, i->arg[1], getcon(8, fn));
}
emit(Oload, a->cls[0], r1, i->arg[1], R);
} else
emit(Ocopy, i->cls, r1, i->arg[0], R);
}
if (!stk)
return;
r = newtmp("abi", Kl, fn);
for (i=i0, a=ac, off=0; i<i1; i++, a++) {
if (!a->inmem)
continue;
if (i->op == Oargc) {
if (a->align == 4)
off += off & 15;
blit(r, off, i->arg[1], a->size, fn);
} else {
r1 = newtmp("abi", Kl, fn);
emit(Ostorel, 0, R, i->arg[0], r1);
emit(Oadd, Kl, r1, r, getcon(off, fn));
}
off += a->size;
}
emit(Osalloc, Kl, r, getcon(stk, fn), R);
}
static int
selpar(Fn *fn, Ins *i0, Ins *i1)
{
AClass *ac, *a, aret;
Ins *i;
int ni, ns, s, al, fa;
Ref r, env;
env = R;
ac = alloc((i1-i0) * sizeof ac[0]);
curi = &insb[NIns];
ni = ns = 0;
if (fn->retty >= 0) {
typclass(&aret, &typ[fn->retty]);
fa = argsclass(i0, i1, ac, Opar, &aret, &env);
} else
fa = argsclass(i0, i1, ac, Opar, 0, &env);
for (i=i0, a=ac; i<i1; i++, a++) {
if (i->op != Oparc || a->inmem)
continue;
if (a->size > 8) {
r = newtmp("abi", Kl, fn);
a->ref[1] = newtmp("abi", Kl, fn);
emit(Ostorel, 0, R, a->ref[1], r);
emit(Oadd, Kl, r, i->to, getcon(8, fn));
}
a->ref[0] = newtmp("abi", Kl, fn);
emit(Ostorel, 0, R, a->ref[0], i->to);
/* specific to NAlign == 3 */
al = a->align >= 2 ? a->align - 2 : 0;
emit(Oalloc+al, Kl, i->to, getcon(a->size, fn), R);
}
if (fn->retty >= 0 && aret.inmem) {
r = newtmp("abi", Kl, fn);
emit(Ocopy, Kl, r, rarg(Kl, &ni, &ns), R);
fn->retr = r;
}
for (i=i0, a=ac, s=4; i<i1; i++, a++) {
switch (a->inmem) {
case 1:
if (a->align > 4)
err("sysv abi requires alignments of 16 or less");
if (a->align == 4)
s = (s+3) & -4;
fn->tmp[i->to.val].slot = -s;
s += a->size / 4;
continue;
case 2:
emit(Oload, i->cls, i->to, SLOT(-s), R);
s += 2;
continue;
}
r = rarg(a->cls[0], &ni, &ns);
if (i->op == Oparc) {
emit(Ocopy, Kl, a->ref[0], r, R);
if (a->size > 8) {
r = rarg(a->cls[1], &ni, &ns);
emit(Ocopy, Kl, a->ref[1], r, R);
}
} else
emit(Ocopy, i->cls, i->to, r, R);
}
if (!req(R, env))
emit(Ocopy, Kl, env, TMP(RAX), R);
return fa | (s*4)<<12;
}
static Blk *
split(Fn *fn, Blk *b)
{
Blk *bn;
++fn->nblk;
bn = blknew();
bn->nins = &insb[NIns] - curi;
idup(&bn->ins, curi, bn->nins);
curi = &insb[NIns];
bn->visit = ++b->visit;
snprintf(bn->name, NString, "%s.%d", b->name, b->visit);
bn->loop = b->loop;
bn->link = b->link;
b->link = bn;
return bn;
}
static void
chpred(Blk *b, Blk *bp, Blk *bp1)
{
Phi *p;
uint a;
for (p=b->phi; p; p=p->link) {
for (a=0; p->blk[a]!=bp; a++)
assert(a+1<p->narg);
p->blk[a] = bp1;
}
}
static void
selvaarg(Fn *fn, Blk *b, Ins *i)
{
Ref loc, lreg, lstk, nr, r0, r1, c4, c8, c16, c, ap;
Blk *b0, *bstk, *breg;
int isint;
c4 = getcon(4, fn);
c8 = getcon(8, fn);
c16 = getcon(16, fn);
ap = i->arg[0];
isint = KBASE(i->cls) == 0;
/* @b [...]
r0 =l add ap, (0 or 4)
nr =l loadsw r0
r1 =w cultw nr, (48 or 176)
jnz r1, @breg, @bstk
@breg
r0 =l add ap, 16
r1 =l loadl r0
lreg =l add r1, nr
r0 =w add nr, (8 or 16)
r1 =l add ap, (0 or 4)
storew r0, r1
@bstk
r0 =l add ap, 8
lstk =l loadl r0
r1 =l add lstk, 8
storel r1, r0
@b0
%loc =l phi @breg %lreg, @bstk %lstk
i->to =(i->cls) load %loc
*/
loc = newtmp("abi", Kl, fn);
emit(Oload, i->cls, i->to, loc, R);
b0 = split(fn, b);
b0->jmp = b->jmp;
b0->s1 = b->s1;
b0->s2 = b->s2;
if (b->s1)
chpred(b->s1, b, b0);
if (b->s2 && b->s2 != b->s1)
chpred(b->s2, b, b0);
lreg = newtmp("abi", Kl, fn);
nr = newtmp("abi", Kl, fn);
r0 = newtmp("abi", Kw, fn);
r1 = newtmp("abi", Kl, fn);
emit(Ostorew, Kw, R, r0, r1);
emit(Oadd, Kl, r1, ap, isint ? CON_Z : c4);
emit(Oadd, Kw, r0, nr, isint ? c8 : c16);
r0 = newtmp("abi", Kl, fn);
r1 = newtmp("abi", Kl, fn);
emit(Oadd, Kl, lreg, r1, nr);
emit(Oload, Kl, r1, r0, R);
emit(Oadd, Kl, r0, ap, c16);
breg = split(fn, b);
breg->jmp.type = Jjmp;
breg->s1 = b0;
lstk = newtmp("abi", Kl, fn);
r0 = newtmp("abi", Kl, fn);
r1 = newtmp("abi", Kl, fn);
emit(Ostorel, Kw, R, r1, r0);
emit(Oadd, Kl, r1, lstk, c8);
emit(Oload, Kl, lstk, r0, R);
emit(Oadd, Kl, r0, ap, c8);
bstk = split(fn, b);
bstk->jmp.type = Jjmp;
bstk->s1 = b0;
b0->phi = alloc(sizeof *b0->phi);
*b0->phi = (Phi){
.cls = Kl, .to = loc,
.narg = 2,
.blk = {bstk, breg},
.arg = {lstk, lreg},
};
r0 = newtmp("abi", Kl, fn);
r1 = newtmp("abi", Kw, fn);
b->jmp.type = Jjnz;
b->jmp.arg = r1;
b->s1 = breg;
b->s2 = bstk;
c = getcon(isint ? 48 : 176, fn);
emit(Ocmpw+Ciult, Kw, r1, nr, c);
emit(Oloadsw, Kl, nr, r0, R);
emit(Oadd, Kl, r0, ap, isint ? CON_Z : c4);
}
static void
selvastart(Fn *fn, int fa, Ref ap)
{
Ref r0, r1;
int gp, fp, sp;
gp = ((fa >> 4) & 15) * 8;
fp = 48 + ((fa >> 8) & 15) * 16;
sp = fa >> 12;
r0 = newtmp("abi", Kl, fn);
r1 = newtmp("abi", Kl, fn);
emit(Ostorel, Kw, R, r1, r0);
emit(Oadd, Kl, r1, TMP(RBP), getcon(-176, fn));
emit(Oadd, Kl, r0, ap, getcon(16, fn));
r0 = newtmp("abi", Kl, fn);
r1 = newtmp("abi", Kl, fn);
emit(Ostorel, Kw, R, r1, r0);
emit(Oadd, Kl, r1, TMP(RBP), getcon(sp, fn));
emit(Oadd, Kl, r0, ap, getcon(8, fn));
r0 = newtmp("abi", Kl, fn);
emit(Ostorew, Kw, R, getcon(fp, fn), r0);
emit(Oadd, Kl, r0, ap, getcon(4, fn));
emit(Ostorew, Kw, R, getcon(gp, fn), ap);
}
void
amd64_sysv_abi(Fn *fn)
{
Blk *b;
Ins *i, *i0, *ip;
RAlloc *ral;
int n, fa;
for (b=fn->start; b; b=b->link)
b->visit = 0;
/* lower parameters */
for (b=fn->start, i=b->ins; i-b->ins<b->nins; i++)
if (!ispar(i->op))
break;
fa = selpar(fn, b->ins, i);
n = b->nins - (i - b->ins) + (&insb[NIns] - curi);
i0 = alloc(n * sizeof(Ins));
ip = icpy(ip = i0, curi, &insb[NIns] - curi);
ip = icpy(ip, i, &b->ins[b->nins] - i);
b->nins = n;
b->ins = i0;
/* lower calls, returns, and vararg instructions */
ral = 0;
b = fn->start;
do {
if (!(b = b->link))
b = fn->start; /* do it last */
if (b->visit)
continue;
curi = &insb[NIns];
selret(b, fn);
for (i=&b->ins[b->nins]; i!=b->ins;)
switch ((--i)->op) {
default:
emiti(*i);
break;
case Ocall:
case Ovacall:
for (i0=i; i0>b->ins; i0--)
if (!isarg((i0-1)->op))
break;
selcall(fn, i0, i, &ral);
i = i0;
break;
case Ovastart:
selvastart(fn, fa, i->arg[0]);
break;
case Ovaarg:
selvaarg(fn, b, i);
break;
case Oarg:
case Oargc:
die("unreachable");
}
if (b == fn->start)
for (; ral; ral=ral->link)
emiti(ral->i);
b->nins = &insb[NIns] - curi;
idup(&b->ins, curi, b->nins);
} while (b != fn->start);
if (debug['A']) {
fprintf(stderr, "\n> After ABI lowering:\n");
printfn(fn, stderr);
}
}