Compare commits

..

5 commits

Author SHA1 Message Date
sam
c403479279 Merge branch 'merge' 2024-10-29 22:50:30 +13:00
Quentin Carbonneaux
90050202f5 fix various codegen bugs on arm64
- dynamic allocations could generate
  bad 'and' instructions (for the
  and with -16 in salloc()).
- symbols used in w context would
  generate adrp and add instructions
  on wN registers while they seem to
  only work on xN registers.

Thanks to Rosie for reporting them.
2024-10-01 19:42:50 +02:00
Quentin Carbonneaux
626f0b2781 skip preludes for some leaf fns
When rbp is not necessary to compile
a leaf function, we skip saving and
restoring it.
2024-08-23 18:31:53 +02:00
Alexey Yerin
bb8de8c633 arm64/isel: Avoid signed overflow when handling immediates
Clang incorrectly optimizes this negation with -O2 and causes QBE to
emit 0 in place of INT64_MIN.
2024-08-15 23:21:05 +02:00
Quentin Carbonneaux
efcb5465e5 align emitted code
Functions are now aligned on 16-byte
boundaries. This mimics gcc and should
help reduce the maximum perf impact of
cosmetic code changes. Previously, any
change in the output of qbe could have
far reaching implications on alignment.

Thanks to Roland Paterson-Jones for
pointing out the variability issue.
2024-08-15 23:11:41 +02:00
5 changed files with 159 additions and 104 deletions

View file

@ -1,6 +1,16 @@
#include "all.h"
typedef struct E E;
struct E {
FILE *f;
Fn *fn;
int fp;
uint64_t fsz;
int nclob;
};
#define CMP(X) \
X(Ciule, "be") \
X(Ciult, "b") \
@ -142,23 +152,29 @@ static char *rname[][4] = {
static int
slot(Ref r, Fn *fn)
slot(Ref r, E *e)
{
int s;
s = rsval(r);
assert(s <= fn->slot);
assert(s <= e->fn->slot);
/* specific to NAlign == 3 */
if (s < 0)
return -4 * s;
else if (fn->vararg)
return -176 + -4 * (fn->slot - s);
if (s < 0) {
if (e->fp == RSP)
return 4*-s - 8 + e->fsz + e->nclob*8;
else
return 4*-s;
}
else if (e->fp == RSP)
return 4*s + e->nclob*8;
else if (e->fn->vararg)
return -176 + -4 * (e->fn->slot - s);
else
return -4 * (fn->slot - s);
return -4 * (e->fn->slot - s);
}
static void
emitcon(Con *con, FILE *f)
emitcon(Con *con, E *e)
{
char *p, *l;
@ -168,16 +184,16 @@ emitcon(Con *con, FILE *f)
p = l[0] == '"' ? "" : T.assym;
if (con->sym.type == SThr) {
if (T.apple)
fprintf(f, "%s%s@TLVP", p, l);
fprintf(e->f, "%s%s@TLVP", p, l);
else
fprintf(f, "%%fs:%s%s@tpoff", p, l);
fprintf(e->f, "%%fs:%s%s@tpoff", p, l);
} else
fprintf(f, "%s%s", p, l);
fprintf(e->f, "%s%s", p, l);
if (con->bits.i)
fprintf(f, "%+"PRId64, con->bits.i);
fprintf(e->f, "%+"PRId64, con->bits.i);
break;
case CBits:
fprintf(f, "%"PRId64, con->bits.i);
fprintf(e->f, "%"PRId64, con->bits.i);
break;
default:
die("unreachable");
@ -212,10 +228,10 @@ getarg(char c, Ins *i)
}
}
static void emitins(Ins, Fn *, FILE *);
static void emitins(Ins, E *);
static void
emitcopy(Ref r1, Ref r2, int k, Fn *fn, FILE *f)
emitcopy(Ref r1, Ref r2, int k, E *e)
{
Ins icp;
@ -223,11 +239,11 @@ emitcopy(Ref r1, Ref r2, int k, Fn *fn, FILE *f)
icp.arg[0] = r2;
icp.to = r1;
icp.cls = k;
emitins(icp, fn, f);
emitins(icp, e);
}
static void
emitf(char *s, Ins *i, Fn *fn, FILE *f)
emitf(char *s, Ins *i, E *e)
{
static char clstoa[][3] = {"l", "q", "ss", "sd"};
char c;
@ -247,25 +263,25 @@ emitf(char *s, Ins *i, Fn *fn, FILE *f)
case '-':
assert((!req(i->arg[1], i->to) || req(i->arg[0], i->to)) &&
"cannot convert to 2-address");
emitcopy(i->to, i->arg[0], i->cls, fn, f);
emitcopy(i->to, i->arg[0], i->cls, e);
s++;
break;
}
fputc('\t', f);
fputc('\t', e->f);
Next:
while ((c = *s++) != '%')
if (!c) {
fputc('\n', f);
fputc('\n', e->f);
return;
} else
fputc(c, f);
fputc(c, e->f);
switch ((c = *s++)) {
case '%':
fputc('%', f);
fputc('%', e->f);
break;
case 'k':
fputs(clstoa[i->cls], f);
fputs(clstoa[i->cls], e->f);
break;
case '0':
case '1':
@ -282,37 +298,42 @@ Next:
switch (rtype(ref)) {
case RTmp:
assert(isreg(ref));
fprintf(f, "%%%s", regtoa(ref.val, sz));
fprintf(e->f, "%%%s", regtoa(ref.val, sz));
break;
case RSlot:
fprintf(f, "%d(%%rbp)", slot(ref, fn));
fprintf(e->f, "%d(%%%s)",
slot(ref, e),
regtoa(e->fp, SLong)
);
break;
case RMem:
Mem:
m = &fn->mem[ref.val];
m = &e->fn->mem[ref.val];
if (rtype(m->base) == RSlot) {
off.type = CBits;
off.bits.i = slot(m->base, fn);
off.bits.i = slot(m->base, e);
addcon(&m->offset, &off, 1);
m->base = TMP(RBP);
m->base = TMP(e->fp);
}
if (m->offset.type != CUndef)
emitcon(&m->offset, f);
fputc('(', f);
emitcon(&m->offset, e);
fputc('(', e->f);
if (!req(m->base, R))
fprintf(f, "%%%s", regtoa(m->base.val, SLong));
fprintf(e->f, "%%%s",
regtoa(m->base.val, SLong)
);
else if (m->offset.type == CAddr)
fprintf(f, "%%rip");
fprintf(e->f, "%%rip");
if (!req(m->index, R))
fprintf(f, ", %%%s, %d",
fprintf(e->f, ", %%%s, %d",
regtoa(m->index.val, SLong),
m->scale
);
fputc(')', f);
fputc(')', e->f);
break;
case RCon:
fputc('$', f);
emitcon(&fn->con[ref.val], f);
fputc('$', e->f);
emitcon(&e->fn->con[ref.val], e);
break;
default:
die("unreachable");
@ -337,18 +358,21 @@ Next:
case RMem:
goto Mem;
case RSlot:
fprintf(f, "%d(%%rbp)", slot(ref, fn));
fprintf(e->f, "%d(%%%s)",
slot(ref, e),
regtoa(e->fp, SLong)
);
break;
case RCon:
off = fn->con[ref.val];
emitcon(&off, f);
off = e->fn->con[ref.val];
emitcon(&off, e);
if (off.type == CAddr)
if (off.sym.type != SThr || T.apple)
fprintf(f, "(%%rip)");
fprintf(e->f, "(%%rip)");
break;
case RTmp:
assert(isreg(ref));
fprintf(f, "(%%%s)", regtoa(ref.val, SLong));
fprintf(e->f, "(%%%s)", regtoa(ref.val, SLong));
break;
default:
die("unreachable");
@ -366,7 +390,7 @@ static void *negmask[4] = {
};
static void
emitins(Ins i, Fn *fn, FILE *f)
emitins(Ins i, E *e)
{
Ref r;
int64_t val;
@ -393,7 +417,7 @@ emitins(Ins i, Fn *fn, FILE *f)
|| (omap[o].cls == Ka))
break;
}
emitf(omap[o].fmt, &i, fn, f);
emitf(omap[o].fmt, &i, e);
break;
case Onop:
/* just do nothing for nops, they are inserted
@ -410,7 +434,7 @@ emitins(Ins i, Fn *fn, FILE *f)
if (KBASE(i.cls) == 0 /* only available for ints */
&& rtype(i.arg[0]) == RCon
&& rtype(i.arg[1]) == RTmp) {
emitf("imul%k %0, %1, %=", &i, fn, f);
emitf("imul%k %0, %1, %=", &i, e);
break;
}
goto Table;
@ -419,18 +443,18 @@ emitins(Ins i, Fn *fn, FILE *f)
* some 3-address subtractions */
if (req(i.to, i.arg[1]) && !req(i.arg[0], i.to)) {
ineg = (Ins){Oneg, i.cls, i.to, {i.to}};
emitins(ineg, fn, f);
emitf("add%k %0, %=", &i, fn, f);
emitins(ineg, e);
emitf("add%k %0, %=", &i, e);
break;
}
goto Table;
case Oneg:
if (!req(i.to, i.arg[0]))
emitf("mov%k %0, %=", &i, fn, f);
emitf("mov%k %0, %=", &i, e);
if (KBASE(i.cls) == 0)
emitf("neg%k %=", &i, fn, f);
emitf("neg%k %=", &i, e);
else
fprintf(f,
fprintf(e->f,
"\txorp%c %sfp%d(%%rip), %%%s\n",
"xxsd"[i.cls],
T.asloc,
@ -443,8 +467,8 @@ emitins(Ins i, Fn *fn, FILE *f)
* conversion to 2-address in emitf() would fail */
if (req(i.to, i.arg[1])) {
i.arg[1] = TMP(XMM0+15);
emitf("mov%k %=, %1", &i, fn, f);
emitf("mov%k %0, %=", &i, fn, f);
emitf("mov%k %=, %1", &i, e);
emitf("mov%k %0, %=", &i, e);
i.arg[0] = i.to;
}
goto Table;
@ -460,53 +484,54 @@ emitins(Ins i, Fn *fn, FILE *f)
t0 = rtype(i.arg[0]);
if (i.cls == Kl
&& t0 == RCon
&& fn->con[i.arg[0].val].type == CBits) {
val = fn->con[i.arg[0].val].bits.i;
&& e->fn->con[i.arg[0].val].type == CBits) {
val = e->fn->con[i.arg[0].val].bits.i;
if (isreg(i.to))
if (val >= 0 && val <= UINT32_MAX) {
emitf("movl %W0, %W=", &i, fn, f);
emitf("movl %W0, %W=", &i, e);
break;
}
if (rtype(i.to) == RSlot)
if (val < INT32_MIN || val > INT32_MAX) {
emitf("movl %0, %=", &i, fn, f);
emitf("movl %0>>32, 4+%=", &i, fn, f);
emitf("movl %0, %=", &i, e);
emitf("movl %0>>32, 4+%=", &i, e);
break;
}
}
if (isreg(i.to)
&& t0 == RCon
&& fn->con[i.arg[0].val].type == CAddr) {
emitf("lea%k %M0, %=", &i, fn, f);
&& e->fn->con[i.arg[0].val].type == CAddr) {
emitf("lea%k %M0, %=", &i, e);
break;
}
if (rtype(i.to) == RSlot
&& (t0 == RSlot || t0 == RMem)) {
i.cls = KWIDE(i.cls) ? Kd : Ks;
i.arg[1] = TMP(XMM0+15);
emitf("mov%k %0, %1", &i, fn, f);
emitf("mov%k %1, %=", &i, fn, f);
emitf("mov%k %0, %1", &i, e);
emitf("mov%k %1, %=", &i, e);
break;
}
/* conveniently, the assembler knows if it
* should use movabsq when reading movq */
emitf("mov%k %0, %=", &i, fn, f);
emitf("mov%k %0, %=", &i, e);
break;
case Oaddr:
if (!T.apple
&& rtype(i.arg[0]) == RCon
&& fn->con[i.arg[0].val].sym.type == SThr) {
&& e->fn->con[i.arg[0].val].sym.type == SThr) {
/* derive the symbol address from the TCB
* address at offset 0 of %fs */
assert(isreg(i.to));
con = &fn->con[i.arg[0].val];
con = &e->fn->con[i.arg[0].val];
sym = str(con->sym.id);
emitf("movq %%fs:0, %L=", &i, fn, f);
fprintf(f, "\tleaq %s%s@tpoff",
emitf("movq %%fs:0, %L=", &i, e);
fprintf(e->f, "\tleaq %s%s@tpoff",
sym[0] == '"' ? "" : T.assym, sym);
if (con->bits.i)
fprintf(f, "%+"PRId64, con->bits.i);
fprintf(f, "(%%%s), %%%s\n",
fprintf(e->f, "%+"PRId64,
con->bits.i);
fprintf(e->f, "(%%%s), %%%s\n",
regtoa(i.to.val, SLong),
regtoa(i.to.val, SLong));
break;
@ -517,12 +542,12 @@ emitins(Ins i, Fn *fn, FILE *f)
* assembly... */
switch (rtype(i.arg[0])) {
case RCon:
fprintf(f, "\tcallq ");
emitcon(&fn->con[i.arg[0].val], f);
fprintf(f, "\n");
fprintf(e->f, "\tcallq ");
emitcon(&e->fn->con[i.arg[0].val], e);
fprintf(e->f, "\n");
break;
case RTmp:
emitf("callq *%L0", &i, fn, f);
emitf("callq *%L0", &i, e);
break;
default:
die("invalid call argument");
@ -533,9 +558,10 @@ emitins(Ins i, Fn *fn, FILE *f)
* maybe we should split Osalloc in 2 different
* instructions depending on the result
*/
emitf("subq %L0, %%rsp", &i, fn, f);
assert(e->fp == RBP);
emitf("subq %L0, %%rsp", &i, e);
if (!req(i.to, R))
emitcopy(i.to, TMP(RSP), Kl, fn, f);
emitcopy(i.to, TMP(RSP), Kl, e);
break;
case Oswap:
if (KBASE(i.cls) == 0)
@ -543,27 +569,35 @@ emitins(Ins i, Fn *fn, FILE *f)
/* for floats, there is no swap instruction
* so we use xmm15 as a temporary
*/
emitcopy(TMP(XMM0+15), i.arg[0], i.cls, fn, f);
emitcopy(i.arg[0], i.arg[1], i.cls, fn, f);
emitcopy(i.arg[1], TMP(XMM0+15), i.cls, fn, f);
emitcopy(TMP(XMM0+15), i.arg[0], i.cls, e);
emitcopy(i.arg[0], i.arg[1], i.cls, e);
emitcopy(i.arg[1], TMP(XMM0+15), i.cls, e);
break;
case Odbgloc:
emitdbgloc(i.arg[0].val, i.arg[1].val, f);
emitdbgloc(i.arg[0].val, i.arg[1].val, e->f);
break;
}
}
static uint64_t
framesz(Fn *fn)
static void
framesz(E *e)
{
uint64_t i, o, f;
/* specific to NAlign == 3 */
for (i=0, o=0; i<NCLR; i++)
o ^= 1 & (fn->reg >> amd64_sysv_rclob[i]);
f = fn->slot;
o = 0;
if (!e->fn->leaf) {
for (i=0, o=0; i<NCLR; i++)
o ^= e->fn->reg >> amd64_sysv_rclob[i];
o &= 1;
}
f = e->fn->slot;
f = (f + 3) & -4;
return 4*f + 8*o + 176*fn->vararg;
if (f > 0
&& e->fp == RSP
&& e->fn->salign == 4)
f += 2;
e->fsz = 4*f + 8*o + 176*e->fn->vararg;
}
void
@ -578,13 +612,19 @@ amd64_emitfn(Fn *fn, FILE *f)
Blk *b, *s;
Ins *i, itmp;
int *r, c, o, n, lbl;
uint64_t fs;
E *e;
e = &(E){.f = f, .fn = fn};
emitfnlnk(fn->name, &fn->lnk, f);
fputs("\tendbr64\n\tpushq %rbp\n\tmovq %rsp, %rbp\n", f);
fs = framesz(fn);
if (fs)
fprintf(f, "\tsubq $%"PRIu64", %%rsp\n", fs);
fputs("\tendbr64\n", f);
if (!fn->leaf || fn->vararg || fn->dynalloc) {
e->fp = RBP;
fputs("\tpushq %rbp\n\tmovq %rsp, %rbp\n", f);
} else
e->fp = RSP;
framesz(e);
if (e->fsz)
fprintf(f, "\tsubq $%"PRIu64", %%rsp\n", e->fsz);
if (fn->vararg) {
o = -176;
for (r=amd64_sysv_rsave; r<&amd64_sysv_rsave[6]; r++, o+=8)
@ -595,15 +635,15 @@ amd64_emitfn(Fn *fn, FILE *f)
for (r=amd64_sysv_rclob; r<&amd64_sysv_rclob[NCLR]; r++)
if (fn->reg & BIT(*r)) {
itmp.arg[0] = TMP(*r);
emitf("pushq %L0", &itmp, fn, f);
fs += 8;
emitf("pushq %L0", &itmp, e);
e->nclob++;
}
for (lbl=0, b=fn->start; b; b=b->link) {
if (lbl || b->npred > 1)
fprintf(f, "%sbb%d:\n", T.asloc, id0+b->id);
for (i=b->ins; i!=&b->ins[b->nins]; i++)
emitins(*i, fn, f);
emitins(*i, e);
lbl = 1;
switch (b->jmp.type) {
case Jhlt:
@ -614,17 +654,19 @@ amd64_emitfn(Fn *fn, FILE *f)
fprintf(f,
"\tmovq %%rbp, %%rsp\n"
"\tsubq $%"PRIu64", %%rsp\n",
fs
);
e->fsz + e->nclob * 8);
for (r=&amd64_sysv_rclob[NCLR]; r>amd64_sysv_rclob;)
if (fn->reg & BIT(*--r)) {
itmp.arg[0] = TMP(*r);
emitf("popq %L0", &itmp, fn, f);
emitf("popq %L0", &itmp, e);
}
fprintf(f,
"\tleave\n"
"\tret\n"
);
if (e->fp == RBP)
fputs("\tleave\n", f);
else if (e->fsz)
fprintf(f,
"\taddq $%"PRIu64", %%rsp\n",
e->fsz);
fputs("\tret\n", f);
break;
case Jjmp:
Jmp:

View file

@ -808,6 +808,7 @@ amd64_isel(Fn *fn)
die("alloc too large");
fn->tmp[i->to.val].slot = fn->slot;
fn->slot += sz;
fn->salign = 2 + al - Oalloc;
*i = (Ins){.op = Onop};
}

View file

@ -160,7 +160,8 @@ emitf(char *s, Ins *i, E *e)
Ref r;
int k, c;
Con *pc;
uint n, sp;
uint64_t n;
uint sp;
fputc('\t', e->f);
@ -217,10 +218,17 @@ emitf(char *s, Ins *i, E *e)
pc = &e->fn->con[r.val];
n = pc->bits.i;
assert(pc->type == CBits);
if (n & 0xfff000)
fprintf(e->f, "#%u, lsl #12", n>>12);
else
fprintf(e->f, "#%u", n);
if (n >> 24) {
assert(arm64_logimm(n, k));
fprintf(e->f, "#%"PRIu64, n);
} else if (n & 0xfff000) {
assert(!(n & ~0xfff000ull));
fprintf(e->f, "#%"PRIu64", lsl #12",
n>>12);
} else {
assert(!(n & ~0xfffull));
fprintf(e->f, "#%"PRIu64, n);
}
break;
}
break;
@ -304,6 +312,7 @@ loadcon(Con *c, int r, int k, E *e)
rn = rname(r, k);
n = c->bits.i;
if (c->type == CAddr) {
rn = rname(r, Kl);
loadaddr(c, rn, e);
return;
}

View file

@ -24,7 +24,7 @@ imm(Con *c, int k, int64_t *pn)
i = Iplo12;
if (n < 0) {
i = Inlo12;
n = -n;
n = -(uint64_t)n;
}
*pn = n;
if ((n & 0x000fff) == n)

View file

@ -694,6 +694,7 @@ parseline(PState ps)
goto Ins;
}
if (op == Tcall) {
curf->leaf = 0;
arg[0] = parseref();
parserefl(1);
op = Ocall;
@ -910,6 +911,7 @@ parsefn(Lnk *lnk)
curf->con[0].bits.i = 0xdeaddead; /* UNDEF */
curf->con[1].type = CBits;
curf->lnk = *lnk;
curf->leaf = 1;
blink = &curf->start;
curf->retty = Kx;
if (peek() != Tglo)
@ -1219,6 +1221,7 @@ parse(FILE *f, char *path, void dbgfile(char *), void data(Dat *), void func(Fn
dbgfile(tokval.str);
break;
case Tfunc:
lnk.align = 16;
func(parsefn(&lnk));
break;
case Tdata: