diff --git a/amd64/emit.c b/amd64/emit.c
index 8f36188..00dd80f 100644
--- a/amd64/emit.c
+++ b/amd64/emit.c
@@ -1,16 +1,6 @@
 #include "all.h"
 
 
-typedef struct E E;
-
-struct E {
-	FILE *f;
-	Fn *fn;
-	int fp;
-	uint64_t fsz;
-	int nclob;
-};
-
 #define CMP(X) \
 	X(Ciule,      "be") \
 	X(Ciult,      "b")  \
@@ -152,29 +142,23 @@ static char *rname[][4] = {
 
 
 static int
-slot(Ref r, E *e)
+slot(Ref r, Fn *fn)
 {
 	int s;
 
 	s = rsval(r);
-	assert(s <= e->fn->slot);
+	assert(s <= fn->slot);
 	/* specific to NAlign == 3 */
-	if (s < 0) {
-		if (e->fp == RSP)
-			return 4*-s - 8 + e->fsz + e->nclob*8;
-		else
-			return 4*-s;
-	}
-	else if (e->fp == RSP)
-		return 4*s + e->nclob*8;
-	else if (e->fn->vararg)
-		return -176 + -4 * (e->fn->slot - s);
+	if (s < 0)
+		return -4 * s;
+	else if (fn->vararg)
+		return -176 + -4 * (fn->slot - s);
 	else
-		return -4 * (e->fn->slot - s);
+		return -4 * (fn->slot - s);
 }
 
 static void
-emitcon(Con *con, E *e)
+emitcon(Con *con, FILE *f)
 {
 	char *p, *l;
 
@@ -184,16 +168,16 @@ emitcon(Con *con, E *e)
 		p = l[0] == '"' ? "" : T.assym;
 		if (con->sym.type == SThr) {
 			if (T.apple)
-				fprintf(e->f, "%s%s@TLVP", p, l);
+				fprintf(f, "%s%s@TLVP", p, l);
 			else
-				fprintf(e->f, "%%fs:%s%s@tpoff", p, l);
+				fprintf(f, "%%fs:%s%s@tpoff", p, l);
 		} else
-			fprintf(e->f, "%s%s", p, l);
+			fprintf(f, "%s%s", p, l);
 		if (con->bits.i)
-			fprintf(e->f, "%+"PRId64, con->bits.i);
+			fprintf(f, "%+"PRId64, con->bits.i);
 		break;
 	case CBits:
-		fprintf(e->f, "%"PRId64, con->bits.i);
+		fprintf(f, "%"PRId64, con->bits.i);
 		break;
 	default:
 		die("unreachable");
@@ -228,10 +212,10 @@ getarg(char c, Ins *i)
 	}
 }
 
-static void emitins(Ins, E *);
+static void emitins(Ins, Fn *, FILE *);
 
 static void
-emitcopy(Ref r1, Ref r2, int k, E *e)
+emitcopy(Ref r1, Ref r2, int k, Fn *fn, FILE *f)
 {
 	Ins icp;
 
@@ -239,11 +223,11 @@ emitcopy(Ref r1, Ref r2, int k, E *e)
 	icp.arg[0] = r2;
 	icp.to = r1;
 	icp.cls = k;
-	emitins(icp, e);
+	emitins(icp, fn, f);
 }
 
 static void
-emitf(char *s, Ins *i, E *e)
+emitf(char *s, Ins *i, Fn *fn, FILE *f)
 {
 	static char clstoa[][3] = {"l", "q", "ss", "sd"};
 	char c;
@@ -263,25 +247,25 @@ emitf(char *s, Ins *i, E *e)
 	case '-':
 		assert((!req(i->arg[1], i->to) || req(i->arg[0], i->to)) &&
 			"cannot convert to 2-address");
-		emitcopy(i->to, i->arg[0], i->cls, e);
+		emitcopy(i->to, i->arg[0], i->cls, fn, f);
 		s++;
 		break;
 	}
 
-	fputc('\t', e->f);
+	fputc('\t', f);
 Next:
 	while ((c = *s++) != '%')
 		if (!c) {
-			fputc('\n', e->f);
+			fputc('\n', f);
 			return;
 		} else
-			fputc(c, e->f);
+			fputc(c, f);
 	switch ((c = *s++)) {
 	case '%':
-		fputc('%', e->f);
+		fputc('%', f);
 		break;
 	case 'k':
-		fputs(clstoa[i->cls], e->f);
+		fputs(clstoa[i->cls], f);
 		break;
 	case '0':
 	case '1':
@@ -298,42 +282,37 @@ Next:
 		switch (rtype(ref)) {
 		case RTmp:
 			assert(isreg(ref));
-			fprintf(e->f, "%%%s", regtoa(ref.val, sz));
+			fprintf(f, "%%%s", regtoa(ref.val, sz));
 			break;
 		case RSlot:
-			fprintf(e->f, "%d(%%%s)",
-				slot(ref, e),
-				regtoa(e->fp, SLong)
-			);
+			fprintf(f, "%d(%%rbp)", slot(ref, fn));
 			break;
 		case RMem:
 		Mem:
-			m = &e->fn->mem[ref.val];
+			m = &fn->mem[ref.val];
 			if (rtype(m->base) == RSlot) {
 				off.type = CBits;
-				off.bits.i = slot(m->base, e);
+				off.bits.i = slot(m->base, fn);
 				addcon(&m->offset, &off, 1);
-				m->base = TMP(e->fp);
+				m->base = TMP(RBP);
 			}
 			if (m->offset.type != CUndef)
-				emitcon(&m->offset, e);
-			fputc('(', e->f);
+				emitcon(&m->offset, f);
+			fputc('(', f);
 			if (!req(m->base, R))
-				fprintf(e->f, "%%%s",
-					regtoa(m->base.val, SLong)
-				);
+				fprintf(f, "%%%s", regtoa(m->base.val, SLong));
 			else if (m->offset.type == CAddr)
-				fprintf(e->f, "%%rip");
+				fprintf(f, "%%rip");
 			if (!req(m->index, R))
-				fprintf(e->f, ", %%%s, %d",
+				fprintf(f, ", %%%s, %d",
 					regtoa(m->index.val, SLong),
 					m->scale
 				);
-			fputc(')', e->f);
+			fputc(')', f);
 			break;
 		case RCon:
-			fputc('$', e->f);
-			emitcon(&e->fn->con[ref.val], e);
+			fputc('$', f);
+			emitcon(&fn->con[ref.val], f);
 			break;
 		default:
 			die("unreachable");
@@ -358,21 +337,18 @@ Next:
 		case RMem:
 			goto Mem;
 		case RSlot:
-			fprintf(e->f, "%d(%%%s)",
-				slot(ref, e),
-				regtoa(e->fp, SLong)
-			);
+			fprintf(f, "%d(%%rbp)", slot(ref, fn));
 			break;
 		case RCon:
-			off = e->fn->con[ref.val];
-			emitcon(&off, e);
+			off = fn->con[ref.val];
+			emitcon(&off, f);
 			if (off.type == CAddr)
 			if (off.sym.type != SThr || T.apple)
-				fprintf(e->f, "(%%rip)");
+				fprintf(f, "(%%rip)");
 			break;
 		case RTmp:
 			assert(isreg(ref));
-			fprintf(e->f, "(%%%s)", regtoa(ref.val, SLong));
+			fprintf(f, "(%%%s)", regtoa(ref.val, SLong));
 			break;
 		default:
 			die("unreachable");
@@ -390,7 +366,7 @@ static void *negmask[4] = {
 };
 
 static void
-emitins(Ins i, E *e)
+emitins(Ins i, Fn *fn, FILE *f)
 {
 	Ref r;
 	int64_t val;
@@ -417,7 +393,7 @@ emitins(Ins i, E *e)
 			|| (omap[o].cls == Ka))
 				break;
 		}
-		emitf(omap[o].fmt, &i, e);
+		emitf(omap[o].fmt, &i, fn, f);
 		break;
 	case Onop:
 		/* just do nothing for nops, they are inserted
@@ -434,7 +410,7 @@ emitins(Ins i, E *e)
 		if (KBASE(i.cls) == 0 /* only available for ints */
 		&& rtype(i.arg[0]) == RCon
 		&& rtype(i.arg[1]) == RTmp) {
-			emitf("imul%k %0, %1, %=", &i, e);
+			emitf("imul%k %0, %1, %=", &i, fn, f);
 			break;
 		}
 		goto Table;
@@ -443,18 +419,18 @@ emitins(Ins i, E *e)
 		 * some 3-address subtractions */
 		if (req(i.to, i.arg[1]) && !req(i.arg[0], i.to)) {
 			ineg = (Ins){Oneg, i.cls, i.to, {i.to}};
-			emitins(ineg, e);
-			emitf("add%k %0, %=", &i, e);
+			emitins(ineg, fn, f);
+			emitf("add%k %0, %=", &i, fn, f);
 			break;
 		}
 		goto Table;
 	case Oneg:
 		if (!req(i.to, i.arg[0]))
-			emitf("mov%k %0, %=", &i, e);
+			emitf("mov%k %0, %=", &i, fn, f);
 		if (KBASE(i.cls) == 0)
-			emitf("neg%k %=", &i, e);
+			emitf("neg%k %=", &i, fn, f);
 		else
-			fprintf(e->f,
+			fprintf(f,
 				"\txorp%c %sfp%d(%%rip), %%%s\n",
 				"xxsd"[i.cls],
 				T.asloc,
@@ -467,8 +443,8 @@ emitins(Ins i, E *e)
 		 * conversion to 2-address in emitf() would fail */
 		if (req(i.to, i.arg[1])) {
 			i.arg[1] = TMP(XMM0+15);
-			emitf("mov%k %=, %1", &i, e);
-			emitf("mov%k %0, %=", &i, e);
+			emitf("mov%k %=, %1", &i, fn, f);
+			emitf("mov%k %0, %=", &i, fn, f);
 			i.arg[0] = i.to;
 		}
 		goto Table;
@@ -484,54 +460,53 @@ emitins(Ins i, E *e)
 		t0 = rtype(i.arg[0]);
 		if (i.cls == Kl
 		&& t0 == RCon
-		&& e->fn->con[i.arg[0].val].type == CBits) {
-			val = e->fn->con[i.arg[0].val].bits.i;
+		&& fn->con[i.arg[0].val].type == CBits) {
+			val = fn->con[i.arg[0].val].bits.i;
 			if (isreg(i.to))
 			if (val >= 0 && val <= UINT32_MAX) {
-				emitf("movl %W0, %W=", &i, e);
+				emitf("movl %W0, %W=", &i, fn, f);
 				break;
 			}
 			if (rtype(i.to) == RSlot)
 			if (val < INT32_MIN || val > INT32_MAX) {
-				emitf("movl %0, %=", &i, e);
-				emitf("movl %0>>32, 4+%=", &i, e);
+				emitf("movl %0, %=", &i, fn, f);
+				emitf("movl %0>>32, 4+%=", &i, fn, f);
 				break;
 			}
 		}
 		if (isreg(i.to)
 		&& t0 == RCon
-		&& e->fn->con[i.arg[0].val].type == CAddr) {
-			emitf("lea%k %M0, %=", &i, e);
+		&& fn->con[i.arg[0].val].type == CAddr) {
+			emitf("lea%k %M0, %=", &i, fn, f);
 			break;
 		}
 		if (rtype(i.to) == RSlot
 		&& (t0 == RSlot || t0 == RMem)) {
 			i.cls = KWIDE(i.cls) ? Kd : Ks;
 			i.arg[1] = TMP(XMM0+15);
-			emitf("mov%k %0, %1", &i, e);
-			emitf("mov%k %1, %=", &i, e);
+			emitf("mov%k %0, %1", &i, fn, f);
+			emitf("mov%k %1, %=", &i, fn, f);
 			break;
 		}
 		/* conveniently, the assembler knows if it
 		 * should use movabsq when reading movq */
-		emitf("mov%k %0, %=", &i, e);
+		emitf("mov%k %0, %=", &i, fn, f);
 		break;
 	case Oaddr:
 		if (!T.apple
 		&& rtype(i.arg[0]) == RCon
-		&& e->fn->con[i.arg[0].val].sym.type == SThr) {
+		&& fn->con[i.arg[0].val].sym.type == SThr) {
 			/* derive the symbol address from the TCB
 			 * address at offset 0 of %fs */
 			assert(isreg(i.to));
-			con = &e->fn->con[i.arg[0].val];
+			con = &fn->con[i.arg[0].val];
 			sym = str(con->sym.id);
-			emitf("movq %%fs:0, %L=", &i, e);
-			fprintf(e->f, "\tleaq %s%s@tpoff",
+			emitf("movq %%fs:0, %L=", &i, fn, f);
+			fprintf(f, "\tleaq %s%s@tpoff",
 				sym[0] == '"' ? "" : T.assym, sym);
 			if (con->bits.i)
-				fprintf(e->f, "%+"PRId64,
-					con->bits.i);
-			fprintf(e->f, "(%%%s), %%%s\n",
+				fprintf(f, "%+"PRId64, con->bits.i);
+			fprintf(f, "(%%%s), %%%s\n",
 				regtoa(i.to.val, SLong),
 				regtoa(i.to.val, SLong));
 			break;
@@ -542,12 +517,12 @@ emitins(Ins i, E *e)
 		 * assembly... */
 		switch (rtype(i.arg[0])) {
 		case RCon:
-			fprintf(e->f, "\tcallq ");
-			emitcon(&e->fn->con[i.arg[0].val], e);
-			fprintf(e->f, "\n");
+			fprintf(f, "\tcallq ");
+			emitcon(&fn->con[i.arg[0].val], f);
+			fprintf(f, "\n");
 			break;
 		case RTmp:
-			emitf("callq *%L0", &i, e);
+			emitf("callq *%L0", &i, fn, f);
 			break;
 		default:
 			die("invalid call argument");
@@ -558,10 +533,9 @@ emitins(Ins i, E *e)
 		 * maybe we should split Osalloc in 2 different
 		 * instructions depending on the result
 		 */
-		assert(e->fp == RBP);
-		emitf("subq %L0, %%rsp", &i, e);
+		emitf("subq %L0, %%rsp", &i, fn, f);
 		if (!req(i.to, R))
-			emitcopy(i.to, TMP(RSP), Kl, e);
+			emitcopy(i.to, TMP(RSP), Kl, fn, f);
 		break;
 	case Oswap:
 		if (KBASE(i.cls) == 0)
@@ -569,35 +543,27 @@ emitins(Ins i, E *e)
 		/* for floats, there is no swap instruction
 		 * so we use xmm15 as a temporary
 		 */
-		emitcopy(TMP(XMM0+15), i.arg[0], i.cls, e);
-		emitcopy(i.arg[0], i.arg[1], i.cls, e);
-		emitcopy(i.arg[1], TMP(XMM0+15), i.cls, e);
+		emitcopy(TMP(XMM0+15), i.arg[0], i.cls, fn, f);
+		emitcopy(i.arg[0], i.arg[1], i.cls, fn, f);
+		emitcopy(i.arg[1], TMP(XMM0+15), i.cls, fn, f);
 		break;
 	case Odbgloc:
-		emitdbgloc(i.arg[0].val, i.arg[1].val, e->f);
+		emitdbgloc(i.arg[0].val, i.arg[1].val, f);
 		break;
 	}
 }
 
-static void
-framesz(E *e)
+static uint64_t
+framesz(Fn *fn)
 {
 	uint64_t i, o, f;
 
 	/* specific to NAlign == 3 */
-	o = 0;
-	if (!e->fn->leaf) {
-		for (i=0, o=0; i<NCLR; i++)
-			o ^= e->fn->reg >> amd64_sysv_rclob[i];
-		o &= 1;
-	}
-	f = e->fn->slot;
+	for (i=0, o=0; i<NCLR; i++)
+		o ^= 1 & (fn->reg >> amd64_sysv_rclob[i]);
+	f = fn->slot;
 	f = (f + 3) & -4;
-	if (f > 0
-	&& e->fp == RSP
-	&& e->fn->salign == 4)
-		f += 2;
-	e->fsz = 4*f + 8*o + 176*e->fn->vararg;
+	return 4*f + 8*o + 176*fn->vararg;
 }
 
 void
@@ -612,19 +578,13 @@ amd64_emitfn(Fn *fn, FILE *f)
 	Blk *b, *s;
 	Ins *i, itmp;
 	int *r, c, o, n, lbl;
-	E *e;
+	uint64_t fs;
 
-	e = &(E){.f = f, .fn = fn};
 	emitfnlnk(fn->name, &fn->lnk, f);
-	fputs("\tendbr64\n", f);
-	if (!fn->leaf || fn->vararg || fn->dynalloc) {
-		e->fp = RBP;
-		fputs("\tpushq %rbp\n\tmovq %rsp, %rbp\n", f);
-	} else
-		e->fp = RSP;
-	framesz(e);
-	if (e->fsz)
-		fprintf(f, "\tsubq $%"PRIu64", %%rsp\n", e->fsz);
+	fputs("\tendbr64\n\tpushq %rbp\n\tmovq %rsp, %rbp\n", f);
+	fs = framesz(fn);
+	if (fs)
+		fprintf(f, "\tsubq $%"PRIu64", %%rsp\n", fs);
 	if (fn->vararg) {
 		o = -176;
 		for (r=amd64_sysv_rsave; r<&amd64_sysv_rsave[6]; r++, o+=8)
@@ -635,15 +595,15 @@ amd64_emitfn(Fn *fn, FILE *f)
 	for (r=amd64_sysv_rclob; r<&amd64_sysv_rclob[NCLR]; r++)
 		if (fn->reg & BIT(*r)) {
 			itmp.arg[0] = TMP(*r);
-			emitf("pushq %L0", &itmp, e);
-			e->nclob++;
+			emitf("pushq %L0", &itmp, fn, f);
+			fs += 8;
 		}
 
 	for (lbl=0, b=fn->start; b; b=b->link) {
 		if (lbl || b->npred > 1)
 			fprintf(f, "%sbb%d:\n", T.asloc, id0+b->id);
 		for (i=b->ins; i!=&b->ins[b->nins]; i++)
-			emitins(*i, e);
+			emitins(*i, fn, f);
 		lbl = 1;
 		switch (b->jmp.type) {
 		case Jhlt:
@@ -654,19 +614,17 @@ amd64_emitfn(Fn *fn, FILE *f)
 				fprintf(f,
 					"\tmovq %%rbp, %%rsp\n"
 					"\tsubq $%"PRIu64", %%rsp\n",
-					e->fsz + e->nclob * 8);
+					fs
+				);
 			for (r=&amd64_sysv_rclob[NCLR]; r>amd64_sysv_rclob;)
 				if (fn->reg & BIT(*--r)) {
 					itmp.arg[0] = TMP(*r);
-					emitf("popq %L0", &itmp, e);
+					emitf("popq %L0", &itmp, fn, f);
 				}
-			if (e->fp == RBP)
-				fputs("\tleave\n", f);
-			else if (e->fsz)
-				fprintf(f,
-					"\taddq $%"PRIu64", %%rsp\n",
-					e->fsz);
-			fputs("\tret\n", f);
+			fprintf(f,
+				"\tleave\n"
+				"\tret\n"
+			);
 			break;
 		case Jjmp:
 		Jmp:
diff --git a/amd64/isel.c b/amd64/isel.c
index bd645ce..2b92878 100644
--- a/amd64/isel.c
+++ b/amd64/isel.c
@@ -808,7 +808,6 @@ amd64_isel(Fn *fn)
 					die("alloc too large");
 				fn->tmp[i->to.val].slot = fn->slot;
 				fn->slot += sz;
-				fn->salign = 2 + al - Oalloc;
 				*i = (Ins){.op = Onop};
 			}
 
diff --git a/arm64/emit.c b/arm64/emit.c
index 28cd6a5..ffdc178 100644
--- a/arm64/emit.c
+++ b/arm64/emit.c
@@ -160,8 +160,9 @@ emitf(char *s, Ins *i, E *e)
 	Ref r;
 	int k, c;
 	Con *pc;
+	/* 64 bits wide, like the constant bits copied into it */
 	uint64_t n;
 	uint sp;
 
 	fputc('\t', e->f);
 
@@ -218,17 +219,21 @@ emitf(char *s, Ins *i, E *e)
 				pc = &e->fn->con[r.val];
 				n = pc->bits.i;
 				assert(pc->type == CBits);
+				/* three accepted shapes: anything with
+				 * a bit set at or above bit 24 must pass
+				 * arm64_logimm(); otherwise the value is
+				 * 12 bits, optionally shifted left by 12 */
 				if (n >> 24) {
 					assert(arm64_logimm(n, k));
 					fprintf(e->f, "#%"PRIu64, n);
 				} else if (n & 0xfff000) {
 					assert(!(n & ~0xfff000ull));
 					fprintf(e->f, "#%"PRIu64", lsl #12",
 						n>>12);
 				} else {
 					assert(!(n & ~0xfffull));
 					fprintf(e->f, "#%"PRIu64, n);
 				}
 				break;
 			}
 			break;
@@ -312,7 +317,8 @@ loadcon(Con *c, int r, int k, E *e)
 	rn = rname(r, k);
 	n = c->bits.i;
 	if (c->type == CAddr) {
+		/* addresses use the long register name, whatever k is */
 		rn = rname(r, Kl);
 		loadaddr(c, rn, e);
 		return;
 	}
diff --git a/arm64/isel.c b/arm64/isel.c
index 9ce6adc..062beb3 100644
--- a/arm64/isel.c
+++ b/arm64/isel.c
@@ -24,7 +24,8 @@ imm(Con *c, int k, int64_t *pn)
 	i = Iplo12;
 	if (n < 0) {
 		i = Inlo12;
+		/* negate as unsigned; -n overflows for the smallest value */
 		n = -(uint64_t)n;
 	}
 	*pn = n;
 	if ((n & 0x000fff) == n)
diff --git a/parse.c b/parse.c
index e896679..a745779 100644
--- a/parse.c
+++ b/parse.c
@@ -694,7 +694,6 @@ parseline(PState ps)
 		goto Ins;
 	}
 	if (op == Tcall) {
-		curf->leaf = 0;
 		arg[0] = parseref();
 		parserefl(1);
 		op = Ocall;
@@ -911,7 +910,6 @@ parsefn(Lnk *lnk)
 	curf->con[0].bits.i = 0xdeaddead;  /* UNDEF */
 	curf->con[1].type = CBits;
 	curf->lnk = *lnk;
-	curf->leaf = 1;
 	blink = &curf->start;
 	curf->retty = Kx;
 	if (peek() != Tglo)
@@ -1221,7 +1219,6 @@ parse(FILE *f, char *path, void dbgfile(char *), void data(Dat *), void func(Fn
 			dbgfile(tokval.str);
 			break;
 		case Tfunc:
-			lnk.align = 16;
 			func(parsefn(&lnk));
 			break;
 		case Tdata: