From e5983ba1a2174fe7fd004692a427aae5bfc0dc4c Mon Sep 17 00:00:00 2001
From: Quentin Carbonneaux <quentin.carbonneaux@yale.edu>
Date: Wed, 5 Aug 2015 21:46:15 -0400
Subject: [PATCH] fix 2 bad bugs in rega and improve subtraction

Subtraction constrained the register allocator
to allocate different registers for the result and
the second operand.  Now, we use a neg trick to compile
it down: when the result shares a register with the
second operand, we negate it and add the first operand.
The machinery that was set up is, regardless,
interesting and will have to be used for floating
point computations (division).

The first bug in rega broke the explicit loop
invariant:  we were using register allocation
information from other blocks that was not yet
available.  It's still unclear how we got wrong results
from that, considering mappings are all 0-initialized.

The second bug is a stupid one: the sizeof operator was
missing from the memmove calls in rfree...
---
 lisc/emit.c  |  7 ++++++-
 lisc/rega.c  | 27 ++++++++++-----------------
 lisc/spill.c | 17 +----------------
 3 files changed, 17 insertions(+), 34 deletions(-)
diff --git a/lisc/emit.c b/lisc/emit.c
index 1867097..a1a70c4 100644
--- a/lisc/emit.c
+++ b/lisc/emit.c
@@ -128,7 +128,12 @@ eins(Ins i, Fn *fn, FILE *f)
 	case OAdd:
 	case OSub:
 		if (req(i.to, i.arg[1])) {
-			if (!opdesc[i.op].comm)
+			if (i.op == OSub) {
+				eop("neg", i.to, R, fn, f);
+				eop("add", i.arg[0], i.to, fn, f);
+				break;
+			}
+			if (opdesc[i.op].comm != T)
 				diag("emit: unhandled instruction (1)");
 			i.arg[1] = i.arg[0];
 			i.arg[0] = i.to;
diff --git a/lisc/rega.c b/lisc/rega.c
index 11fc0f4..6658075 100644
--- a/lisc/rega.c
+++ b/lisc/rega.c
@@ -102,8 +102,8 @@ rfree(RMap *m, int t)
 	BCLR(m->bt, t);
 	BCLR(m->br, r);
 	m->n--;
-	memmove(&m->t[i], &m->t[i+1], (m->n-i) * m->t[0]);
-	memmove(&m->r[i], &m->r[i+1], (m->n-i) * m->r[0]);
+	memmove(&m->t[i], &m->t[i+1], (m->n-i) * sizeof m->t[0]);
+	memmove(&m->r[i], &m->r[i+1], (m->n-i) * sizeof m->r[0]);
 	return r;
 }
 
@@ -293,11 +293,14 @@ rega(Fn *fn)
 		cur.n = 0;
 		cur.bt = (Bits){{0}};
 		cur.br = (Bits){{0}};
-		b1 = b->s1;
-		if (b1 && b->s2 && b->s2->loop > b1->loop)
-			b1 = b->s2;
-		if (b1 && b->loop > b1->loop)
-			b1 = 0;
+		b1 = 0;
+		if (b->s1 && b->s1->id > n) {
+			if (b->s1->loop > b->loop)
+				b1 = b->s1;
+			if (b->s2 && b->s2->id > n)
+			if (b->s2->loop > b1->loop)
+				b1 = b->s2;
+		}
 		/* try to reuse the register
 		 * assignment of the most frequent
 		 * successor
@@ -362,18 +365,8 @@ rega(Fn *fn)
 			}
 			switch (rtype(i->arg[1])) {
 			case RTmp:
-				/* <arch>
-				 *   on Intel, we have to
-				 *   make sure we avoid the
-				 *   situation
-				 *   eax = sub ebx, eax
-				 */
-				if (opdesc[i->op].comm == F && r)
-					BSET(cur.br, r);
 				t = i->arg[1].val;
 				i->arg[1] = ralloc(&cur, t);
-				if (opdesc[i->op].comm == F && r)
-					BCLR(cur.br, r);
 				break;
 			case RReg:
 				BSET(cur.br, BASE(i->arg[1].val));
diff --git a/lisc/spill.c b/lisc/spill.c
index 8b7c67e..7361b11 100644
--- a/lisc/spill.c
+++ b/lisc/spill.c
@@ -361,22 +361,7 @@ spill(Fn *fn)
 			case -1:;
 			}
 			w = (Bits){{0}};
-			if (rtype(i->arg[1]) == RTmp
-			&& !req(i->to, R)
-			&& opdesc[i->op].comm == F) {
-				/* <arch>
-				 *   here we make sure that we
-				 *   will never have to compile
-				 *   say: eax = sub ebx, eax
-				 *   on a two-address machine
-				 */
-				BSET(w, i->to.val);
-				BSET(v, i->to.val);
-				setloc(&i->arg[1], &v, &w);
-				BCLR(v, i->to.val);
-				BCLR(w, i->to.val);
-			} else
-				setloc(&i->arg[1], &v, &w);
+			setloc(&i->arg[1], &v, &w);
 			setloc(&i->arg[0], &v, &w);
 			if (s)
 				emit(OStore, R, i->to, SLOT(s));