Simplify int mul/udiv/urem of 2^N into shl/shr/and.

Passes the "standard" test suite. (cproc bootstrap, hare[c] make test, roland units, linpack/coremark run) However linpack benchmark is now notably slower. Coremark is ~2% faster. As noticed before, linmark timing is dubious, and maybe my cheap (AMD) laptop prefers mul to shl.
2024-06-13 20:58:52 +02:00 · 2024-06-13 20:58:52 +02:00 · 7a19de5cf4
commit 7a19de5cf4
parent cf9f95f521
1 changed files with 52 additions and 2 deletions
--- a/simpl.c
+++ b/simpl.c
@ -30,16 +30,43 @@ blit(Ref sd[2], int sz, Fn *fn)
 		}
 }

+static int
+ulog2_tab64[64] = {
+	63,  0, 58,  1, 59, 47, 53,  2,
+	60, 39, 48, 27, 54, 33, 42,  3,
+	61, 51, 37, 40, 49, 18, 28, 20,
+	55, 30, 34, 11, 43, 14, 22,  4,
+	62, 57, 46, 52, 38, 26, 32, 41,
+	50, 36, 17, 19, 29, 10, 13, 21,
+	56, 45, 25, 31, 35, 16,  9, 12,
+	44, 24, 15,  8, 23,  7,  6,  5
+};
+
+static int
+ulog2(uint64_t pow2)
+{
+	return ulog2_tab64[(pow2 * 0x07EDD5E59A4E28C2) >> 58];
+}
+
+static int
+ispow2(uint64_t v)
+{
+	return (v & (v - 1)) == 0;
+}
+
 static void
 ins(Ins **pi, int *new, Blk *b, Fn *fn)
 {
 	ulong ni;
+	Con *c;
 	Ins *i;
+	Ref r;
+	int n;

 	i = *pi;
 	/* simplify more instructions here;
-	 * copy 0 into xor, mul 2^n into shift,
-	 * bit rotations, ... */
+	 * copy 0 into xor bit rotations,
+	 * etc. */
 	switch (i->op) {
 	case Oblit1:
 		assert(i > b->ins);
@ -54,6 +81,29 @@ ins(Ins **pi, int *new, Blk *b, Fn *fn)
 		blit((i-1)->arg, rsval(i->arg[0]), fn);
 		*pi = i-1;
 		break;
+	case Omul:
+	case Oudiv:
+	case Ourem:
+		r = i->arg[1];
+		if (KBASE(i->cls) == 0)
+		if (rtype(r) == RCon) {
+			c = &fn->con[r.val];
+			if (c->type == CBits)
+			if (ispow2(c->bits.i)) {
+				n = ulog2(c->bits.i);
+				if (i->op == Ourem) {
+					i->op = Oand;
+					i->arg[1] = getcon((1ull<<n) - 1, fn);
+				} else if (i->op == Oudiv) {
+					i->op = Oshr;
+					i->arg[1] = getcon(n, fn);
+				} else if (i->op == Omul) {
+					i->op = Oshl;
+					i->arg[1] = getcon(n, fn);
+				}
+			}
+		}
+		/* fall through */
 	default:
 		if (*new)
 			emiti(*i);