Simplify int mul/udiv/urem of 2^N into shl/shr/and.
Passes the "standard" test suite. (cproc bootstrap, hare[c] make test, roland units, linpack/coremark run) However linpack benchmark is now notably slower. Coremark is ~2% faster. As noticed before, linmark timing is dubious, and maybe my cheap (AMD) laptop prefers mul to shl.
This commit is contained in:
parent
cf9f95f521
commit
7a19de5cf4
1 changed files with 52 additions and 2 deletions
54
simpl.c
54
simpl.c
|
@ -30,16 +30,43 @@ blit(Ref sd[2], int sz, Fn *fn)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
ulog2_tab64[64] = {
|
||||||
|
63, 0, 58, 1, 59, 47, 53, 2,
|
||||||
|
60, 39, 48, 27, 54, 33, 42, 3,
|
||||||
|
61, 51, 37, 40, 49, 18, 28, 20,
|
||||||
|
55, 30, 34, 11, 43, 14, 22, 4,
|
||||||
|
62, 57, 46, 52, 38, 26, 32, 41,
|
||||||
|
50, 36, 17, 19, 29, 10, 13, 21,
|
||||||
|
56, 45, 25, 31, 35, 16, 9, 12,
|
||||||
|
44, 24, 15, 8, 23, 7, 6, 5
|
||||||
|
};
|
||||||
|
|
||||||
|
static int
|
||||||
|
ulog2(uint64_t pow2)
|
||||||
|
{
|
||||||
|
return ulog2_tab64[(pow2 * 0x07EDD5E59A4E28C2) >> 58];
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
ispow2(uint64_t v)
|
||||||
|
{
|
||||||
|
return (v & (v - 1)) == 0;
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
ins(Ins **pi, int *new, Blk *b, Fn *fn)
|
ins(Ins **pi, int *new, Blk *b, Fn *fn)
|
||||||
{
|
{
|
||||||
ulong ni;
|
ulong ni;
|
||||||
|
Con *c;
|
||||||
Ins *i;
|
Ins *i;
|
||||||
|
Ref r;
|
||||||
|
int n;
|
||||||
|
|
||||||
i = *pi;
|
i = *pi;
|
||||||
/* simplify more instructions here;
|
/* simplify more instructions here;
|
||||||
* copy 0 into xor, mul 2^n into shift,
|
* copy 0 into xor bit rotations,
|
||||||
* bit rotations, ... */
|
* etc. */
|
||||||
switch (i->op) {
|
switch (i->op) {
|
||||||
case Oblit1:
|
case Oblit1:
|
||||||
assert(i > b->ins);
|
assert(i > b->ins);
|
||||||
|
@ -54,6 +81,29 @@ ins(Ins **pi, int *new, Blk *b, Fn *fn)
|
||||||
blit((i-1)->arg, rsval(i->arg[0]), fn);
|
blit((i-1)->arg, rsval(i->arg[0]), fn);
|
||||||
*pi = i-1;
|
*pi = i-1;
|
||||||
break;
|
break;
|
||||||
|
case Omul:
|
||||||
|
case Oudiv:
|
||||||
|
case Ourem:
|
||||||
|
r = i->arg[1];
|
||||||
|
if (KBASE(i->cls) == 0)
|
||||||
|
if (rtype(r) == RCon) {
|
||||||
|
c = &fn->con[r.val];
|
||||||
|
if (c->type == CBits)
|
||||||
|
if (ispow2(c->bits.i)) {
|
||||||
|
n = ulog2(c->bits.i);
|
||||||
|
if (i->op == Ourem) {
|
||||||
|
i->op = Oand;
|
||||||
|
i->arg[1] = getcon((1ull<<n) - 1, fn);
|
||||||
|
} else if (i->op == Oudiv) {
|
||||||
|
i->op = Oshr;
|
||||||
|
i->arg[1] = getcon(n, fn);
|
||||||
|
} else if (i->op == Omul) {
|
||||||
|
i->op = Oshl;
|
||||||
|
i->arg[1] = getcon(n, fn);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* fall through */
|
||||||
default:
|
default:
|
||||||
if (*new)
|
if (*new)
|
||||||
emiti(*i);
|
emiti(*i);
|
||||||
|
|
Loading…
Add table
Reference in a new issue