amd64/isel: fix floating point == and != result with NaN

On x86_64, ucomis[sd] sets ZF=1, PF=0, CF=0 for equal arguments.
However, if the arguments are unordered it sets ZF=1, PF=1, CF=1,
and there is no jump/flag instruction for ZF=1 & PF=0 or ZF=1 & CF=0.

So, in order to correctly implement ceq[sd] on x86_64, we need to
be a bit more creative. There are several options available, depending
on whether the result of ceq[sd] is used with jnz, or with other
instructions, or both.

If the result is used for a conditional jump, both gcc and clang
use a combination of jp and jnz:

	ucomisd %xmm1, %xmm0
	jp .Lfalse
	jnz .Lfalse
	...
	.Lfalse:

If the result is used in other instructions or return, gcc does the
following for x == y:

	ucomisd %xmm1, %xmm0
	setnp %al
	movzbl %al, %eax
	movl $0, %edx
	cmovne %edx, %eax

This sets EAX to 1 if PF=0 (0 otherwise), then uses cmovne to clear it if ZF=0. It
also takes care to avoid clobbering the flags register in case the
result is also used for a conditional jump. Implementing this
approach in QBE would require adding an architecture-specific
instruction for cmovne.

In contrast, clang does an additional compare, this time using
cmpeqsd instead of ucomisd:

	cmpeqsd %xmm1, %xmm0
	movq %xmm0, %rax
	andl $1, %eax

The cmpeqsd instruction does a floating point equality test, setting
XMM0 to all 1s if they are equal and all 0s if they are not. However,
we need the result in a non-XMM register, so it moves the result
back then masks off all but the first bit.

Both of these approaches are a bit awkward to implement in QBE, so
instead, this commit does the following:

	ucomisd %xmm1, %xmm0
	setz %al
	movzbl %al, %eax
	setnp %cl
	movzbl %cl, %ecx
	andl %ecx, %eax

This sets the result by anding the two flags, but has a side effect
of clobbering the flags register. This was a problem in one of my
earlier patches to fix this issue[0], in addition to being more
complex than I'd hoped.

Instead, this commit always leaves the ceq[sd] instruction in the
block, even if the result is only used to control a jump, so that
the above instruction sequence is always used. Then, since we now
have ZF=!(ZF=1 & PF=0) for x == y, or ZF=!(ZF=0 | PF=1) for x != y,
we can use jnz for the jump instruction.

[0] 64833841b1
This commit is contained in:
Michael Forney 2021-08-22 12:55:02 -07:00 committed by Quentin Carbonneaux
parent 804921a3ab
commit 7ac88f5d48
2 changed files with 30 additions and 10 deletions

View file

@ -344,6 +344,26 @@ Emit:
if (isload(i.op))
goto case_Oload;
if (iscmp(i.op, &kc, &x)) {
switch (x) {
case NCmpI+Cfeq:
/* zf is set when operands are
* unordered, so we may have to
* check pf
*/
r0 = newtmp("isel", Kw, fn);
r1 = newtmp("isel", Kw, fn);
emit(Oand, Kw, i.to, r0, r1);
emit(Oflagfo, k, r1, R, R);
i.to = r0;
break;
case NCmpI+Cfne:
r0 = newtmp("isel", Kw, fn);
r1 = newtmp("isel", Kw, fn);
emit(Oor, Kw, i.to, r0, r1);
emit(Oflagfuo, k, r1, R, R);
i.to = r0;
break;
}
swap = cmpswap(i.arg, x);
if (swap)
x = cmpop(x);
@ -388,7 +408,7 @@ seljmp(Blk *b, Fn *fn)
r = b->jmp.arg;
t = &fn->tmp[r.val];
b->jmp.arg = R;
assert(!req(r, R) && rtype(r) != RCon);
assert(rtype(r) == RTmp);
if (b->s1 == b->s2) {
chuse(r, -1, fn);
b->jmp.type = Jjmp;
@ -400,7 +420,9 @@ seljmp(Blk *b, Fn *fn)
selcmp((Ref[2]){r, CON_Z}, Kw, 0, fn); /* todo, long jnz */
b->jmp.type = Jjf + Cine;
}
else if (iscmp(fi->op, &k, &c)) {
else if (iscmp(fi->op, &k, &c)
&& c != NCmpI+Cfeq /* see sel() */
&& c != NCmpI+Cfne) {
swap = cmpswap(fi->arg, c);
if (swap)
c = cmpop(c);