Use of org.graalvm.compiler.asm.amd64.AMD64MacroAssembler in project graal by oracle.
The class AMD64ArrayEqualsOp, method emitCode.
@Override
public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
Register result = asRegister(resultValue);
Register array1 = asRegister(temp1);
Register array2 = asRegister(temp2);
Register length = asRegister(temp3);
Label trueLabel = new Label();
Label falseLabel = new Label();
Label done = new Label();
// Load array base addresses.
masm.leaq(array1, new AMD64Address(asRegister(array1Value), arrayBaseOffset));
masm.leaq(array2, new AMD64Address(asRegister(array2Value), arrayBaseOffset));
// Get array length in bytes.
masm.movl(length, asRegister(lengthValue));
if (arrayIndexScale > 1) {
// scale length
masm.shll(length, NumUtil.log2Ceil(arrayIndexScale));
}
// copy
masm.movl(result, length);
if (supportsAVX2(crb.target)) {
emitAVXCompare(crb, masm, result, array1, array2, length, trueLabel, falseLabel);
} else if (supportsSSE41(crb.target)) {
// this code is used for AVX as well because our backend correctly ensures that
// VEX-prefixed instructions are emitted if AVX is supported
emitSSE41Compare(crb, masm, result, array1, array2, length, trueLabel, falseLabel);
}
emit8ByteCompare(crb, masm, result, array1, array2, length, trueLabel, falseLabel);
emitTailCompares(masm, result, array1, array2, length, trueLabel, falseLabel);
// Return true
masm.bind(trueLabel);
masm.movl(result, 1);
masm.jmpb(done);
// Return false
masm.bind(falseLabel);
masm.xorl(result, result);
// That's it
masm.bind(done);
}
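For orientation, here is a plain-Java reference model of what the emitted sequence computes (a sketch only; the class name and the scalar loop are illustrative, whereas the real code compares 16, 8, 4, 2 and 1 bytes at a time through the helpers called above).
// Illustrative reference model only; not part of the Graal sources.
final class ArrayRegionEqualsModel {

    /** Mirrors emitCode: scale the element count to bytes, then compare the two regions. */
    static boolean regionEquals(byte[] array1, byte[] array2, int length, int arrayIndexScale) {
        int bytes = length * arrayIndexScale; // emitted as shll(length, log2(arrayIndexScale)) for power-of-two scales
        for (int i = 0; i < bytes; i++) {     // the emitted code walks this in 16/8/4/2/1-byte steps
            if (array1[i] != array2[i]) {
                return false;                 // falseLabel: result = 0
            }
        }
        return true;                          // trueLabel: result = 1
    }
}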
Use of org.graalvm.compiler.asm.amd64.AMD64MacroAssembler in project graal by oracle.
The class AMD64ArrayEqualsOp, method emitFloatCompare.
/**
* Emits code that checks whether two floating-point values are bitwise equal or both NaN.
*/
private void emitFloatCompare(AMD64MacroAssembler masm, Register base1, Register base2, Register index, int offset, Label falseLabel, boolean skipBitwiseCompare) {
AMD64Address address1 = new AMD64Address(base1, index, Scale.Times1, offset);
AMD64Address address2 = new AMD64Address(base2, index, Scale.Times1, offset);
Label bitwiseEqual = new Label();
if (!skipBitwiseCompare) {
// Bitwise compare
Register temp = asRegister(temp4);
if (kind == JavaKind.Float) {
masm.movl(temp, address1);
masm.cmpl(temp, address2);
} else {
masm.movq(temp, address1);
masm.cmpq(temp, address2);
}
masm.jccb(ConditionFlag.Equal, bitwiseEqual);
}
emitNaNCheck(masm, address1, falseLabel);
emitNaNCheck(masm, address2, falseLabel);
masm.bind(bitwiseEqual);
}
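A minimal plain-Java sketch of the predicate this emits, assuming the JavaKind.Float path (the Double path is the same with raw long bits):
// Sketch only: the per-element predicate the emitted code evaluates (JavaKind.Float shown).
static boolean bitwiseEqualOrBothNaN(float a, float b) {
    // Bitwise compare (movl/cmpl on the raw bits in the emitted code) ...
    if (Float.floatToRawIntBits(a) == Float.floatToRawIntBits(b)) {
        return true;                          // jccb(Equal, bitwiseEqual)
    }
    // ... otherwise both values must be NaN (emitNaNCheck on each address).
    return Float.isNaN(a) && Float.isNaN(b);  // jump to falseLabel if either check fails
}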
Use of org.graalvm.compiler.asm.amd64.AMD64MacroAssembler in project graal by oracle.
The class AMD64ArrayEqualsOp, method emitSSE41Compare.
/**
* Emits code that uses SSE4.1 128-bit (16-byte) vector compares.
*/
private void emitSSE41Compare(CompilationResultBuilder crb, AMD64MacroAssembler masm, Register result, Register array1, Register array2, Register length, Label trueLabel, Label falseLabel) {
assert supportsSSE41(crb.target);
Register vector1 = asRegister(vectorTemp1, AMD64Kind.DOUBLE);
Register vector2 = asRegister(vectorTemp2, AMD64Kind.DOUBLE);
Label loop = new Label();
Label compareTail = new Label();
boolean requiresNaNCheck = kind.isNumericFloat();
Label loopCheck = new Label();
Label nanCheck = new Label();
// Compare 16-byte vectors
// tail count (in bytes)
masm.andl(result, SSE4_1_VECTOR_SIZE - 1);
// vector count (in bytes)
masm.andl(length, ~(SSE4_1_VECTOR_SIZE - 1));
masm.jcc(ConditionFlag.Zero, compareTail);
masm.leaq(array1, new AMD64Address(array1, length, Scale.Times1, 0));
masm.leaq(array2, new AMD64Address(array2, length, Scale.Times1, 0));
masm.negq(length);
// Align the main loop
masm.align(crb.target.wordSize * 2);
masm.bind(loop);
masm.movdqu(vector1, new AMD64Address(array1, length, Scale.Times1, 0));
masm.movdqu(vector2, new AMD64Address(array2, length, Scale.Times1, 0));
masm.pxor(vector1, vector2);
masm.ptest(vector1, vector1);
masm.jcc(ConditionFlag.NotZero, requiresNaNCheck ? nanCheck : falseLabel);
masm.bind(loopCheck);
masm.addq(length, SSE4_1_VECTOR_SIZE);
masm.jcc(ConditionFlag.NotZero, loop);
masm.testl(result, result);
masm.jcc(ConditionFlag.Zero, trueLabel);
if (requiresNaNCheck) {
Label unalignedCheck = new Label();
masm.jmpb(unalignedCheck);
masm.bind(nanCheck);
emitFloatCompareWithinRange(crb, masm, array1, array2, length, 0, falseLabel, SSE4_1_VECTOR_SIZE);
masm.jmpb(loopCheck);
masm.bind(unalignedCheck);
}
/*
* Compare the remaining bytes with an unaligned memory load aligned to the end of the
* array.
*/
masm.movdqu(vector1, new AMD64Address(array1, result, Scale.Times1, -SSE4_1_VECTOR_SIZE));
masm.movdqu(vector2, new AMD64Address(array2, result, Scale.Times1, -SSE4_1_VECTOR_SIZE));
masm.pxor(vector1, vector2);
masm.ptest(vector1, vector1);
if (requiresNaNCheck) {
masm.jcc(ConditionFlag.Zero, trueLabel);
emitFloatCompareWithinRange(crb, masm, array1, array2, result, -SSE4_1_VECTOR_SIZE, falseLabel, SSE4_1_VECTOR_SIZE);
} else {
masm.jcc(ConditionFlag.NotZero, falseLabel);
}
masm.jmp(trueLabel);
masm.bind(compareTail);
masm.movl(length, result);
}
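The control flow above reduces to the following plain-Java sketch (illustrative only; it ignores the NaN-check detour, models each movdqu/pxor/ptest sequence as a byte-wise block compare, and treats the compareTail fall-through to the 8-byte/tail compares as one more block compare):
// Sketch of the loop shape only; not the emitted instruction sequence.
static boolean sse41CompareShape(byte[] a, byte[] b, int lengthInBytes) {
    final int vectorSize = 16;                               // SSE4_1_VECTOR_SIZE
    int tail = lengthInBytes & (vectorSize - 1);             // andl(result, SSE4_1_VECTOR_SIZE - 1)
    int vectorBytes = lengthInBytes & ~(vectorSize - 1);     // andl(length, ~(SSE4_1_VECTOR_SIZE - 1))
    if (vectorBytes == 0) {
        return blockEquals(a, b, 0, tail);                   // jcc(Zero, compareTail), then 8-byte/tail compares
    }
    // The bases are advanced to the end of the vectorizable region and indexed with a
    // negative offset that counts up to zero (leaq/negq/addq in the emitted code).
    for (int i = -vectorBytes; i != 0; i += vectorSize) {
        if (!blockEquals(a, b, vectorBytes + i, vectorSize)) {
            return false;                                    // falseLabel (or nanCheck)
        }
    }
    if (tail == 0) {
        return true;                                         // testl(result, result); jcc(Zero, trueLabel)
    }
    // One final, possibly overlapping 16-byte block aligned to the end of the compared region.
    return blockEquals(a, b, lengthInBytes - vectorSize, vectorSize);
}

private static boolean blockEquals(byte[] a, byte[] b, int offset, int size) {
    for (int i = 0; i < size; i++) {
        if (a[offset + i] != b[offset + i]) {
            return false;
        }
    }
    return true;
}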
Use of org.graalvm.compiler.asm.amd64.AMD64MacroAssembler in project graal by oracle.
The class AMD64MathIntrinsicUnaryOp, method expIntrinsic.
public void expIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) {
ArrayDataPointerConstant onePtr = new ArrayDataPointerConstant(one, 16);
ArrayDataPointerConstant cvExpPtr = new ArrayDataPointerConstant(cvExp, 16);
ArrayDataPointerConstant shifterExpPtr = new ArrayDataPointerConstant(shifterExp, 8);
ArrayDataPointerConstant mMaskExpPtr = new ArrayDataPointerConstant(mMaskExp, 16);
ArrayDataPointerConstant biasExpPtr = new ArrayDataPointerConstant(biasExp, 16);
ArrayDataPointerConstant tblAddrExpPtr = new ArrayDataPointerConstant(tblAddrExp, 16);
ArrayDataPointerConstant expBiasPtr = new ArrayDataPointerConstant(expBias, 8);
ArrayDataPointerConstant xMaxExpPtr = new ArrayDataPointerConstant(xMaxExp, 8);
ArrayDataPointerConstant xMinExpPtr = new ArrayDataPointerConstant(xMinExp, 8);
ArrayDataPointerConstant infExpPtr = new ArrayDataPointerConstant(infExp, 8);
ArrayDataPointerConstant zeroExpPtr = new ArrayDataPointerConstant(zeroExp, 8);
ArrayDataPointerConstant allOnesExpPtr = new ArrayDataPointerConstant(allOnesExp, 8);
Label bb0 = new Label();
Label bb1 = new Label();
Label bb2 = new Label();
Label bb3 = new Label();
Label bb4 = new Label();
Label bb5 = new Label();
Label bb7 = new Label();
Label bb8 = new Label();
Label bb9 = new Label();
Label bb10 = new Label();
Label bb11 = new Label();
Label bb12 = new Label();
Label bb14 = new Label();
Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD);
Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD);
Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD);
Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD);
Register gpr5 = asRegister(gpr5Temp, AMD64Kind.QWORD);
Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE);
Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE);
Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE);
Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE);
Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE);
Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE);
Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE);
Register temp8 = asRegister(xmm8Temp, AMD64Kind.DOUBLE);
Register temp9 = asRegister(xmm9Temp, AMD64Kind.DOUBLE);
Register temp10 = asRegister(xmm10Temp, AMD64Kind.DOUBLE);
AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp);
setCrb(crb);
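// Spill the raw input bits to the stack; the special-case paths below reload them from this slot (directly and via gpr5).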
masm.movsd(stackSlot, value);
if (dest.encoding != value.encoding) {
masm.movdqu(dest, value);
}
masm.movdqu(temp9, externalAddress(mMaskExpPtr));   // 0xffffffc0, 0x00000000, 0xffffffc0, 0x00000000
masm.movdqu(temp10, externalAddress(biasExpPtr));   // 0x0000ffc0, 0x00000000, 0x0000ffc0, 0x00000000
masm.unpcklpd(dest, dest);
masm.leaq(gpr5, stackSlot);
masm.leaq(gpr2, externalAddress(cvExpPtr));
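// The cvExp table holds the reduction constants (decoded from the hex comments below):
// [gpr2 + 0]  = 64/log(2), [gpr2 + 16]/[gpr2 + 32] = high/low parts of log(2)/64;
// shifterExp is the 1.5 * 2^52 "shifter" used to round x * 64/log(2) to an integer.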
masm.movdqu(temp1, new AMD64Address(gpr2, 0));      // 0x652b82fe, 0x40571547, 0x652b82fe, 0x40571547
masm.movdqu(temp6, externalAddress(shifterExpPtr)); // 0x00000000, 0x43380000, 0x00000000, 0x43380000
masm.movdqu(temp2, new AMD64Address(gpr2, 16));     // 0xfefa0000, 0x3f862e42, 0xfefa0000, 0x3f862e42
masm.movdqu(temp3, new AMD64Address(gpr2, 32));     // 0xbc9e3b3a, 0x3d1cf79a, 0xbc9e3b3a, 0x3d1cf79a
masm.pextrw(gpr1, dest, 3);
masm.andl(gpr1, 32767);
masm.movl(gpr4, 16527);
masm.subl(gpr4, gpr1);
masm.subl(gpr1, 15504);
masm.orl(gpr4, gpr1);
masm.cmpl(gpr4, Integer.MIN_VALUE);
masm.jcc(ConditionFlag.AboveEqual, bb0);
masm.leaq(gpr4, externalAddress(tblAddrExpPtr));
masm.movdqu(temp8, new AMD64Address(gpr2, 48));     // 0xfffffffe, 0x3fdfffff, 0xfffffffe, 0x3fdfffff
masm.movdqu(temp4, new AMD64Address(gpr2, 64));     // 0xe3289860, 0x3f56c15c, 0x555b9e25, 0x3fa55555
masm.movdqu(temp5, new AMD64Address(gpr2, 80));     // 0xc090cf0f, 0x3f811115, 0x55548ba1, 0x3fc55555
masm.mulpd(temp1, dest);
masm.addpd(temp1, temp6);
masm.movapd(temp7, temp1);
masm.movdl(gpr1, temp1);
masm.pand(temp7, temp9);
masm.subpd(temp1, temp6);
masm.mulpd(temp2, temp1);
masm.mulpd(temp3, temp1);
masm.paddq(temp7, temp10);
masm.subpd(dest, temp2);
masm.movl(gpr3, gpr1);
masm.andl(gpr3, 63);
masm.shll(gpr3, 4);
masm.movdqu(temp2, new AMD64Address(gpr3, gpr4, Scale.Times1, 0));
masm.sarl(gpr1, 6);
masm.psllq(temp7, 46);
masm.subpd(dest, temp3);
masm.mulpd(temp4, dest);
masm.movl(gpr4, gpr1);
masm.movapd(temp6, dest);
masm.movapd(temp1, dest);
masm.mulpd(temp6, temp6);
masm.mulpd(dest, temp6);
masm.addpd(temp5, temp4);
masm.mulsd(dest, temp6);
masm.mulpd(temp6, temp8);
masm.addsd(temp1, temp2);
masm.unpckhpd(temp2, temp2);
masm.mulpd(dest, temp5);
masm.addsd(temp1, dest);
masm.por(temp2, temp7);
masm.unpckhpd(dest, dest);
masm.addsd(dest, temp1);
masm.addsd(dest, temp6);
masm.addl(gpr4, 894);
masm.cmpl(gpr4, 1916);
masm.jcc(ConditionFlag.Above, bb1);
masm.mulsd(dest, temp2);
masm.addsd(dest, temp2);
masm.jmp(bb14);
masm.bind(bb1);
masm.movdqu(temp6, externalAddress(expBiasPtr));    // 0x00000000, 0x3ff00000, 0x00000000, 0x3ff00000
masm.xorpd(temp3, temp3);
masm.movdqu(temp4, externalAddress(allOnesExpPtr)); // 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
masm.movl(gpr4, -1022);
masm.subl(gpr4, gpr1);
masm.movdl(temp5, gpr4);
masm.psllq(temp4, temp5);
masm.movl(gpr3, gpr1);
masm.sarl(gpr1, 1);
masm.pinsrw(temp3, gpr1, 3);
masm.psllq(temp3, 4);
masm.psubd(temp2, temp3);
masm.mulsd(dest, temp2);
masm.cmpl(gpr4, 52);
masm.jcc(ConditionFlag.Greater, bb2);
masm.pand(temp4, temp2);
masm.paddd(temp3, temp6);
masm.subsd(temp2, temp4);
masm.addsd(dest, temp2);
masm.cmpl(gpr3, 1023);
masm.jcc(ConditionFlag.GreaterEqual, bb3);
masm.pextrw(gpr3, dest, 3);
masm.andl(gpr3, 32768);
masm.orl(gpr4, gpr3);
masm.cmpl(gpr4, 0);
masm.jcc(ConditionFlag.Equal, bb4);
masm.movapd(temp6, dest);
masm.addsd(dest, temp4);
masm.mulsd(dest, temp3);
masm.pextrw(gpr3, dest, 3);
masm.andl(gpr3, 32752);
masm.cmpl(gpr3, 0);
masm.jcc(ConditionFlag.Equal, bb5);
masm.jmp(bb14);
masm.bind(bb5);
masm.mulsd(temp6, temp3);
masm.mulsd(temp4, temp3);
masm.movdqu(dest, temp6);
masm.pxor(temp6, temp4);
masm.psrad(temp6, 31);
masm.pshufd(temp6, temp6, 85);
masm.psllq(dest, 1);
masm.psrlq(dest, 1);
masm.pxor(dest, temp6);
masm.psrlq(temp6, 63);
masm.paddq(dest, temp6);
masm.paddq(dest, temp4);
masm.jmp(bb14);
masm.bind(bb4);
masm.addsd(dest, temp4);
masm.mulsd(dest, temp3);
masm.jmp(bb14);
masm.bind(bb3);
masm.addsd(dest, temp4);
masm.mulsd(dest, temp3);
masm.pextrw(gpr3, dest, 3);
masm.andl(gpr3, 32752);
masm.cmpl(gpr3, 32752);
masm.jcc(ConditionFlag.AboveEqual, bb7);
masm.jmp(bb14);
masm.bind(bb2);
masm.paddd(temp3, temp6);
masm.addpd(dest, temp2);
masm.mulsd(dest, temp3);
masm.jmp(bb14);
masm.bind(bb8);
masm.movsd(dest, externalAddress(xMaxExpPtr));      // 0xffffffff, 0x7fefffff
masm.movsd(temp8, externalAddress(xMinExpPtr));     // 0x00000000, 0x00100000
masm.cmpl(gpr1, 2146435072);
masm.jcc(ConditionFlag.AboveEqual, bb9);
masm.movl(gpr1, new AMD64Address(gpr5, 4));
masm.cmpl(gpr1, Integer.MIN_VALUE);
masm.jcc(ConditionFlag.AboveEqual, bb10);
masm.mulsd(dest, dest);
masm.bind(bb7);
masm.jmp(bb14);
masm.bind(bb10);
masm.mulsd(dest, temp8);
masm.jmp(bb14);
masm.bind(bb9);
masm.movl(gpr4, stackSlot);
masm.cmpl(gpr1, 2146435072);
masm.jcc(ConditionFlag.Above, bb11);
masm.cmpl(gpr4, 0);
masm.jcc(ConditionFlag.NotEqual, bb11);
masm.movl(gpr1, new AMD64Address(gpr5, 4));
masm.cmpl(gpr1, 2146435072);
masm.jcc(ConditionFlag.NotEqual, bb12);
masm.movsd(dest, externalAddress(infExpPtr));       // 0x00000000, 0x7ff00000
masm.jmp(bb14);
masm.bind(bb12);
masm.movsd(dest, externalAddress(zeroExpPtr));      // 0x00000000, 0x00000000
masm.jmp(bb14);
masm.bind(bb11);
masm.movsd(dest, stackSlot);
masm.addsd(dest, dest);
masm.jmp(bb14);
masm.bind(bb0);
masm.movl(gpr1, new AMD64Address(gpr5, 4));
masm.andl(gpr1, 2147483647);
masm.cmpl(gpr1, 1083179008);
masm.jcc(ConditionFlag.AboveEqual, bb8);
masm.addsd(dest, externalAddress(onePtr));          // 0x00000000, 0x3ff00000
masm.bind(bb14);
}
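The constants loaded at the top of expIntrinsic (64/log(2), the 1.5 * 2^52 shifter, the high/low parts of log(2)/64, and the 64-entry table addressed through tblAddrExpPtr) suggest the usual table-driven reduction for exp. A hedged plain-Java sketch of that reduction, with the polynomial and high/low-part arithmetic simplified away:
// Hedged sketch of the reduction the constants above encode (illustrative only; the real
// code evaluates a short polynomial in high/low parts instead of calling Math.exp).
static double expReductionSketch(double x) {
    double n = Math.rint(x * (64.0 / Math.log(2.0))); // temp1 = x * 64/ln(2), rounded via the 1.5*2^52 shifter
    int k = (int) n;
    int j = k & 63;                                    // table index (andl(gpr3, 63))
    int m = k >> 6;                                    // power-of-two scaling (sarl(gpr1, 6))
    double r = x - n * (Math.log(2.0) / 64.0);         // reduced argument, |r| <= ln(2)/128
    double twoToJ = Math.pow(2.0, j / 64.0);           // read from the 64-entry table in the real code
    return Math.scalb(twoToJ * Math.exp(r), m);        // exp(x) = 2^m * 2^(j/64) * exp(r); exp(r) stands in for the polynomial
}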
Use of org.graalvm.compiler.asm.amd64.AMD64MacroAssembler in project graal by oracle.
The class AMD64MathIntrinsicUnaryOp, method logIntrinsic.
/*
 * Copyright (c) 2014, 2016, Intel Corporation. All rights reserved. Intel Math Library (LIBM)
 * Source Code
 *
 * ALGORITHM DESCRIPTION - LOG()
 * -----------------------------
 *
 * x = 2^k * mx, mx in [1, 2)
 *
 * Get B ~ 1/mx based on the output of the rcpps instruction (B0): B = int(B0 * 2^7 + 0.5) / 2^7
 *
 * Reduced argument: r = B*mx - 1.0 (computed accurately in high and low parts)
 *
 * Result: k*log(2) - log(B) + p(r), if |x - 1| >= small value (2^-6), where p(r) is a degree-7
 * polynomial and -log(B) is read from the data table (high and low parts). The result is formed
 * from high and low parts.
 *
 * Special cases:
 *   log(NaN) = quiet NaN, and raise invalid exception
 *   log(+INF) = that INF
 *   log(0) = -INF with divide-by-zero exception raised
 *   log(1) = +0
 *   log(x) = NaN with invalid exception raised if x < -0, including -INF
 */
public void logIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) {
ArrayDataPointerConstant logTwoTablePtr = new ArrayDataPointerConstant(logTwoTable, 16);
ArrayDataPointerConstant logTwoDataPtr = new ArrayDataPointerConstant(logTwoData, 16);
ArrayDataPointerConstant coeffLogTwoDataPtr = new ArrayDataPointerConstant(coeffLogTwoData, 16);
Label bb0 = new Label();
Label bb1 = new Label();
Label bb2 = new Label();
Label bb3 = new Label();
Label bb4 = new Label();
Label bb5 = new Label();
Label bb6 = new Label();
Label bb7 = new Label();
Label bb8 = new Label();
Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD);
Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD);
Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD);
Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD);
Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE);
Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE);
Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE);
Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE);
Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE);
Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE);
Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE);
AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp);
setCrb(crb);
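// Spill the raw input bits to the stack; the special-case path at bb0 reloads them from this slot.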
masm.movdq(stackSlot, value);
if (dest.encoding != value.encoding) {
masm.movdqu(dest, value);
}
masm.movq(gpr1, 0x3ff0000000000000L);
masm.movdq(temp2, gpr1);
masm.movq(gpr3, 0x77f0000000000000L);
masm.movdq(temp3, gpr3);
masm.movl(gpr2, 32768);
masm.movdl(temp4, gpr2);
masm.movq(gpr2, 0xffffe00000000000L);
masm.movdq(temp5, gpr2);
masm.movdqu(temp1, value);
masm.pextrw(gpr1, dest, 3);
masm.por(dest, temp2);
masm.movl(gpr2, 16352);
masm.psrlq(dest, 27);
masm.leaq(gpr4, externalAddress(logTwoTablePtr));
masm.psrld(dest, 2);
masm.rcpps(dest, dest);
masm.psllq(temp1, 12);
masm.pshufd(temp6, temp5, 0xE4);
masm.psrlq(temp1, 12);
masm.subl(gpr1, 16);
masm.cmpl(gpr1, 32736);
masm.jcc(ConditionFlag.AboveEqual, bb0);
masm.bind(bb1);
masm.paddd(dest, temp4);
masm.por(temp1, temp3);
masm.movdl(gpr3, dest);
masm.psllq(dest, 29);
masm.pand(temp5, temp1);
masm.pand(dest, temp6);
masm.subsd(temp1, temp5);
masm.mulpd(temp5, dest);
masm.andl(gpr1, 32752);
masm.subl(gpr1, gpr2);
masm.cvtsi2sdl(temp7, gpr1);
masm.mulsd(temp1, dest);
masm.movdq(temp6, externalAddress(logTwoDataPtr));        // 0xfefa3800, 0x3fa62e42
masm.movdqu(temp3, externalAddress(coeffLogTwoDataPtr));  // 0x92492492, 0x3fc24924, 0x00000000, 0xbfd00000
masm.subsd(temp5, temp2);
masm.andl(gpr3, 16711680);
masm.shrl(gpr3, 12);
masm.movdqu(dest, new AMD64Address(gpr4, gpr3, Scale.Times1, 0));
masm.leaq(gpr4, externalAddress(coeffLogTwoDataPtr));
masm.movdqu(temp4, new AMD64Address(gpr4, 16));           // 0x3d6fb175, 0xbfc5555e, 0x55555555, 0x3fd55555
masm.addsd(temp1, temp5);
masm.movdqu(temp2, new AMD64Address(gpr4, 32));           // 0x9999999a, 0x3fc99999, 0x00000000, 0xbfe00000
masm.mulsd(temp6, temp7);
if (masm.supports(CPUFeature.SSE3)) {
masm.movddup(temp5, temp1);
} else {
masm.movdqu(temp5, temp1);
masm.movlhps(temp5, temp5);
}
masm.leaq(gpr4, externalAddress(logTwoDataPtr));
masm.mulsd(temp7, new AMD64Address(gpr4, 8));             // 0x93c76730, 0x3ceef357
masm.mulsd(temp3, temp1);
masm.addsd(dest, temp6);
masm.mulpd(temp4, temp5);
masm.mulpd(temp5, temp5);
if (masm.supports(CPUFeature.SSE3)) {
masm.movddup(temp6, dest);
} else {
masm.movdqu(temp6, dest);
masm.movlhps(temp6, temp6);
}
masm.addsd(dest, temp1);
masm.addpd(temp4, temp2);
masm.mulpd(temp3, temp5);
masm.subsd(temp6, dest);
masm.mulsd(temp4, temp1);
masm.pshufd(temp2, dest, 0xEE);
masm.addsd(temp1, temp6);
masm.mulsd(temp5, temp5);
masm.addsd(temp7, temp2);
masm.addpd(temp4, temp3);
masm.addsd(temp1, temp7);
masm.mulpd(temp4, temp5);
masm.addsd(temp1, temp4);
masm.pshufd(temp5, temp4, 0xEE);
masm.addsd(temp1, temp5);
masm.addsd(dest, temp1);
masm.jmp(bb8);
masm.bind(bb0);
masm.movdq(dest, stackSlot);
masm.movdq(temp1, stackSlot);
masm.addl(gpr1, 16);
masm.cmpl(gpr1, 32768);
masm.jcc(ConditionFlag.AboveEqual, bb2);
masm.cmpl(gpr1, 16);
masm.jcc(ConditionFlag.Below, bb3);
masm.bind(bb4);
masm.addsd(dest, dest);
masm.jmp(bb8);
masm.bind(bb5);
masm.jcc(ConditionFlag.Above, bb4);
masm.cmpl(gpr3, 0);
masm.jcc(ConditionFlag.Above, bb4);
masm.jmp(bb6);
masm.bind(bb3);
masm.xorpd(temp1, temp1);
masm.addsd(temp1, dest);
masm.movdl(gpr3, temp1);
masm.psrlq(temp1, 32);
masm.movdl(gpr2, temp1);
masm.orl(gpr3, gpr2);
masm.cmpl(gpr3, 0);
masm.jcc(ConditionFlag.Equal, bb7);
masm.xorpd(temp1, temp1);
masm.movl(gpr1, 18416);
masm.pinsrw(temp1, gpr1, 3);
masm.mulsd(dest, temp1);
masm.movdqu(temp1, dest);
masm.pextrw(gpr1, dest, 3);
masm.por(dest, temp2);
masm.psrlq(dest, 27);
masm.movl(gpr2, 18416);
masm.psrld(dest, 2);
masm.rcpps(dest, dest);
masm.psllq(temp1, 12);
masm.pshufd(temp6, temp5, 0xE4);
masm.psrlq(temp1, 12);
masm.jmp(bb1);
masm.bind(bb2);
masm.movdl(gpr3, temp1);
masm.psrlq(temp1, 32);
masm.movdl(gpr2, temp1);
masm.addl(gpr2, gpr2);
masm.cmpl(gpr2, -2097152);
masm.jcc(ConditionFlag.AboveEqual, bb5);
masm.orl(gpr3, gpr2);
masm.cmpl(gpr3, 0);
masm.jcc(ConditionFlag.Equal, bb7);
masm.bind(bb6);
masm.xorpd(temp1, temp1);
masm.xorpd(dest, dest);
masm.movl(gpr1, 32752);
masm.pinsrw(temp1, gpr1, 3);
masm.mulsd(dest, temp1);
masm.jmp(bb8);
masm.bind(bb7);
masm.xorpd(temp1, temp1);
masm.xorpd(dest, dest);
masm.movl(gpr1, 49136);
masm.pinsrw(dest, gpr1, 3);
masm.divsd(dest, temp1);
masm.bind(bb8);
}
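The ALGORITHM DESCRIPTION comment above logIntrinsic can be followed in plain Java. A hedged sketch of the reduction for normal, positive x (the table lookup, polynomial and high/low-part arithmetic are replaced by library calls; the special cases listed in the comment are handled by bb0..bb7 in the real code):
// Sketch of the reduction described in the header comment (illustrative only).
static double logReductionSketch(double x) {
    int k = Math.getExponent(x);                         // x = 2^k * mx, mx in [1, 2)
    double mx = Math.scalb(x, -k);
    double b = Math.rint(128.0 / mx) / 128.0;            // B = int(B0 * 2^7 + 0.5) / 2^7, with B0 ~ rcpps(mx)
    double r = b * mx - 1.0;                              // reduced argument (small)
    // log(x) = k*log(2) - log(B) + p(r); Math.log1p(r) stands in for the degree-7 polynomial p(r),
    // and Math.log(b) for the tabulated -log(B) high/low parts.
    return k * Math.log(2.0) - Math.log(b) + Math.log1p(r);
}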