Search in sources :

Example 31 with AMD64Address

use of org.graalvm.compiler.asm.amd64.AMD64Address in project graal by oracle.

the class AMD64MathIntrinsicUnaryOp method sinIntrinsic.

/**
 * Emits the AMD64 instruction sequence that computes the sine of a double-precision value.
 *
 * The sequence is a fixed, hand-scheduled listing: registers are reused aggressively and several
 * conditional jumps consume flags set by an earlier compare, so the statement order must not be
 * changed.
 *
 * Control-flow overview (labels as bound below):
 * <ul>
 * <li>fall-through after the first range check: main path using a 64-entry lookup table
 * ({@code cTable}) plus polynomial terms;</li>
 * <li>{@code bb0}/{@code bb2}: arguments whose exponent field lies below the main range;</li>
 * <li>{@code bb1}: arguments whose exponent field lies above the main range, reduced with
 * integer arithmetic on {@code piInvTable} before re-entering the evaluation at
 * {@code bb12};</li>
 * <li>{@code bb14}: exponent field all ones (infinity or NaN), result is 0.0/0.0;</li>
 * <li>{@code bb15}: common exit.</li>
 * </ul>
 *
 * NOTE(review): the {@code *Temp} values, {@code stackTemp} and the constant arrays are declared
 * outside this method; presumably they are fields of the enclosing LIR op - confirm.
 *
 * @param dest register that holds the result when {@code bb15} is reached
 * @param value register holding the input argument
 * @param crb compilation result builder; used to resolve {@code stackTemp} and handed to
 *            {@code setCrb}
 * @param masm assembler that receives the emitted instructions
 */
public void sinIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) {
    // Data-section constants; the second constructor argument is 16 for the tables read with
    // 128-bit loads and 8 for the scalar constants (presumably the alignment - confirm).
    ArrayDataPointerConstant oneHalfPtr = new ArrayDataPointerConstant(oneHalf, 16);
    ArrayDataPointerConstant pTwoPtr = new ArrayDataPointerConstant(pTwo, 16);
    ArrayDataPointerConstant scFourPtr = new ArrayDataPointerConstant(scFour, 16);
    ArrayDataPointerConstant cTablePtr = new ArrayDataPointerConstant(cTable, 16);
    ArrayDataPointerConstant scTwoPtr = new ArrayDataPointerConstant(scTwo, 16);
    ArrayDataPointerConstant scThreePtr = new ArrayDataPointerConstant(scThree, 16);
    ArrayDataPointerConstant scOnePtr = new ArrayDataPointerConstant(scOne, 16);
    ArrayDataPointerConstant piInvTablePtr = new ArrayDataPointerConstant(piInvTable, 16);
    ArrayDataPointerConstant piFourPtr = new ArrayDataPointerConstant(piFour, 16);
    ArrayDataPointerConstant piThirtyTwoInvPtr = new ArrayDataPointerConstant(piThirtyTwoInv, 8);
    ArrayDataPointerConstant shifterPtr = new ArrayDataPointerConstant(shifter, 8);
    ArrayDataPointerConstant signMaskPtr = new ArrayDataPointerConstant(signMask, 8);
    ArrayDataPointerConstant pThreePtr = new ArrayDataPointerConstant(pThree, 8);
    ArrayDataPointerConstant allOnesPtr = new ArrayDataPointerConstant(allOnes, 8);
    ArrayDataPointerConstant twoPowFiftyFivePtr = new ArrayDataPointerConstant(twoPowFiftyFive, 8);
    ArrayDataPointerConstant twoPowFiftyFiveMPtr = new ArrayDataPointerConstant(twoPowFiftyFiveM, 8);
    ArrayDataPointerConstant pOnePtr = new ArrayDataPointerConstant(pOne, 8);
    // Branch targets. The numbering has gaps (no bb3, bb7) in the original listing.
    Label bb0 = new Label();
    Label bb1 = new Label();
    Label bb2 = new Label();
    Label bb4 = new Label();
    Label bb5 = new Label();
    Label bb6 = new Label();
    Label bb8 = new Label();
    Label bb9 = new Label();
    Label bb10 = new Label();
    Label bb11 = new Label();
    Label bb12 = new Label();
    Label bb13 = new Label();
    Label bb14 = new Label();
    Label bb15 = new Label();
    // General-purpose scratch registers. gpr3 is bound to rcxTemp; the single-operand shift
    // forms used below (e.g. masm.shll(gpr8)) presumably shift by CL, which would be why the
    // shift counts are computed in gpr3 - confirm against the assembler.
    Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD);
    Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD);
    Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD);
    Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD);
    Register gpr5 = asRegister(gpr5Temp, AMD64Kind.QWORD);
    Register gpr6 = asRegister(gpr6Temp, AMD64Kind.QWORD);
    Register gpr7 = asRegister(gpr7Temp, AMD64Kind.QWORD);
    Register gpr8 = asRegister(gpr8Temp, AMD64Kind.QWORD);
    Register gpr9 = asRegister(gpr9Temp, AMD64Kind.QWORD);
    Register gpr10 = asRegister(gpr10Temp, AMD64Kind.QWORD);
    // XMM scratch registers.
    Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE);
    Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE);
    Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE);
    Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE);
    Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE);
    Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE);
    Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE);
    Register temp8 = asRegister(xmm8Temp, AMD64Kind.DOUBLE);
    Register temp9 = asRegister(xmm9Temp, AMD64Kind.DOUBLE);
    AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp);
    setCrb(crb);
    // Spill the argument so its upper 32 bits can be read back as an integer below.
    masm.movsd(stackSlot, value);
    // From here on the argument is worked on in dest.
    if (dest.encoding != value.encoding) {
        masm.movdqu(dest, value);
    }
    // gpr1 = upper 32 bits of the spilled double (sign, exponent, top of the mantissa).
    masm.leaq(gpr1, stackSlot);
    masm.movl(gpr1, new AMD64Address(gpr1, 4));
    // 0x6dc9c883,
    masm.movdq(temp1, externalAddress(piThirtyTwoInvPtr));
    // 0x40245f30
    // 0x00000000,
    masm.movdq(temp2, externalAddress(shifterPtr));
    // 0x43380000
    // Range check on the magnitude bits: mask 0x7fff0000, rebase by 0x30300000 and compare
    // against 0x10c50000. Out-of-range arguments go to bb0, which re-tests the same flags.
    masm.andl(gpr1, 2147418112);
    masm.subl(gpr1, 808452096);
    masm.cmpl(gpr1, 281346048);
    masm.jcc(ConditionFlag.Above, bb0);
    // Main path: scale the argument by the piThirtyTwoInv constant and round to an integer
    // (adding one half carrying the argument's sign, then truncating).
    masm.mulsd(temp1, dest);
    // 0x00000000,
    masm.movdqu(temp5, externalAddress(oneHalfPtr));
    // 0x3fe00000,
    // 0x00000000,
    // 0x3fe00000
    // 0x00000000,
    masm.movdq(temp4, externalAddress(signMaskPtr));
    // 0x80000000
    masm.pand(temp4, dest);
    masm.por(temp5, temp4);
    masm.addpd(temp1, temp5);
    masm.cvttsd2sil(gpr4, temp1);
    masm.cvtsi2sdl(temp1, gpr4);
    // 0x1a600000,
    masm.movdqu(temp6, externalAddress(pTwoPtr));
    // 0x3d90b461,
    // 0x1a600000,
    // 0x3d90b461
    masm.movq(gpr7, 0x3fb921fb54400000L);
    masm.movdq(temp3, gpr7);
    // 0xa556c734,
    masm.movdqu(temp5, externalAddress(scFourPtr));
    // 0x3ec71de3,
    // 0x1a01a01a,
    // 0x3efa01a0
    masm.pshufd(temp4, dest, 0x44);
    masm.mulsd(temp3, temp1);
    // Duplicate the low double into both lanes; movddup needs SSE3, movlhps is the fallback.
    if (masm.supports(CPUFeature.SSE3)) {
        masm.movddup(temp1, temp1);
    } else {
        masm.movlhps(temp1, temp1);
    }
    // Table lookup: the low 6 bits of the rounded integer select one of 64 entries of
    // 32 bytes each in cTable; gpr1 becomes the address of the selected entry.
    masm.andl(gpr4, 63);
    masm.shll(gpr4, 5);
    masm.leaq(gpr1, externalAddress(cTablePtr));
    masm.addq(gpr1, gpr4);
    masm.movdqu(temp8, new AMD64Address(gpr1, 0));
    masm.mulpd(temp6, temp1);
    // 0x2e037073,
    masm.mulsd(temp1, externalAddress(pThreePtr));
    // 0x3b63198a
    masm.subsd(temp4, temp3);
    masm.subsd(dest, temp3);
    if (masm.supports(CPUFeature.SSE3)) {
        masm.movddup(temp3, temp4);
    } else {
        masm.movdqu(temp3, temp4);
        masm.movlhps(temp3, temp3);
    }
    masm.subsd(temp4, temp6);
    masm.pshufd(dest, dest, 0x44);
    masm.pshufd(temp7, temp8, 0xE);
    masm.movdqu(temp2, temp8);
    masm.movdqu(temp9, temp7);
    masm.mulpd(temp5, dest);
    masm.subpd(dest, temp6);
    masm.mulsd(temp7, temp4);
    masm.subsd(temp3, temp4);
    masm.mulpd(temp5, dest);
    masm.mulpd(dest, dest);
    masm.subsd(temp3, temp6);
    // 0x11111111,
    masm.movdqu(temp6, externalAddress(scTwoPtr));
    // 0x3f811111,
    // 0x55555555,
    // 0x3fa55555
    masm.subsd(temp1, temp3);
    masm.movdq(temp3, new AMD64Address(gpr1, 24));
    masm.addsd(temp2, temp3);
    masm.subsd(temp7, temp2);
    masm.mulsd(temp2, temp4);
    masm.mulpd(temp6, dest);
    masm.mulsd(temp3, temp4);
    masm.mulpd(temp2, dest);
    masm.mulpd(dest, dest);
    // 0x1a01a01a,
    masm.addpd(temp5, externalAddress(scThreePtr));
    // 0xbf2a01a0,
    // 0x16c16c17,
    // 0xbf56c16c
    masm.mulsd(temp4, temp8);
    // 0x55555555,
    masm.addpd(temp6, externalAddress(scOnePtr));
    // 0xbfc55555,
    // 0x00000000,
    // 0xbfe00000
    masm.mulpd(temp5, dest);
    masm.movdqu(dest, temp3);
    masm.addsd(temp3, temp9);
    masm.mulpd(temp1, temp7);
    masm.movdqu(temp7, temp4);
    masm.addsd(temp4, temp3);
    masm.addpd(temp6, temp5);
    masm.subsd(temp9, temp3);
    masm.subsd(temp3, temp4);
    masm.addsd(temp1, new AMD64Address(gpr1, 16));
    masm.mulpd(temp6, temp2);
    masm.addsd(temp9, dest);
    masm.addsd(temp3, temp7);
    masm.addsd(temp1, temp9);
    masm.addsd(temp1, temp3);
    masm.addsd(temp1, temp6);
    masm.unpckhpd(temp6, temp6);
    // Final combination: dest = temp4 + accumulated correction terms in temp1.
    masm.movdqu(dest, temp4);
    masm.addsd(temp1, temp6);
    masm.addsd(dest, temp1);
    masm.jmp(bb15);
    // bb14: reached from bb1 when the exponent field is all ones; produces 0.0 / 0.0.
    masm.bind(bb14);
    masm.xorpd(temp1, temp1);
    masm.xorpd(dest, dest);
    masm.divsd(dest, temp1);
    masm.jmp(bb15);
    // bb0: out-of-range arguments. The flags are still those of the cmpl before the jump
    // here: signed "greater" selects the large-argument path bb1, otherwise the small one.
    masm.bind(bb0);
    masm.jcc(ConditionFlag.Greater, bb1);
    masm.shrl(gpr1, 20);
    masm.cmpl(gpr1, 3325);
    masm.jcc(ConditionFlag.NotEqual, bb2);
    // 0xffffffff,
    masm.mulsd(dest, externalAddress(allOnesPtr));
    // 0x3fefffff
    masm.jmp(bb15);
    // bb2: dest is left untouched, so the argument itself is the result. The arithmetic on
    // temp3 is never read afterwards; presumably it only exists to raise the appropriate
    // floating-point exception flags - confirm.
    masm.bind(bb2);
    // 0x00000000,
    masm.movdq(temp3, externalAddress(twoPowFiftyFivePtr));
    // 0x43600000
    masm.mulsd(temp3, dest);
    masm.subsd(temp3, dest);
    // 0x00000000,
    masm.mulsd(temp3, externalAddress(twoPowFiftyFiveMPtr));
    // 0x3c800000
    masm.jmp(bb15);
    // bb1: large-argument path. First reject infinity/NaN (exponent bits 0x7ff0 all set in
    // the top 16-bit word), then derive a byte offset into piInvTable from the exponent.
    masm.bind(bb1);
    masm.pextrw(gpr3, dest, 3);
    masm.andl(gpr3, 32752);
    masm.cmpl(gpr3, 32752);
    masm.jcc(ConditionFlag.Equal, bb14);
    masm.subl(gpr3, 16224);
    masm.shrl(gpr3, 7);
    masm.andl(gpr3, 65532);
    masm.leaq(gpr10, externalAddress(piInvTablePtr));
    masm.addq(gpr3, gpr10);
    // Split the argument's bits into two integer pieces (gpr4: low 32 bits; gpr1: upper
    // mantissa bits with the top bit forced on) and multiply them against successive 32-bit
    // words of piInvTable, propagating carries by hand. NOTE(review): this looks like a
    // multi-word multiplication by a stored expansion of 1/pi - confirm.
    masm.movdq(gpr1, dest);
    masm.movl(gpr9, new AMD64Address(gpr3, 20));
    masm.movl(gpr7, new AMD64Address(gpr3, 24));
    masm.movl(gpr4, gpr1);
    masm.shrq(gpr1, 21);
    masm.orl(gpr1, Integer.MIN_VALUE);
    masm.shrl(gpr1, 11);
    masm.movl(gpr8, gpr9);
    masm.imulq(gpr9, gpr4);
    masm.imulq(gpr8, gpr1);
    masm.imulq(gpr7, gpr1);
    masm.movl(gpr5, new AMD64Address(gpr3, 16));
    masm.movl(gpr6, new AMD64Address(gpr3, 12));
    masm.movl(gpr10, gpr9);
    masm.shrq(gpr9, 32);
    masm.addq(gpr8, gpr9);
    masm.addq(gpr10, gpr7);
    masm.movl(gpr7, gpr10);
    masm.shrq(gpr10, 32);
    masm.addq(gpr8, gpr10);
    masm.movl(gpr9, gpr5);
    masm.imulq(gpr5, gpr4);
    masm.imulq(gpr9, gpr1);
    masm.movl(gpr10, gpr6);
    masm.imulq(gpr6, gpr4);
    masm.movl(gpr2, gpr5);
    masm.shrq(gpr5, 32);
    masm.addq(gpr8, gpr2);
    masm.movl(gpr2, gpr8);
    masm.shrq(gpr8, 32);
    masm.addq(gpr9, gpr5);
    masm.addq(gpr9, gpr8);
    masm.shlq(gpr2, 32);
    masm.orq(gpr7, gpr2);
    masm.imulq(gpr10, gpr1);
    masm.movl(gpr8, new AMD64Address(gpr3, 8));
    masm.movl(gpr5, new AMD64Address(gpr3, 4));
    masm.movl(gpr2, gpr6);
    masm.shrq(gpr6, 32);
    masm.addq(gpr9, gpr2);
    masm.movl(gpr2, gpr9);
    masm.shrq(gpr9, 32);
    masm.addq(gpr10, gpr6);
    masm.addq(gpr10, gpr9);
    masm.movq(gpr6, gpr8);
    masm.imulq(gpr8, gpr4);
    masm.imulq(gpr6, gpr1);
    masm.movl(gpr9, gpr8);
    masm.shrq(gpr8, 32);
    masm.addq(gpr10, gpr9);
    masm.movl(gpr9, gpr10);
    masm.shrq(gpr10, 32);
    masm.addq(gpr6, gpr8);
    masm.addq(gpr6, gpr10);
    masm.movq(gpr8, gpr5);
    masm.imulq(gpr5, gpr4);
    masm.imulq(gpr8, gpr1);
    masm.shlq(gpr9, 32);
    masm.orq(gpr9, gpr2);
    masm.movl(gpr1, new AMD64Address(gpr3, 0));
    masm.movl(gpr10, gpr5);
    masm.shrq(gpr5, 32);
    masm.addq(gpr6, gpr10);
    masm.movl(gpr10, gpr6);
    masm.shrq(gpr6, 32);
    masm.addq(gpr8, gpr5);
    masm.addq(gpr8, gpr6);
    masm.imulq(gpr4, gpr1);
    // Recover the table offset (gpr3 - table base), scale it by 8 via three doublings, and
    // combine it with the unbiased exponent (gpr2) to get a shift count in gpr3. gpr5 keeps
    // the sign bit (0x8000 of the top word).
    masm.pextrw(gpr2, dest, 3);
    masm.leaq(gpr6, externalAddress(piInvTablePtr));
    masm.subq(gpr3, gpr6);
    masm.addl(gpr3, gpr3);
    masm.addl(gpr3, gpr3);
    masm.addl(gpr3, gpr3);
    masm.addl(gpr3, 19);
    masm.movl(gpr5, 32768);
    masm.andl(gpr5, gpr2);
    masm.shrl(gpr2, 4);
    masm.andl(gpr2, 2047);
    masm.subl(gpr2, 1023);
    masm.subl(gpr3, gpr2);
    masm.addq(gpr8, gpr4);
    masm.movl(gpr4, gpr3);
    masm.addl(gpr4, 32);
    masm.cmpl(gpr3, 1);
    masm.jcc(ConditionFlag.Less, bb4);
    masm.negl(gpr3);
    masm.addl(gpr3, 29);
    masm.shll(gpr8);
    masm.movl(gpr6, gpr8);
    masm.andl(gpr8, 536870911);
    masm.testl(gpr8, 268435456);
    masm.jcc(ConditionFlag.NotEqual, bb5);
    masm.shrl(gpr8);
    masm.movl(gpr2, 0);
    masm.shlq(gpr8, 32);
    masm.orq(gpr8, gpr10);
    // bb6: common continuation for the fall-through above and for bb4/bb5/bb13.
    masm.bind(bb6);
    masm.cmpq(gpr8, 0);
    masm.jcc(ConditionFlag.Equal, bb8);
    // bb9: normalise the multi-word value gpr8:gpr9:gpr7 based on the position of the
    // highest set bit of gpr8 (bsrq).
    masm.bind(bb9);
    masm.bsrq(gpr10, gpr8);
    masm.movl(gpr3, 29);
    masm.subl(gpr3, gpr10);
    // bb10 re-tests the flags of this subl, so nothing may be inserted in between.
    masm.jcc(ConditionFlag.LessEqual, bb10);
    masm.shlq(gpr8);
    masm.movq(gpr1, gpr9);
    masm.shlq(gpr9);
    masm.addl(gpr4, gpr3);
    masm.negl(gpr3);
    masm.addl(gpr3, 64);
    masm.shrq(gpr1);
    masm.shrq(gpr7);
    masm.orq(gpr8, gpr1);
    masm.orq(gpr9, gpr7);
    // bb11: convert the integer words back to doubles and rescale them with exponents built
    // directly in the top 16-bit word of temp4/temp5 (pinsrw into word 3).
    masm.bind(bb11);
    masm.cvtsi2sdq(dest, gpr8);
    masm.shrq(gpr9, 1);
    masm.cvtsi2sdq(temp3, gpr9);
    masm.xorpd(temp4, temp4);
    masm.shll(gpr4, 4);
    masm.negl(gpr4);
    masm.addl(gpr4, 16368);
    masm.orl(gpr4, gpr5);
    masm.xorl(gpr4, gpr2);
    masm.pinsrw(temp4, gpr4, 3);
    masm.leaq(gpr1, externalAddress(piFourPtr));
    // 0x40000000,
    masm.movdqu(temp2, new AMD64Address(gpr1, 0));
    // 0x3fe921fb,
    // 0x18469899,
    // 0x3e64442d
    masm.xorpd(temp5, temp5);
    masm.subl(gpr4, 1008);
    masm.pinsrw(temp5, gpr4, 3);
    masm.mulsd(dest, temp4);
    masm.shll(gpr5, 16);
    masm.sarl(gpr5, 31);
    masm.mulsd(temp3, temp5);
    masm.movdqu(temp1, dest);
    masm.pshufd(temp6, temp2, 0xE);
    masm.mulsd(dest, temp2);
    masm.shrl(gpr6, 29);
    masm.addsd(temp1, temp3);
    masm.mulsd(temp3, temp2);
    masm.addl(gpr6, gpr5);
    masm.xorl(gpr6, gpr5);
    masm.mulsd(temp6, temp1);
    masm.movl(gpr1, gpr6);
    masm.addsd(temp6, temp3);
    masm.movdqu(temp2, dest);
    masm.addsd(dest, temp6);
    masm.subsd(temp2, dest);
    masm.addsd(temp6, temp2);
    // bb12: evaluation for the reduced argument held in dest with a correction term in
    // temp6. It uses the same constants and table as the main path above, with gpr1
    // additionally folded into the table index.
    masm.bind(bb12);
    // 0x6dc9c883,
    masm.movdq(temp1, externalAddress(piThirtyTwoInvPtr));
    // 0x40245f30
    masm.mulsd(temp1, dest);
    // 0x00000000,
    masm.movdq(temp5, externalAddress(oneHalfPtr));
    // 0x3fe00000,
    // 0x00000000,
    // 0x3fe00000
    // 0x00000000,
    masm.movdq(temp4, externalAddress(signMaskPtr));
    // 0x80000000
    masm.pand(temp4, dest);
    masm.por(temp5, temp4);
    masm.addpd(temp1, temp5);
    masm.cvttsd2sil(gpr4, temp1);
    masm.cvtsi2sdl(temp1, gpr4);
    // 0x54400000,
    masm.movdq(temp3, externalAddress(pOnePtr));
    // 0x3fb921fb
    // 0x1a600000,
    masm.movdqu(temp2, externalAddress(pTwoPtr));
    // 0x3d90b461,
    // 0x1a600000,
    // 0x3d90b461
    masm.mulsd(temp3, temp1);
    masm.unpcklpd(temp1, temp1);
    masm.shll(gpr1, 3);
    masm.addl(gpr4, 1865216);
    masm.movdqu(temp4, dest);
    masm.addl(gpr4, gpr1);
    masm.andl(gpr4, 63);
    // 0x54400000,
    masm.movdqu(temp5, externalAddress(scFourPtr));
    // 0x3fb921fb
    // Select the 32-byte cTable entry, as in the main path.
    masm.leaq(gpr1, externalAddress(cTablePtr));
    masm.shll(gpr4, 5);
    masm.addq(gpr1, gpr4);
    masm.movdqu(temp8, new AMD64Address(gpr1, 0));
    masm.mulpd(temp2, temp1);
    masm.subsd(dest, temp3);
    // 0x2e037073,
    masm.mulsd(temp1, externalAddress(pThreePtr));
    // 0x3b63198a
    masm.subsd(temp4, temp3);
    masm.unpcklpd(dest, dest);
    masm.movdqu(temp3, temp4);
    masm.subsd(temp4, temp2);
    masm.mulpd(temp5, dest);
    masm.subpd(dest, temp2);
    masm.pshufd(temp7, temp8, 0xE);
    masm.movdqu(temp9, temp7);
    masm.mulsd(temp7, temp4);
    masm.subsd(temp3, temp4);
    masm.mulpd(temp5, dest);
    masm.mulpd(dest, dest);
    masm.subsd(temp3, temp2);
    masm.movdqu(temp2, temp8);
    masm.subsd(temp1, temp3);
    masm.movdq(temp3, new AMD64Address(gpr1, 24));
    masm.addsd(temp2, temp3);
    masm.subsd(temp7, temp2);
    masm.subsd(temp1, temp6);
    // 0x11111111,
    masm.movdqu(temp6, externalAddress(scTwoPtr));
    // 0x3f811111,
    // 0x55555555,
    // 0x3fa55555
    masm.mulsd(temp2, temp4);
    masm.mulpd(temp6, dest);
    masm.mulsd(temp3, temp4);
    masm.mulpd(temp2, dest);
    masm.mulpd(dest, dest);
    // 0x1a01a01a,
    masm.addpd(temp5, externalAddress(scThreePtr));
    // 0xbf2a01a0,
    // 0x16c16c17,
    // 0xbf56c16c
    masm.mulsd(temp4, temp8);
    // 0x55555555,
    masm.addpd(temp6, externalAddress(scOnePtr));
    // 0xbfc55555,
    // 0x00000000,
    // 0xbfe00000
    masm.mulpd(temp5, dest);
    masm.movdqu(dest, temp3);
    masm.addsd(temp3, temp9);
    masm.mulpd(temp1, temp7);
    masm.movdqu(temp7, temp4);
    masm.addsd(temp4, temp3);
    masm.addpd(temp6, temp5);
    masm.subsd(temp9, temp3);
    masm.subsd(temp3, temp4);
    masm.addsd(temp1, new AMD64Address(gpr1, 16));
    masm.mulpd(temp6, temp2);
    masm.addsd(temp9, dest);
    masm.addsd(temp3, temp7);
    masm.addsd(temp1, temp9);
    masm.addsd(temp1, temp3);
    masm.addsd(temp1, temp6);
    masm.unpckhpd(temp6, temp6);
    masm.movdqu(dest, temp4);
    masm.addsd(temp1, temp6);
    masm.addsd(dest, temp1);
    masm.jmp(bb15);
    // bb8: the top word gpr8 is zero. Shift the words up by 64 bits (at most twice) and retry
    // at bb9; if everything is zero, continue at bb12 with dest = 0 and temp6 = 0.
    masm.bind(bb8);
    masm.addl(gpr4, 64);
    masm.movq(gpr8, gpr9);
    masm.movq(gpr9, gpr7);
    masm.movl(gpr7, 0);
    masm.cmpq(gpr8, 0);
    masm.jcc(ConditionFlag.NotEqual, bb9);
    masm.addl(gpr4, 64);
    masm.movq(gpr8, gpr9);
    masm.movq(gpr9, gpr7);
    masm.cmpq(gpr8, 0);
    masm.jcc(ConditionFlag.NotEqual, bb9);
    masm.xorpd(dest, dest);
    masm.xorpd(temp6, temp6);
    masm.jmp(bb12);
    // bb10: flags still come from the subl in bb9. A zero difference needs no shift;
    // otherwise shift right instead of left.
    masm.bind(bb10);
    masm.jcc(ConditionFlag.Equal, bb11);
    masm.negl(gpr3);
    masm.shrq(gpr9);
    masm.movq(gpr1, gpr8);
    masm.shrq(gpr8);
    masm.subl(gpr4, gpr3);
    masm.negl(gpr3);
    masm.addl(gpr3, 64);
    masm.shlq(gpr1);
    masm.orq(gpr9, gpr1);
    masm.jmp(bb11);
    // bb4: taken when the shift count computed in bb1 is below 1.
    masm.bind(bb4);
    masm.negl(gpr3);
    masm.shlq(gpr8, 32);
    masm.orq(gpr8, gpr10);
    masm.shlq(gpr8);
    masm.movq(gpr6, gpr8);
    masm.testl(gpr8, Integer.MIN_VALUE);
    masm.jcc(ConditionFlag.NotEqual, bb13);
    masm.shrl(gpr8);
    masm.movl(gpr2, 0);
    masm.shrq(gpr6, 3);
    masm.jmp(bb6);
    // bb5: bit 0x10000000 was set in gpr8. The words gpr8:gpr9:gpr7 are replaced by a
    // three-word subtraction with borrow (subq/sbbq/sbbq) from gpr2:0:0, and gpr2 is set to
    // 32768, which bb11 later XORs into the sign position.
    masm.bind(bb5);
    masm.shrl(gpr8);
    masm.movl(gpr2, 536870912);
    masm.shrl(gpr2);
    masm.shlq(gpr8, 32);
    masm.orq(gpr8, gpr10);
    masm.shlq(gpr2, 32);
    masm.addl(gpr6, 536870912);
    masm.movl(gpr3, 0);
    masm.movl(gpr10, 0);
    masm.subq(gpr3, gpr7);
    masm.sbbq(gpr10, gpr9);
    masm.sbbq(gpr2, gpr8);
    masm.movq(gpr7, gpr3);
    masm.movq(gpr9, gpr10);
    masm.movq(gpr8, gpr2);
    masm.movl(gpr2, 32768);
    masm.jmp(bb6);
    // bb13: counterpart of bb5 for the bb4 path (top bit of the low 32 bits of gpr8 set).
    masm.bind(bb13);
    masm.shrl(gpr8);
    masm.movq(gpr2, 0x100000000L);
    masm.shrq(gpr2);
    masm.movl(gpr3, 0);
    masm.movl(gpr10, 0);
    masm.subq(gpr3, gpr7);
    masm.sbbq(gpr10, gpr9);
    masm.sbbq(gpr2, gpr8);
    masm.movq(gpr7, gpr3);
    masm.movq(gpr9, gpr10);
    masm.movq(gpr8, gpr2);
    masm.movl(gpr2, 32768);
    masm.shrq(gpr6, 3);
    masm.addl(gpr6, 536870912);
    masm.jmp(bb6);
    // bb15: common exit; the result is in dest.
    masm.bind(bb15);
}
Also used : Register(jdk.vm.ci.code.Register) ValueUtil.asRegister(jdk.vm.ci.code.ValueUtil.asRegister) ArrayDataPointerConstant(org.graalvm.compiler.lir.asm.ArrayDataPointerConstant) Label(org.graalvm.compiler.asm.Label) AMD64Address(org.graalvm.compiler.asm.amd64.AMD64Address)

Example 32 with AMD64Address

use of org.graalvm.compiler.asm.amd64.AMD64Address in project graal by oracle.

the class AMD64MathIntrinsicUnaryOp method log10Intrinsic.

/**
 * Emits the AMD64 instruction sequence that computes the base-10 logarithm of a
 * double-precision value.
 *
 * Copyright (c) 2014, 2016, Intel Corporation. All rights reserved. Intel Math Library (LIBM)
 * Source Code
 *
 * ALGORITHM DESCRIPTION - LOG10()
 * ---------------------
 *
 * Let x = 2^k * mx, mx in [1,2)
 *
 * Get B ~ 1/mx based on the output of the rcpss instruction (B0):
 * B = int((B0*LH*2^7+0.5))/2^7, where LH is a short approximation for log10(e).
 *
 * Reduced argument: r = B*mx - LH (computed accurately in high and low parts)
 *
 * Result: k*log10(2) - log(B) + p(r), where p(r) is a degree 7 polynomial and -log(B) is read
 * from a data table (high, low parts). The result is formed from high and low parts.
 *
 * Special cases:
 * <ul>
 * <li>log10(0) = -INF with divide-by-zero exception raised</li>
 * <li>log10(1) = +0</li>
 * <li>log10(x) = NaN with invalid exception raised if x < -0, including -INF</li>
 * <li>log10(+INF) = +INF</li>
 * </ul>
 *
 * The emitted sequence is a fixed, hand-scheduled listing; statement order must not be changed.
 *
 * NOTE(review): the {@code *Temp} values, {@code stackTemp} and the constant arrays are declared
 * outside this method; presumably they are fields of the enclosing LIR op - confirm.
 *
 * @param dest register that holds the result when {@code bb8} is reached
 * @param value register holding the input argument
 * @param crb compilation result builder; used to resolve {@code stackTemp} and handed to
 *            {@code setCrb}
 * @param masm assembler that receives the emitted instructions
 */
public void log10Intrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) {
    ArrayDataPointerConstant highmaskLogTenPtr = new ArrayDataPointerConstant(highmaskLogTen, 16);
    ArrayDataPointerConstant logTenEPtr = new ArrayDataPointerConstant(logTenE, 16);
    ArrayDataPointerConstant logTenTablePtr = new ArrayDataPointerConstant(logTenTable, 16);
    ArrayDataPointerConstant logTwoLogTenDataPtr = new ArrayDataPointerConstant(logTwoLogTenData, 16);
    ArrayDataPointerConstant coeffLogTenDataPtr = new ArrayDataPointerConstant(coeffLogTenData, 16);
    // Branch targets: bb1 is the main computation, bb0 and bb2..bb7 handle special inputs,
    // bb8 is the common exit.
    Label bb0 = new Label();
    Label bb1 = new Label();
    Label bb2 = new Label();
    Label bb3 = new Label();
    Label bb4 = new Label();
    Label bb5 = new Label();
    Label bb6 = new Label();
    Label bb7 = new Label();
    Label bb8 = new Label();
    Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD);
    Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD);
    Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD);
    Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD);
    Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE);
    Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE);
    Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE);
    Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE);
    Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE);
    Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE);
    Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE);
    AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp);
    setCrb(crb);
    // Keep a copy of the argument on the stack; the special-case path at bb0 reloads it.
    masm.movdq(stackSlot, value);
    if (dest.encoding != value.encoding) {
        masm.movdqu(dest, value);
    }
    // 0xf8000000,
    masm.movdqu(temp5, externalAddress(highmaskLogTenPtr));
    // 0xffffffff,
    // 0x00000000,
    // 0xffffe000
    // Build constants by inserting 16-bit values into the top word (word 3) of zeroed
    // registers: 16368 = 0x3ff0 yields the bit pattern of 1.0.
    masm.xorpd(temp2, temp2);
    masm.movl(gpr1, 16368);
    masm.pinsrw(temp2, gpr1, 3);
    masm.movl(gpr2, 1054736384);
    masm.movdl(temp7, gpr2);
    masm.xorpd(temp3, temp3);
    masm.movl(gpr3, 30704);
    masm.pinsrw(temp3, gpr3, 3);
    masm.movl(gpr3, 32768);
    masm.movdl(temp4, gpr3);
    masm.movdqu(temp1, value);
    // gpr1 = top 16 bits of the argument (sign, exponent, 4 mantissa bits).
    masm.pextrw(gpr1, dest, 3);
    masm.por(dest, temp2);
    // gpr2 = exponent bias term subtracted from gpr1 in bb1 (16352 = 0x3fe0).
    masm.movl(gpr2, 16352);
    masm.psrlq(dest, 27);
    // 0x00000000,
    masm.movdqu(temp2, externalAddress(logTenEPtr));
    // 0x3fdbc000,
    // 0xbf2e4108,
    // 0x3f5a7a6c
    masm.psrld(dest, 2);
    // Single-precision reciprocal approximation (the B0 of the algorithm description).
    masm.rcpps(dest, dest);
    // Clear the top 12 bits (sign and exponent) of temp1, leaving the mantissa field.
    masm.psllq(temp1, 12);
    masm.pshufd(temp6, temp5, 0x4E);
    masm.psrlq(temp1, 12);
    // Unsigned range check on the top word: after subtracting 16, anything >= 32736 (0x7fe0)
    // is a zero/subnormal, negative, infinite or NaN input and is handled at bb0.
    masm.subl(gpr1, 16);
    masm.cmpl(gpr1, 32736);
    masm.jcc(ConditionFlag.AboveEqual, bb0);
    // bb1: main computation; also re-entered from bb3 after rescaling a subnormal input.
    masm.bind(bb1);
    masm.mulss(dest, temp7);
    masm.por(temp1, temp3);
    masm.andpd(temp5, temp1);
    masm.paddd(dest, temp4);
    // 0xc1a5f12e,
    masm.movdqu(temp3, externalAddress(coeffLogTenDataPtr));
    // 0x40358874,
    // 0x64d4ef0d,
    // 0xc0089309
    masm.leaq(gpr4, externalAddress(coeffLogTenDataPtr));
    // 0x385593b1,
    masm.movdqu(temp4, new AMD64Address(gpr4, 16));
    // 0xc025c917,
    // 0xdc963467,
    // 0x3ffc6a02
    masm.subsd(temp1, temp5);
    masm.movdl(gpr3, dest);
    masm.psllq(dest, 29);
    masm.andpd(dest, temp6);
    // 0x509f7800,
    masm.movdq(temp6, externalAddress(logTwoLogTenDataPtr));
    // 0x3f934413
    // Exponent k: exponent bits of the top word minus the bias term in gpr2, converted to
    // double in temp7.
    masm.andl(gpr1, 32752);
    masm.subl(gpr1, gpr2);
    masm.cvtsi2sdl(temp7, gpr1);
    masm.mulpd(temp5, dest);
    masm.mulsd(temp1, dest);
    masm.subsd(temp5, temp2);
    // 0x7f9d3aa1,
    masm.movdqu(temp2, new AMD64Address(gpr4, 32));
    // 0x4016ab9f,
    // 0xdc77b115,
    // 0xbff27af2
    // Table lookup: bits of the reciprocal estimate form a byte offset into logTenTable;
    // the -1504 displacement rebases that offset.
    masm.leaq(gpr4, externalAddress(logTenTablePtr));
    masm.andl(gpr3, 16711680);
    masm.shrl(gpr3, 12);
    masm.movdqu(dest, new AMD64Address(gpr4, gpr3, Scale.Times1, -1504));
    masm.addsd(temp1, temp5);
    masm.mulsd(temp6, temp7);
    masm.pshufd(temp5, temp1, 0x44);
    masm.leaq(gpr4, externalAddress(logTwoLogTenDataPtr));
    // 0x1f12b358,
    masm.mulsd(temp7, new AMD64Address(gpr4, 8));
    // 0x3cdfef31
    masm.mulsd(temp3, temp1);
    masm.addsd(dest, temp6);
    masm.mulpd(temp4, temp5);
    masm.leaq(gpr4, externalAddress(logTenEPtr));
    // 0xbf2e4108,
    masm.movdq(temp6, new AMD64Address(gpr4, 8));
    // 0x3f5a7a6c
    masm.mulpd(temp5, temp5);
    masm.addpd(temp4, temp2);
    masm.mulpd(temp3, temp5);
    masm.pshufd(temp2, dest, 0xE4);
    masm.addsd(dest, temp1);
    masm.mulsd(temp4, temp1);
    masm.subsd(temp2, dest);
    masm.mulsd(temp6, temp1);
    masm.addsd(temp1, temp2);
    masm.pshufd(temp2, dest, 0xEE);
    masm.mulsd(temp5, temp5);
    masm.addsd(temp7, temp2);
    masm.addsd(temp1, temp6);
    masm.addpd(temp4, temp3);
    masm.addsd(temp1, temp7);
    masm.mulpd(temp4, temp5);
    masm.addsd(temp1, temp4);
    masm.pshufd(temp5, temp4, 0xEE);
    masm.addsd(temp1, temp5);
    // Final sum: high part in dest plus the accumulated low-order terms in temp1.
    masm.addsd(dest, temp1);
    masm.jmp(bb8);
    // bb0: special inputs. Reload the original argument and restore gpr1 to the raw top word.
    masm.bind(bb0);
    masm.movdq(dest, stackSlot);
    masm.movdq(temp1, stackSlot);
    masm.addl(gpr1, 16);
    // Sign bit set (top word >= 0x8000): negative input, handled at bb2.
    masm.cmpl(gpr1, 32768);
    masm.jcc(ConditionFlag.AboveEqual, bb2);
    // Exponent field zero (top word < 0x10): zero or subnormal, handled at bb3.
    masm.cmpl(gpr1, 16);
    masm.jcc(ConditionFlag.Below, bb3);
    // bb4: remaining inputs (+INF or NaN) return x + x.
    masm.bind(bb4);
    masm.addsd(dest, dest);
    masm.jmp(bb8);
    // bb5: sign set and exponent all ones. The first jcc re-tests the flags of the cmpl in
    // bb2; any non-zero mantissa bit means NaN (bb4), otherwise the input is -INF (bb6).
    masm.bind(bb5);
    masm.jcc(ConditionFlag.Above, bb4);
    masm.cmpl(gpr3, 0);
    masm.jcc(ConditionFlag.Above, bb4);
    masm.jmp(bb6);
    // bb3: zero or subnormal input. If all 64 bits are zero go to bb7; otherwise multiply by
    // the value whose top word is 18416 (0x47f0), redo the setup done before bb1 with
    // gpr2 = 18416 as the bias term, and rejoin the main computation.
    masm.bind(bb3);
    masm.xorpd(temp1, temp1);
    masm.addsd(temp1, dest);
    masm.movdl(gpr3, temp1);
    masm.psrlq(temp1, 32);
    masm.movdl(gpr2, temp1);
    masm.orl(gpr3, gpr2);
    masm.cmpl(gpr3, 0);
    masm.jcc(ConditionFlag.Equal, bb7);
    masm.xorpd(temp1, temp1);
    masm.xorpd(temp2, temp2);
    masm.movl(gpr1, 18416);
    masm.pinsrw(temp1, gpr1, 3);
    masm.mulsd(dest, temp1);
    masm.movl(gpr1, 16368);
    masm.pinsrw(temp2, gpr1, 3);
    masm.movdqu(temp1, dest);
    masm.pextrw(gpr1, dest, 3);
    masm.por(dest, temp2);
    masm.movl(gpr2, 18416);
    masm.psrlq(dest, 27);
    // 0x00000000,
    masm.movdqu(temp2, externalAddress(logTenEPtr));
    // 0x3fdbc000,
    // 0xbf2e4108,
    // 0x3f5a7a6c
    masm.psrld(dest, 2);
    masm.rcpps(dest, dest);
    masm.psllq(temp1, 12);
    masm.pshufd(temp6, temp5, 0x4E);
    masm.psrlq(temp1, 12);
    masm.jmp(bb1);
    // bb2: negative input. gpr3 = low 32 bits, gpr2 = high 32 bits with the sign shifted out
    // by the doubling; >= 0xffe00000 (-2097152) means the exponent is all ones (bb5).
    masm.bind(bb2);
    masm.movdl(gpr3, temp1);
    masm.psrlq(temp1, 32);
    masm.movdl(gpr2, temp1);
    masm.addl(gpr2, gpr2);
    masm.cmpl(gpr2, -2097152);
    masm.jcc(ConditionFlag.AboveEqual, bb5);
    // All remaining bits zero: the input is -0, treated like zero (bb7).
    masm.orl(gpr3, gpr2);
    masm.cmpl(gpr3, 0);
    masm.jcc(ConditionFlag.Equal, bb7);
    // bb6: negative non-zero input (or -INF): result is 0 * INF, i.e. NaN.
    masm.bind(bb6);
    masm.xorpd(temp1, temp1);
    masm.xorpd(dest, dest);
    masm.movl(gpr1, 32752);
    masm.pinsrw(temp1, gpr1, 3);
    masm.mulsd(dest, temp1);
    masm.jmp(bb8);
    // bb7: zero input: result is -1.0 / 0.0 (top word 49136 = 0xbff0), i.e. -INF.
    masm.bind(bb7);
    masm.xorpd(temp1, temp1);
    masm.xorpd(dest, dest);
    masm.movl(gpr1, 49136);
    masm.pinsrw(dest, gpr1, 3);
    masm.divsd(dest, temp1);
    // bb8: common exit; the result is in dest.
    masm.bind(bb8);
}
Also used : Register(jdk.vm.ci.code.Register) ValueUtil.asRegister(jdk.vm.ci.code.ValueUtil.asRegister) ArrayDataPointerConstant(org.graalvm.compiler.lir.asm.ArrayDataPointerConstant) Label(org.graalvm.compiler.asm.Label) AMD64Address(org.graalvm.compiler.asm.amd64.AMD64Address)

Example 33 with AMD64Address

use of org.graalvm.compiler.asm.amd64.AMD64Address in project graal by oracle.

the class AMD64MathIntrinsicBinaryOp method powIntrinsic.

/**
 * Emits, through {@code masm}, the AMD64 instruction sequence for the {@code pow} intrinsic.
 *
 * <p>
 * The method body is one long, hand-scheduled stream of assembler calls. Control flow inside the
 * emitted code is expressed with the {@code bbN} labels declared below; {@code bb56} is bound as
 * the very last statement of the method and is the target the other paths jump to when they are
 * done. Statement order, register reuse and label placement are all significant, so the body
 * should not be reordered.
 *
 * @param dest register that the emitted code writes its result into (every path stores into
 *            {@code dest} before reaching {@code bb56})
 * @param value1 first operand; copied into {@code temp10} and, if it is a different register,
 *            into {@code dest} (presumably the base of the power — TODO confirm against caller)
 * @param value2 second operand; copied into {@code temp8} (presumably the exponent — TODO
 *            confirm against caller)
 * @param crb compilation result builder, handed to {@code setCrb} before any
 *            {@code externalAddress(...)} call is made
 * @param masm macro assembler that receives every emitted instruction
 */
public void powIntrinsic(Register dest, Register value1, Register value2, CompilationResultBuilder crb, AMD64MacroAssembler masm) {
    // Pointers to the constant tables used by the emitted code. The second constructor argument
    // is 16 for most tables and 8 for halfmask/logTwoPow.
    // NOTE(review): the second argument looks like a required alignment in bytes — confirm
    // against ArrayDataPointerConstant.
    ArrayDataPointerConstant highSigMaskPtr = new ArrayDataPointerConstant(highSigMask, 16);
    ArrayDataPointerConstant logTwoEPtr = new ArrayDataPointerConstant(logTwoE, 16);
    ArrayDataPointerConstant highmaskYPtr = new ArrayDataPointerConstant(highmaskY, 16);
    ArrayDataPointerConstant tExpPtr = new ArrayDataPointerConstant(tExp, 16);
    ArrayDataPointerConstant eCoeffPtr = new ArrayDataPointerConstant(eCoeff, 16);
    ArrayDataPointerConstant coeffHPtr = new ArrayDataPointerConstant(coeffH, 16);
    ArrayDataPointerConstant highmaskLogXPtr = new ArrayDataPointerConstant(highmaskLogX, 16);
    ArrayDataPointerConstant halfmaskPtr = new ArrayDataPointerConstant(halfmask, 8);
    ArrayDataPointerConstant coeffPowPtr = new ArrayDataPointerConstant(coeffPow, 16);
    ArrayDataPointerConstant lTblPowPtr = new ArrayDataPointerConstant(lTblPow, 16);
    ArrayDataPointerConstant logTwoPowPtr = new ArrayDataPointerConstant(logTwoPow, 8);
    // Branch targets of the emitted code. The numbering has gaps: no bb17 and no bb52 are
    // declared, and none is referenced anywhere in this method.
    Label bb0 = new Label();
    Label bb1 = new Label();
    Label bb2 = new Label();
    Label bb3 = new Label();
    Label bb4 = new Label();
    Label bb5 = new Label();
    Label bb6 = new Label();
    Label bb7 = new Label();
    Label bb8 = new Label();
    Label bb9 = new Label();
    Label bb10 = new Label();
    Label bb11 = new Label();
    Label bb12 = new Label();
    Label bb13 = new Label();
    Label bb14 = new Label();
    Label bb15 = new Label();
    Label bb16 = new Label();
    Label bb18 = new Label();
    Label bb19 = new Label();
    Label bb20 = new Label();
    Label bb21 = new Label();
    Label bb22 = new Label();
    Label bb23 = new Label();
    Label bb24 = new Label();
    Label bb25 = new Label();
    Label bb26 = new Label();
    Label bb27 = new Label();
    Label bb28 = new Label();
    Label bb29 = new Label();
    Label bb30 = new Label();
    Label bb31 = new Label();
    Label bb32 = new Label();
    Label bb33 = new Label();
    Label bb34 = new Label();
    Label bb35 = new Label();
    Label bb36 = new Label();
    Label bb37 = new Label();
    Label bb38 = new Label();
    Label bb39 = new Label();
    Label bb40 = new Label();
    Label bb41 = new Label();
    Label bb42 = new Label();
    Label bb43 = new Label();
    Label bb44 = new Label();
    Label bb45 = new Label();
    Label bb46 = new Label();
    Label bb47 = new Label();
    Label bb48 = new Label();
    Label bb49 = new Label();
    Label bb50 = new Label();
    Label bb51 = new Label();
    Label bb53 = new Label();
    Label bb54 = new Label();
    Label bb55 = new Label();
    // Exit label; bound as the last statement of the method.
    Label bb56 = new Label();
    // General-purpose scratch registers, resolved from the operation's temp values as QWORD.
    Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD);
    Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD);
    // gpr3 is the only scratch register resolved from a specifically named temp (rcxTemp).
    // NOTE(review): the body later calls masm.shll(reg) with no count operand right after
    // computing a value in gpr3; presumably that form shifts by CL, which is why gpr3 must be
    // rcx — confirm against AMD64Assembler.shll.
    Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD);
    Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD);
    Register gpr5 = asRegister(gpr5Temp, AMD64Kind.QWORD);
    Register gpr6 = asRegister(gpr6Temp, AMD64Kind.QWORD);
    Register gpr7 = asRegister(gpr7Temp, AMD64Kind.QWORD);
    Register gpr8 = asRegister(gpr8Temp, AMD64Kind.QWORD);
    // Floating-point/vector scratch registers, resolved from the operation's temp values as DOUBLE.
    Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE);
    Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE);
    Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE);
    Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE);
    Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE);
    Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE);
    Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE);
    Register temp8 = asRegister(xmm8Temp, AMD64Kind.DOUBLE);
    Register temp9 = asRegister(xmm9Temp, AMD64Kind.DOUBLE);
    Register temp10 = asRegister(xmm10Temp, AMD64Kind.DOUBLE);
    // Must run before the first externalAddress(...) call below.
    // NOTE(review): presumably externalAddress records its data references in this builder — confirm.
    setCrb(crb);
    // Keep untouched copies of both inputs: temp10 holds value1 and temp8 holds value2. The
    // special-case paths further down reload the original operands from these two registers.
    masm.movdqu(temp10, value1);
    masm.movsd(temp8, value2);
    // dest doubles as the working register for value1; skip the move when they already coincide.
    if (dest.encoding != value1.encoding) {
        masm.movdqu(dest, value1);
    }
    // 0x00000000,
    masm.movq(temp9, externalAddress(logTwoEPtr));
    // 0x3ff72000
    masm.pextrw(gpr1, dest, 3);
    masm.xorpd(temp2, temp2);
    masm.movq(gpr2, 0x3ff0000000000000L);
    masm.movdq(temp2, gpr2);
    masm.movl(gpr5, 1069088768);
    masm.movdq(temp7, gpr5);
    masm.xorpd(temp1, temp1);
    masm.movq(gpr6, 0x77f0000000000000L);
    masm.movdq(temp1, gpr6);
    masm.movdqu(temp3, dest);
    masm.movl(gpr4, 32752);
    masm.andl(gpr4, gpr1);
    masm.subl(gpr4, 16368);
    masm.movl(gpr3, gpr4);
    masm.sarl(gpr4, 31);
    masm.addl(gpr3, gpr4);
    masm.xorl(gpr3, gpr4);
    masm.por(dest, temp2);
    // 0x00000000,
    masm.movdqu(temp6, externalAddress(highSigMaskPtr));
    // 0xfffff800,
    // 0x00000000,
    // 0xfffff800
    masm.psrlq(dest, 27);
    masm.psrld(dest, 2);
    masm.addl(gpr3, 16);
    masm.bsrl(gpr3, gpr3);
    masm.rcpps(dest, dest);
    masm.psllq(temp3, 12);
    masm.movl(gpr7, 8192);
    masm.movdq(temp4, gpr7);
    masm.psrlq(temp3, 12);
    masm.subl(gpr1, 16);
    masm.cmpl(gpr1, 32736);
    masm.jcc(ConditionFlag.AboveEqual, bb0);
    masm.movl(gpr5, 0);
    masm.bind(bb1);
    masm.mulss(dest, temp7);
    masm.movl(gpr4, -1);
    masm.subl(gpr3, 4);
    masm.shll(gpr4);
    masm.shlq(gpr4, 32);
    masm.movdq(temp5, gpr4);
    masm.por(temp3, temp1);
    masm.subl(gpr1, 16351);
    masm.cmpl(gpr1, 1);
    masm.jcc(ConditionFlag.BelowEqual, bb2);
    masm.paddd(dest, temp4);
    masm.pand(temp5, temp3);
    masm.movdl(gpr4, dest);
    masm.psllq(dest, 29);
    masm.bind(bb3);
    masm.subsd(temp3, temp5);
    masm.pand(dest, temp6);
    masm.subl(gpr1, 1);
    masm.sarl(gpr1, 4);
    masm.cvtsi2sdl(temp7, gpr1);
    masm.mulpd(temp5, dest);
    masm.bind(bb4);
    masm.mulsd(temp3, dest);
    masm.leaq(gpr8, externalAddress(coeffPowPtr));
    // 0x6dc96112,
    masm.movdqu(temp1, new AMD64Address(gpr8, 0));
    // 0xbf836578,
    // 0xee241472,
    // 0xbf9b0301
    // 0x9f95985a,
    masm.movdqu(temp4, new AMD64Address(gpr8, 16));
    // 0xbfb528db,
    // 0xb3841d2a,
    // 0xbfd619b6
    // 0x518775e3,
    masm.movdqu(temp6, new AMD64Address(gpr8, 32));
    // 0x3f9004f2,
    // 0xac8349bb,
    // 0x3fa76c9b
    // 0x486ececc,
    masm.movdqu(dest, new AMD64Address(gpr8, 48));
    // 0x3fc4635e,
    // 0x161bb241,
    // 0xbf5dabe1
    masm.subsd(temp5, temp9);
    masm.movl(gpr3, gpr1);
    masm.sarl(gpr1, 31);
    masm.addl(gpr3, gpr1);
    masm.xorl(gpr1, gpr3);
    masm.addl(gpr1, 1);
    masm.bsrl(gpr1, gpr1);
    masm.unpcklpd(temp5, temp3);
    masm.addsd(temp3, temp5);
    masm.leaq(gpr7, externalAddress(lTblPowPtr));
    masm.andl(gpr4, 16760832);
    masm.shrl(gpr4, 10);
    masm.addpd(temp5, new AMD64Address(gpr7, gpr4, Scale.Times1, -3648));
    masm.pshufd(temp2, temp3, 0x44);
    masm.mulsd(temp3, temp3);
    masm.mulpd(temp1, temp2);
    masm.mulpd(temp4, temp2);
    masm.addsd(temp5, temp7);
    masm.mulsd(temp2, temp3);
    masm.addpd(temp6, temp1);
    masm.mulsd(temp3, temp3);
    masm.addpd(dest, temp4);
    masm.movdqu(temp1, temp8);
    masm.pextrw(gpr3, temp8, 3);
    masm.pshufd(temp7, temp5, 0xEE);
    // 0x00000000,
    masm.movq(temp4, externalAddress(highmaskYPtr));
    // 0xfffffff8
    masm.mulpd(temp6, temp2);
    masm.pshufd(temp3, temp3, 0x44);
    masm.mulpd(dest, temp2);
    masm.shll(gpr1, 4);
    masm.subl(gpr1, 15872);
    masm.andl(gpr3, 32752);
    masm.addl(gpr1, gpr3);
    masm.mulpd(temp3, temp6);
    masm.cmpl(gpr1, 624);
    masm.jcc(ConditionFlag.AboveEqual, bb5);
    masm.xorpd(temp6, temp6);
    masm.movl(gpr4, 17080);
    masm.pinsrw(temp6, gpr4, 3);
    masm.movdqu(temp2, temp1);
    masm.pand(temp4, temp1);
    masm.subsd(temp1, temp4);
    masm.mulsd(temp4, temp5);
    masm.addsd(dest, temp7);
    masm.mulsd(temp1, temp5);
    masm.movdqu(temp7, temp6);
    masm.addsd(temp6, temp4);
    masm.leaq(gpr7, externalAddress(tExpPtr));
    masm.addpd(temp3, dest);
    masm.movdl(gpr4, temp6);
    masm.movl(gpr3, gpr4);
    masm.andl(gpr4, 255);
    masm.addl(gpr4, gpr4);
    masm.movdqu(temp5, new AMD64Address(gpr7, gpr4, Scale.Times8, 0));
    masm.subsd(temp6, temp7);
    masm.pshufd(dest, temp3, 0xEE);
    masm.subsd(temp4, temp6);
    masm.addsd(dest, temp3);
    masm.addsd(temp4, temp1);
    masm.mulsd(temp2, dest);
    masm.leaq(gpr8, externalAddress(eCoeffPtr));
    // 0xe78a6731,
    masm.movdqu(temp7, new AMD64Address(gpr8, 0));
    // 0x3f55d87f,
    // 0xd704a0c0,
    // 0x3fac6b08
    // 0x6fba4e77,
    masm.movdqu(temp3, new AMD64Address(gpr8, 16));
    // 0x3f83b2ab,
    // 0xff82c58f,
    // 0x3fcebfbd
    masm.shll(gpr3, 12);
    masm.xorl(gpr3, gpr5);
    masm.andl(gpr3, -1048576);
    masm.movdq(temp6, gpr3);
    masm.addsd(temp2, temp4);
    masm.movq(gpr2, 0x3fe62e42fefa39efL);
    masm.movdq(temp1, gpr2);
    masm.pshufd(dest, temp2, 0x44);
    masm.pshufd(temp4, temp2, 0x44);
    masm.mulsd(temp1, temp2);
    masm.pshufd(temp6, temp6, 0x11);
    masm.mulpd(dest, dest);
    masm.mulpd(temp7, temp4);
    masm.paddd(temp5, temp6);
    masm.mulsd(temp1, temp5);
    masm.pshufd(temp6, temp5, 0xEE);
    masm.mulsd(dest, dest);
    masm.addpd(temp3, temp7);
    masm.addsd(temp1, temp6);
    masm.mulpd(dest, temp3);
    masm.pshufd(temp3, dest, 0xEE);
    masm.mulsd(dest, temp5);
    masm.mulsd(temp3, temp5);
    masm.addsd(dest, temp1);
    masm.addsd(dest, temp3);
    masm.addsd(dest, temp5);
    masm.jmp(bb56);
    masm.bind(bb0);
    masm.addl(gpr1, 16);
    masm.movl(gpr4, 32752);
    masm.andl(gpr4, gpr1);
    masm.cmpl(gpr4, 32752);
    masm.jcc(ConditionFlag.Equal, bb6);
    masm.testl(gpr1, 32768);
    masm.jcc(ConditionFlag.NotEqual, bb7);
    masm.bind(bb8);
    masm.movdqu(dest, temp10);
    masm.movdqu(temp3, temp10);
    masm.movdl(gpr4, temp3);
    masm.psrlq(temp3, 32);
    masm.movdl(gpr3, temp3);
    masm.orl(gpr4, gpr3);
    masm.cmpl(gpr4, 0);
    masm.jcc(ConditionFlag.Equal, bb9);
    masm.xorpd(temp3, temp3);
    masm.movl(gpr1, 18416);
    masm.pinsrw(temp3, gpr1, 3);
    masm.mulsd(dest, temp3);
    masm.xorpd(temp2, temp2);
    masm.movl(gpr1, 16368);
    masm.pinsrw(temp2, gpr1, 3);
    masm.movdqu(temp3, dest);
    masm.pextrw(gpr1, dest, 3);
    masm.por(dest, temp2);
    masm.movl(gpr3, 18416);
    masm.psrlq(dest, 27);
    masm.psrld(dest, 2);
    masm.rcpps(dest, dest);
    masm.psllq(temp3, 12);
    // 0x00000000,
    masm.movdqu(temp6, externalAddress(highSigMaskPtr));
    // 0xfffff800,
    // 0x00000000,
    // 0xfffff800
    masm.psrlq(temp3, 12);
    masm.mulss(dest, temp7);
    masm.movl(gpr4, -1024);
    masm.movdl(temp5, gpr4);
    masm.por(temp3, temp1);
    masm.paddd(dest, temp4);
    masm.psllq(temp5, 32);
    masm.movdl(gpr4, dest);
    masm.psllq(dest, 29);
    masm.pand(temp5, temp3);
    masm.movl(gpr5, 0);
    masm.pand(dest, temp6);
    masm.subsd(temp3, temp5);
    masm.andl(gpr1, 32752);
    masm.subl(gpr1, 18416);
    masm.sarl(gpr1, 4);
    masm.cvtsi2sdl(temp7, gpr1);
    masm.mulpd(temp5, dest);
    masm.jmp(bb4);
    masm.bind(bb10);
    masm.movdqu(dest, temp10);
    masm.movdqu(temp3, temp10);
    masm.movdl(gpr4, temp3);
    masm.psrlq(temp3, 32);
    masm.movdl(gpr3, temp3);
    masm.orl(gpr4, gpr3);
    masm.cmpl(gpr4, 0);
    masm.jcc(ConditionFlag.Equal, bb9);
    masm.xorpd(temp3, temp3);
    masm.movl(gpr1, 18416);
    masm.pinsrw(temp3, gpr1, 3);
    masm.mulsd(dest, temp3);
    masm.xorpd(temp2, temp2);
    masm.movl(gpr1, 16368);
    masm.pinsrw(temp2, gpr1, 3);
    masm.movdqu(temp3, dest);
    masm.pextrw(gpr1, dest, 3);
    masm.por(dest, temp2);
    masm.movl(gpr3, 18416);
    masm.psrlq(dest, 27);
    masm.psrld(dest, 2);
    masm.rcpps(dest, dest);
    masm.psllq(temp3, 12);
    // 0x00000000,
    masm.movdqu(temp6, externalAddress(highSigMaskPtr));
    // 0xfffff800,
    // 0x00000000,
    // 0xfffff800
    masm.psrlq(temp3, 12);
    masm.mulss(dest, temp7);
    masm.movl(gpr4, -1024);
    masm.movdl(temp5, gpr4);
    masm.por(temp3, temp1);
    masm.paddd(dest, temp4);
    masm.psllq(temp5, 32);
    masm.movdl(gpr4, dest);
    masm.psllq(dest, 29);
    masm.pand(temp5, temp3);
    masm.movl(gpr5, Integer.MIN_VALUE);
    masm.pand(dest, temp6);
    masm.subsd(temp3, temp5);
    masm.andl(gpr1, 32752);
    masm.subl(gpr1, 18416);
    masm.sarl(gpr1, 4);
    masm.cvtsi2sdl(temp7, gpr1);
    masm.mulpd(temp5, dest);
    masm.jmp(bb4);
    masm.bind(bb5);
    masm.cmpl(gpr1, 0);
    masm.jcc(ConditionFlag.Less, bb11);
    masm.cmpl(gpr1, 752);
    masm.jcc(ConditionFlag.AboveEqual, bb12);
    masm.addsd(dest, temp7);
    // 0xf8000000,
    masm.movq(temp4, externalAddress(halfmaskPtr));
    // 0xffffffff
    masm.addpd(temp3, dest);
    masm.xorpd(temp6, temp6);
    masm.movl(gpr1, 17080);
    masm.pinsrw(temp6, gpr1, 3);
    masm.pshufd(dest, temp3, 0xEE);
    masm.addsd(dest, temp3);
    masm.movdqu(temp3, temp5);
    masm.addsd(temp5, dest);
    masm.subsd(temp3, temp5);
    masm.movdqu(temp7, temp5);
    masm.pand(temp5, temp4);
    masm.movdqu(temp2, temp1);
    masm.pand(temp4, temp1);
    masm.subsd(temp7, temp5);
    masm.addsd(dest, temp3);
    masm.subsd(temp1, temp4);
    masm.mulsd(temp4, temp5);
    masm.addsd(dest, temp7);
    masm.mulsd(temp2, dest);
    masm.movdqu(temp7, temp6);
    masm.mulsd(temp1, temp5);
    masm.addsd(temp6, temp4);
    masm.movdl(gpr1, temp6);
    masm.subsd(temp6, temp7);
    masm.leaq(gpr7, externalAddress(tExpPtr));
    masm.movl(gpr3, gpr1);
    masm.andl(gpr1, 255);
    masm.addl(gpr1, gpr1);
    masm.movdqu(temp5, new AMD64Address(gpr7, gpr1, Scale.Times8, 0));
    masm.addsd(temp2, temp1);
    masm.leaq(gpr8, externalAddress(eCoeffPtr));
    // 0xe78a6731,
    masm.movdqu(temp7, new AMD64Address(gpr8, 0));
    // 0x3f55d87f,
    // 0xd704a0c0,
    // 0x3fac6b08
    // 0x6fba4e77,
    masm.movdqu(temp3, new AMD64Address(gpr8, 16));
    // 0x3f83b2ab,
    // 0xff82c58f,
    // 0x3fcebfbd
    masm.subsd(temp4, temp6);
    masm.pextrw(gpr4, temp6, 3);
    masm.addsd(temp2, temp4);
    masm.sarl(gpr3, 8);
    masm.movl(gpr1, gpr3);
    masm.sarl(gpr3, 1);
    masm.subl(gpr1, gpr3);
    masm.shll(gpr3, 20);
    masm.xorl(gpr3, gpr5);
    masm.movdl(temp6, gpr3);
    // 0xfefa39ef,
    masm.movq(temp1, new AMD64Address(gpr8, 32));
    // 0x3fe62e42
    masm.andl(gpr4, 32767);
    masm.cmpl(gpr4, 16529);
    masm.jcc(ConditionFlag.Above, bb12);
    masm.pshufd(dest, temp2, 0x44);
    masm.pshufd(temp4, temp2, 0x44);
    masm.mulpd(dest, dest);
    masm.mulpd(temp7, temp4);
    masm.pshufd(temp6, temp6, 0x11);
    masm.mulsd(temp1, temp2);
    masm.mulsd(dest, dest);
    masm.paddd(temp5, temp6);
    masm.addpd(temp3, temp7);
    masm.mulsd(temp1, temp5);
    masm.pshufd(temp6, temp5, 0xEE);
    masm.mulpd(dest, temp3);
    masm.addsd(temp1, temp6);
    masm.pshufd(temp3, dest, 0xEE);
    masm.mulsd(dest, temp5);
    masm.mulsd(temp3, temp5);
    masm.shll(gpr1, 4);
    masm.xorpd(temp4, temp4);
    masm.addl(gpr1, 16368);
    masm.pinsrw(temp4, gpr1, 3);
    masm.addsd(dest, temp1);
    masm.addsd(dest, temp3);
    masm.movdqu(temp1, dest);
    masm.addsd(dest, temp5);
    masm.mulsd(dest, temp4);
    masm.pextrw(gpr1, dest, 3);
    masm.andl(gpr1, 32752);
    masm.jcc(ConditionFlag.Equal, bb13);
    masm.cmpl(gpr1, 32752);
    masm.jcc(ConditionFlag.Equal, bb14);
    masm.jmp(bb56);
    masm.bind(bb6);
    masm.movdqu(temp1, temp8);
    masm.movdqu(dest, temp10);
    masm.movdqu(temp2, dest);
    masm.movdl(gpr1, temp2);
    masm.psrlq(temp2, 20);
    masm.movdl(gpr4, temp2);
    masm.orl(gpr1, gpr4);
    masm.jcc(ConditionFlag.Equal, bb15);
    masm.movdl(gpr1, temp1);
    masm.psrlq(temp1, 32);
    masm.movdl(gpr4, temp1);
    masm.movl(gpr3, gpr4);
    masm.addl(gpr4, gpr4);
    masm.orl(gpr1, gpr4);
    masm.jcc(ConditionFlag.Equal, bb16);
    masm.addsd(dest, dest);
    masm.jmp(bb56);
    masm.bind(bb16);
    masm.xorpd(dest, dest);
    masm.movl(gpr1, 16368);
    masm.pinsrw(dest, gpr1, 3);
    masm.jmp(bb56);
    masm.bind(bb18);
    masm.addpd(dest, temp8);
    masm.jmp(bb56);
    masm.bind(bb15);
    masm.movdl(gpr1, temp1);
    masm.movdqu(temp2, temp1);
    masm.psrlq(temp1, 32);
    masm.movdl(gpr4, temp1);
    masm.movl(gpr3, gpr4);
    masm.addl(gpr4, gpr4);
    masm.orl(gpr1, gpr4);
    masm.jcc(ConditionFlag.Equal, bb19);
    masm.pextrw(gpr1, temp2, 3);
    masm.andl(gpr1, 32752);
    masm.cmpl(gpr1, 32752);
    masm.jcc(ConditionFlag.NotEqual, bb20);
    masm.movdl(gpr1, temp2);
    masm.psrlq(temp2, 20);
    masm.movdl(gpr4, temp2);
    masm.orl(gpr1, gpr4);
    masm.jcc(ConditionFlag.NotEqual, bb18);
    masm.bind(bb20);
    masm.pextrw(gpr1, dest, 3);
    masm.testl(gpr1, 32768);
    masm.jcc(ConditionFlag.NotEqual, bb21);
    masm.testl(gpr3, Integer.MIN_VALUE);
    masm.jcc(ConditionFlag.NotZero, bb22);
    masm.jmp(bb56);
    masm.bind(bb23);
    masm.movdl(gpr1, temp8);
    masm.testl(gpr1, 1);
    masm.jcc(ConditionFlag.NotEqual, bb24);
    masm.testl(gpr1, 2);
    masm.jcc(ConditionFlag.NotEqual, bb25);
    masm.jmp(bb24);
    masm.bind(bb21);
    masm.shrl(gpr3, 20);
    masm.andl(gpr3, 2047);
    masm.cmpl(gpr3, 1075);
    masm.jcc(ConditionFlag.Above, bb24);
    masm.jcc(ConditionFlag.Equal, bb26);
    masm.cmpl(gpr3, 1074);
    masm.jcc(ConditionFlag.Above, bb23);
    masm.cmpl(gpr3, 1023);
    masm.jcc(ConditionFlag.Below, bb24);
    masm.movdqu(temp1, temp8);
    masm.movl(gpr1, 17208);
    masm.xorpd(temp3, temp3);
    masm.pinsrw(temp3, gpr1, 3);
    masm.movdqu(temp4, temp3);
    masm.addsd(temp3, temp1);
    masm.subsd(temp4, temp3);
    masm.addsd(temp1, temp4);
    masm.pextrw(gpr1, temp1, 3);
    masm.andl(gpr1, 32752);
    masm.jcc(ConditionFlag.NotEqual, bb24);
    masm.movdl(gpr1, temp3);
    masm.andl(gpr1, 1);
    masm.jcc(ConditionFlag.Equal, bb24);
    masm.bind(bb25);
    masm.pextrw(gpr1, temp8, 3);
    masm.andl(gpr1, 32768);
    masm.jcc(ConditionFlag.NotEqual, bb27);
    masm.jmp(bb56);
    masm.bind(bb27);
    masm.xorpd(dest, dest);
    masm.movl(gpr1, 32768);
    masm.pinsrw(dest, gpr1, 3);
    masm.jmp(bb56);
    masm.bind(bb24);
    masm.pextrw(gpr1, temp8, 3);
    masm.andl(gpr1, 32768);
    masm.jcc(ConditionFlag.NotEqual, bb22);
    masm.xorpd(dest, dest);
    masm.movl(gpr1, 32752);
    masm.pinsrw(dest, gpr1, 3);
    masm.jmp(bb56);
    masm.bind(bb26);
    masm.movdl(gpr1, temp8);
    masm.andl(gpr1, 1);
    masm.jcc(ConditionFlag.Equal, bb24);
    masm.jmp(bb25);
    masm.bind(bb28);
    masm.movdl(gpr1, temp1);
    masm.psrlq(temp1, 20);
    masm.movdl(gpr4, temp1);
    masm.orl(gpr1, gpr4);
    masm.jcc(ConditionFlag.Equal, bb29);
    masm.addsd(dest, temp8);
    masm.jmp(bb56);
    masm.bind(bb29);
    masm.movdqu(dest, temp10);
    masm.pextrw(gpr1, dest, 3);
    masm.cmpl(gpr1, 49136);
    masm.jcc(ConditionFlag.NotEqual, bb30);
    masm.movdl(gpr3, dest);
    masm.psrlq(dest, 20);
    masm.movdl(gpr4, dest);
    masm.orl(gpr3, gpr4);
    masm.jcc(ConditionFlag.NotEqual, bb30);
    masm.xorpd(dest, dest);
    masm.movl(gpr1, 32760);
    masm.pinsrw(dest, gpr1, 3);
    masm.jmp(bb56);
    masm.bind(bb30);
    masm.andl(gpr1, 32752);
    masm.subl(gpr1, 16368);
    masm.pextrw(gpr4, temp8, 3);
    masm.xorpd(dest, dest);
    masm.xorl(gpr1, gpr4);
    masm.andl(gpr1, 32768);
    masm.jcc(ConditionFlag.Equal, bb31);
    masm.jmp(bb56);
    masm.bind(bb31);
    masm.movl(gpr3, 32752);
    masm.pinsrw(dest, gpr3, 3);
    masm.jmp(bb56);
    masm.bind(bb32);
    masm.movdl(gpr1, temp1);
    masm.cmpl(gpr4, 17184);
    masm.jcc(ConditionFlag.Above, bb33);
    masm.testl(gpr1, 1);
    masm.jcc(ConditionFlag.NotEqual, bb34);
    masm.testl(gpr1, 2);
    masm.jcc(ConditionFlag.Equal, bb35);
    masm.jmp(bb36);
    masm.bind(bb33);
    masm.testl(gpr1, 1);
    masm.jcc(ConditionFlag.Equal, bb35);
    masm.jmp(bb36);
    masm.bind(bb7);
    masm.movdqu(temp2, temp10);
    masm.movdl(gpr1, temp2);
    masm.psrlq(temp2, 31);
    masm.movdl(gpr3, temp2);
    masm.orl(gpr1, gpr3);
    masm.jcc(ConditionFlag.Equal, bb9);
    masm.pextrw(gpr4, temp8, 3);
    masm.movdl(gpr1, temp8);
    masm.movdqu(temp2, temp8);
    masm.psrlq(temp2, 32);
    masm.movdl(gpr3, temp2);
    masm.addl(gpr3, gpr3);
    masm.orl(gpr3, gpr1);
    masm.jcc(ConditionFlag.Equal, bb37);
    masm.andl(gpr4, 32752);
    masm.cmpl(gpr4, 32752);
    masm.jcc(ConditionFlag.Equal, bb28);
    masm.cmpl(gpr4, 17200);
    masm.jcc(ConditionFlag.Above, bb35);
    masm.cmpl(gpr4, 17184);
    masm.jcc(ConditionFlag.AboveEqual, bb32);
    masm.cmpl(gpr4, 16368);
    masm.jcc(ConditionFlag.Below, bb34);
    masm.movl(gpr1, 17208);
    masm.xorpd(temp2, temp2);
    masm.pinsrw(temp2, gpr1, 3);
    masm.movdqu(temp4, temp2);
    masm.addsd(temp2, temp1);
    masm.subsd(temp4, temp2);
    masm.addsd(temp1, temp4);
    masm.pextrw(gpr1, temp1, 3);
    masm.andl(gpr1, 32767);
    masm.jcc(ConditionFlag.NotEqual, bb34);
    masm.movdl(gpr1, temp2);
    masm.andl(gpr1, 1);
    masm.jcc(ConditionFlag.Equal, bb35);
    masm.bind(bb36);
    masm.xorpd(temp1, temp1);
    masm.movl(gpr4, 30704);
    masm.pinsrw(temp1, gpr4, 3);
    masm.pextrw(gpr1, temp10, 3);
    masm.movl(gpr4, 8192);
    masm.movdl(temp4, gpr4);
    masm.andl(gpr1, 32767);
    masm.subl(gpr1, 16);
    masm.jcc(ConditionFlag.Less, bb10);
    masm.movl(gpr4, gpr1);
    masm.andl(gpr4, 32752);
    masm.subl(gpr4, 16368);
    masm.movl(gpr3, gpr4);
    masm.sarl(gpr4, 31);
    masm.addl(gpr3, gpr4);
    masm.xorl(gpr3, gpr4);
    masm.addl(gpr3, 16);
    masm.bsrl(gpr3, gpr3);
    masm.movl(gpr5, Integer.MIN_VALUE);
    masm.jmp(bb1);
    masm.bind(bb34);
    masm.xorpd(temp1, temp1);
    masm.movl(gpr1, 32752);
    masm.pinsrw(temp1, gpr1, 3);
    masm.xorpd(dest, dest);
    masm.mulsd(dest, temp1);
    masm.jmp(bb56);
    masm.bind(bb35);
    masm.xorpd(temp1, temp1);
    masm.movl(gpr4, 30704);
    masm.pinsrw(temp1, gpr4, 3);
    masm.pextrw(gpr1, temp10, 3);
    masm.movl(gpr4, 8192);
    masm.movdl(temp4, gpr4);
    masm.andl(gpr1, 32767);
    masm.subl(gpr1, 16);
    masm.jcc(ConditionFlag.Less, bb8);
    masm.movl(gpr4, gpr1);
    masm.andl(gpr4, 32752);
    masm.subl(gpr4, 16368);
    masm.movl(gpr3, gpr4);
    masm.sarl(gpr4, 31);
    masm.addl(gpr3, gpr4);
    masm.xorl(gpr3, gpr4);
    masm.addl(gpr3, 16);
    masm.bsrl(gpr3, gpr3);
    masm.movl(gpr5, 0);
    masm.jmp(bb1);
    masm.bind(bb19);
    masm.xorpd(dest, dest);
    masm.movl(gpr1, 16368);
    masm.pinsrw(dest, gpr1, 3);
    masm.jmp(bb56);
    masm.bind(bb22);
    masm.xorpd(dest, dest);
    masm.jmp(bb56);
    masm.bind(bb11);
    masm.addl(gpr1, 384);
    masm.cmpl(gpr1, 0);
    masm.jcc(ConditionFlag.Less, bb38);
    masm.mulsd(temp5, temp1);
    masm.addsd(dest, temp7);
    masm.shrl(gpr5, 31);
    masm.addpd(temp3, dest);
    masm.pshufd(dest, temp3, 0xEE);
    masm.addsd(temp3, dest);
    // 0xfefa39ef,
    masm.leaq(gpr7, externalAddress(logTwoPowPtr));
    // 0x3fe62e42,
    // 0xfefa39ef,
    // 0xbfe62e42
    masm.movq(temp4, new AMD64Address(gpr7, gpr5, Scale.Times8, 0));
    masm.mulsd(temp1, temp3);
    masm.xorpd(dest, dest);
    masm.movl(gpr1, 16368);
    masm.shll(gpr5, 15);
    masm.orl(gpr1, gpr5);
    masm.pinsrw(dest, gpr1, 3);
    masm.addsd(temp5, temp1);
    masm.mulsd(temp5, temp4);
    masm.addsd(dest, temp5);
    masm.jmp(bb56);
    masm.bind(bb38);
    masm.bind(bb37);
    masm.xorpd(dest, dest);
    masm.movl(gpr1, 16368);
    masm.pinsrw(dest, gpr1, 3);
    masm.jmp(bb56);
    masm.bind(bb39);
    masm.xorpd(dest, dest);
    masm.movl(gpr1, 16368);
    masm.pinsrw(dest, gpr1, 3);
    masm.jmp(bb56);
    masm.bind(bb9);
    masm.movdqu(temp2, temp8);
    masm.pextrw(gpr1, temp8, 3);
    masm.andl(gpr1, 32752);
    masm.cmpl(gpr1, 32752);
    masm.jcc(ConditionFlag.NotEqual, bb40);
    masm.movdl(gpr1, temp2);
    masm.psrlq(temp2, 20);
    masm.movdl(gpr4, temp2);
    masm.orl(gpr1, gpr4);
    masm.jcc(ConditionFlag.NotEqual, bb18);
    masm.bind(bb40);
    masm.movdl(gpr1, temp1);
    masm.psrlq(temp1, 32);
    masm.movdl(gpr4, temp1);
    masm.movl(gpr3, gpr4);
    masm.addl(gpr4, gpr4);
    masm.orl(gpr1, gpr4);
    masm.jcc(ConditionFlag.Equal, bb39);
    masm.shrl(gpr4, 21);
    masm.cmpl(gpr4, 1075);
    masm.jcc(ConditionFlag.Above, bb41);
    masm.jcc(ConditionFlag.Equal, bb42);
    masm.cmpl(gpr4, 1023);
    masm.jcc(ConditionFlag.Below, bb41);
    masm.movdqu(temp1, temp8);
    masm.movl(gpr1, 17208);
    masm.xorpd(temp3, temp3);
    masm.pinsrw(temp3, gpr1, 3);
    masm.movdqu(temp4, temp3);
    masm.addsd(temp3, temp1);
    masm.subsd(temp4, temp3);
    masm.addsd(temp1, temp4);
    masm.pextrw(gpr1, temp1, 3);
    masm.andl(gpr1, 32752);
    masm.jcc(ConditionFlag.NotEqual, bb41);
    masm.movdl(gpr1, temp3);
    masm.andl(gpr1, 1);
    masm.jcc(ConditionFlag.Equal, bb41);
    masm.bind(bb43);
    masm.movdqu(dest, temp10);
    masm.testl(gpr3, Integer.MIN_VALUE);
    masm.jcc(ConditionFlag.NotEqual, bb44);
    masm.jmp(bb56);
    masm.bind(bb42);
    masm.movdl(gpr1, temp8);
    masm.testl(gpr1, 1);
    masm.jcc(ConditionFlag.NotEqual, bb43);
    masm.bind(bb41);
    masm.testl(gpr3, Integer.MIN_VALUE);
    masm.jcc(ConditionFlag.Equal, bb22);
    masm.xorpd(dest, dest);
    masm.bind(bb44);
    masm.movl(gpr1, 16368);
    masm.xorpd(temp1, temp1);
    masm.pinsrw(temp1, gpr1, 3);
    masm.divsd(temp1, dest);
    masm.movdqu(dest, temp1);
    masm.jmp(bb56);
    masm.bind(bb12);
    masm.pextrw(gpr1, temp10, 3);
    masm.pextrw(gpr4, temp8, 3);
    masm.movl(gpr3, 32752);
    masm.andl(gpr3, gpr4);
    masm.cmpl(gpr3, 32752);
    masm.jcc(ConditionFlag.Equal, bb45);
    masm.andl(gpr1, 32752);
    masm.subl(gpr1, 16368);
    masm.xorl(gpr4, gpr1);
    masm.testl(gpr4, 32768);
    masm.jcc(ConditionFlag.NotEqual, bb46);
    masm.bind(bb47);
    masm.movl(gpr1, 32736);
    masm.pinsrw(dest, gpr1, 3);
    masm.shrl(gpr5, 16);
    masm.orl(gpr1, gpr5);
    masm.pinsrw(temp1, gpr1, 3);
    masm.mulsd(dest, temp1);
    masm.bind(bb14);
    masm.jmp(bb56);
    masm.bind(bb46);
    masm.movl(gpr1, 16);
    masm.pinsrw(dest, gpr1, 3);
    masm.mulsd(dest, dest);
    masm.testl(gpr3, Integer.MIN_VALUE);
    masm.jcc(ConditionFlag.Equal, bb48);
    masm.movq(gpr2, 0x8000000000000000L);
    masm.movdq(temp2, gpr2);
    masm.xorpd(dest, temp2);
    masm.bind(bb48);
    masm.jmp(bb56);
    masm.bind(bb13);
    masm.pextrw(gpr3, temp5, 3);
    masm.pextrw(gpr4, temp4, 3);
    masm.movl(gpr1, -1);
    masm.andl(gpr3, 32752);
    masm.subl(gpr3, 16368);
    masm.andl(gpr4, 32752);
    masm.addl(gpr4, gpr3);
    masm.movl(gpr3, -31);
    masm.sarl(gpr4, 4);
    masm.subl(gpr3, gpr4);
    masm.jcc(ConditionFlag.LessEqual, bb49);
    masm.cmpl(gpr3, 20);
    masm.jcc(ConditionFlag.Above, bb50);
    masm.shll(gpr1);
    masm.bind(bb49);
    masm.movdl(dest, gpr1);
    masm.psllq(dest, 32);
    masm.pand(dest, temp5);
    masm.subsd(temp5, dest);
    masm.addsd(temp5, temp1);
    masm.mulsd(dest, temp4);
    masm.mulsd(temp5, temp4);
    masm.addsd(dest, temp5);
    masm.bind(bb50);
    masm.jmp(bb48);
    masm.bind(bb2);
    masm.pextrw(gpr3, temp8, 3);
    masm.movl(gpr4, Integer.MIN_VALUE);
    masm.movdl(temp1, gpr4);
    masm.xorpd(temp7, temp7);
    masm.paddd(dest, temp4);
    masm.movdl(gpr4, dest);
    masm.psllq(dest, 29);
    masm.paddq(temp1, temp3);
    masm.pand(temp5, temp1);
    masm.andl(gpr3, 32752);
    masm.cmpl(gpr3, 16560);
    masm.jcc(ConditionFlag.Less, bb3);
    masm.leaq(gpr7, externalAddress(lTblPowPtr));
    masm.leaq(gpr8, externalAddress(coeffHPtr));
    // 0x00000000,
    masm.movdqu(temp4, new AMD64Address(gpr8, 0));
    // 0xbfd61a00,
    // 0x00000000,
    // 0xbf5dabe1
    masm.pand(dest, temp6);
    masm.subsd(temp3, temp5);
    masm.addl(gpr1, 16351);
    masm.shrl(gpr1, 4);
    masm.subl(gpr1, 1022);
    masm.cvtsi2sdl(temp7, gpr1);
    masm.mulpd(temp5, dest);
    masm.mulsd(temp3, dest);
    masm.subsd(temp5, temp9);
    masm.pshufd(temp1, temp4, 0xE);
    masm.pshufd(temp2, temp3, 0x44);
    masm.unpcklpd(temp5, temp3);
    masm.addsd(temp3, temp5);
    masm.andl(gpr4, 16760832);
    masm.shrl(gpr4, 10);
    masm.addpd(temp7, new AMD64Address(gpr7, gpr4, Scale.Times1, -3648));
    masm.movdqu(temp6, temp4);
    masm.mulsd(temp4, temp5);
    masm.movdqu(dest, temp1);
    masm.mulsd(dest, temp5);
    masm.mulsd(temp6, temp2);
    masm.mulsd(temp1, temp2);
    masm.movdqu(temp2, temp5);
    masm.mulsd(temp4, temp5);
    masm.addsd(temp5, dest);
    masm.movdqu(dest, temp7);
    masm.addsd(temp2, temp3);
    masm.addsd(temp7, temp5);
    masm.mulsd(temp6, temp2);
    masm.subsd(dest, temp7);
    masm.movdqu(temp2, temp7);
    masm.addsd(temp7, temp4);
    masm.addsd(dest, temp5);
    masm.subsd(temp2, temp7);
    masm.addsd(temp4, temp2);
    masm.pshufd(temp2, temp5, 0xEE);
    masm.movdqu(temp5, temp7);
    masm.addsd(temp7, temp2);
    masm.addsd(temp4, dest);
    masm.leaq(gpr8, externalAddress(coeffPowPtr));
    // 0x6dc96112,
    masm.movdqu(dest, new AMD64Address(gpr8, 0));
    // 0xbf836578,
    // 0xee241472,
    // 0xbf9b0301
    masm.subsd(temp5, temp7);
    masm.addsd(temp6, temp4);
    masm.movdqu(temp4, temp7);
    masm.addsd(temp5, temp2);
    masm.addsd(temp7, temp1);
    // 0x486ececc,
    masm.movdqu(temp2, new AMD64Address(gpr8, 64));
    // 0x3fc4635e,
    // 0x161bb241,
    // 0xbf5dabe1
    masm.subsd(temp4, temp7);
    masm.addsd(temp6, temp5);
    masm.addsd(temp4, temp1);
    masm.pshufd(temp5, temp7, 0xEE);
    masm.movapd(temp1, temp7);
    masm.addsd(temp7, temp5);
    masm.subsd(temp1, temp7);
    masm.addsd(temp1, temp5);
    // 0x9f95985a,
    masm.movdqu(temp5, new AMD64Address(gpr8, 80));
    // 0xbfb528db,
    // 0xf8b5787d,
    // 0x3ef2531e
    masm.pshufd(temp3, temp3, 0x44);
    masm.addsd(temp6, temp4);
    masm.addsd(temp6, temp1);
    // 0x9f95985a,
    masm.movdqu(temp1, new AMD64Address(gpr8, 32));
    // 0xbfb528db,
    // 0xb3841d2a,
    // 0xbfd619b6
    masm.mulpd(dest, temp3);
    masm.mulpd(temp2, temp3);
    masm.pshufd(temp4, temp3, 0x44);
    masm.mulpd(temp3, temp3);
    masm.addpd(dest, temp1);
    masm.addpd(temp5, temp2);
    masm.mulsd(temp4, temp3);
    // 0xf8000000,
    masm.movq(temp2, externalAddress(highmaskLogXPtr));
    // 0xffffffff
    masm.mulpd(temp3, temp3);
    masm.movdqu(temp1, temp8);
    masm.pextrw(gpr3, temp8, 3);
    masm.mulpd(dest, temp4);
    masm.pextrw(gpr1, temp7, 3);
    masm.mulpd(temp5, temp4);
    masm.mulpd(dest, temp3);
    masm.leaq(gpr8, externalAddress(highmaskYPtr));
    // 0x00000000,
    masm.movq(temp4, new AMD64Address(gpr8, 8));
    // 0xffffffff
    masm.pand(temp2, temp7);
    masm.addsd(temp5, temp6);
    masm.subsd(temp7, temp2);
    masm.addpd(temp5, dest);
    masm.andl(gpr1, 32752);
    masm.subl(gpr1, 16368);
    masm.andl(gpr3, 32752);
    masm.cmpl(gpr3, 32752);
    masm.jcc(ConditionFlag.Equal, bb45);
    masm.addl(gpr3, gpr1);
    masm.cmpl(gpr3, 16576);
    masm.jcc(ConditionFlag.AboveEqual, bb51);
    masm.pshufd(dest, temp5, 0xEE);
    masm.pand(temp4, temp1);
    masm.movdqu(temp3, temp1);
    masm.addsd(temp5, dest);
    masm.subsd(temp1, temp4);
    masm.xorpd(temp6, temp6);
    masm.movl(gpr4, 17080);
    masm.pinsrw(temp6, gpr4, 3);
    masm.addsd(temp7, temp5);
    masm.mulsd(temp4, temp2);
    masm.mulsd(temp1, temp2);
    masm.movdqu(temp5, temp6);
    masm.mulsd(temp3, temp7);
    masm.addsd(temp6, temp4);
    masm.addsd(temp1, temp3);
    masm.leaq(gpr8, externalAddress(eCoeffPtr));
    // 0xe78a6731,
    masm.movdqu(temp7, new AMD64Address(gpr8, 0));
    // 0x3f55d87f,
    // 0xd704a0c0,
    // 0x3fac6b08
    masm.movdl(gpr4, temp6);
    masm.subsd(temp6, temp5);
    masm.leaq(gpr7, externalAddress(tExpPtr));
    masm.movl(gpr3, gpr4);
    masm.andl(gpr4, 255);
    masm.addl(gpr4, gpr4);
    masm.movdqu(temp5, new AMD64Address(gpr7, gpr4, Scale.Times8, 0));
    // 0x6fba4e77,
    masm.movdqu(temp3, new AMD64Address(gpr8, 16));
    // 0x3f83b2ab,
    // 0xff82c58f,
    // 0x3fcebfbd
    // 0xfefa39ef,
    masm.movq(temp2, new AMD64Address(gpr8, 32));
    // 0x3fe62e42
    masm.subsd(temp4, temp6);
    masm.addsd(temp4, temp1);
    masm.pextrw(gpr4, temp6, 3);
    masm.shrl(gpr3, 8);
    masm.movl(gpr1, gpr3);
    masm.shrl(gpr3, 1);
    masm.subl(gpr1, gpr3);
    masm.shll(gpr3, 20);
    masm.movdl(temp6, gpr3);
    masm.pshufd(dest, temp4, 0x44);
    masm.pshufd(temp1, temp4, 0x44);
    masm.mulpd(dest, dest);
    masm.mulpd(temp7, temp1);
    masm.pshufd(temp6, temp6, 0x11);
    masm.mulsd(temp2, temp4);
    masm.andl(gpr4, 32767);
    masm.cmpl(gpr4, 16529);
    masm.jcc(ConditionFlag.Above, bb12);
    masm.mulsd(dest, dest);
    masm.paddd(temp5, temp6);
    masm.addpd(temp3, temp7);
    masm.mulsd(temp2, temp5);
    masm.pshufd(temp6, temp5, 0xEE);
    masm.mulpd(dest, temp3);
    masm.addsd(temp2, temp6);
    masm.pshufd(temp3, dest, 0xEE);
    masm.addl(gpr1, 1023);
    masm.shll(gpr1, 20);
    masm.orl(gpr1, gpr5);
    masm.movdl(temp4, gpr1);
    masm.mulsd(dest, temp5);
    masm.mulsd(temp3, temp5);
    masm.addsd(dest, temp2);
    masm.psllq(temp4, 32);
    masm.addsd(dest, temp3);
    masm.movdqu(temp1, dest);
    masm.addsd(dest, temp5);
    masm.mulsd(dest, temp4);
    masm.pextrw(gpr1, dest, 3);
    masm.andl(gpr1, 32752);
    masm.jcc(ConditionFlag.Equal, bb13);
    masm.cmpl(gpr1, 32752);
    masm.jcc(ConditionFlag.Equal, bb14);
    masm.jmp(bb56);
    masm.bind(bb45);
    masm.movdqu(dest, temp10);
    masm.xorpd(temp2, temp2);
    masm.movl(gpr1, 49136);
    masm.pinsrw(temp2, gpr1, 3);
    masm.addsd(temp2, dest);
    masm.pextrw(gpr1, temp2, 3);
    masm.cmpl(gpr1, 0);
    masm.jcc(ConditionFlag.NotEqual, bb53);
    masm.xorpd(dest, dest);
    masm.movl(gpr1, 32760);
    masm.pinsrw(dest, gpr1, 3);
    masm.jmp(bb56);
    masm.bind(bb53);
    masm.movdqu(temp1, temp8);
    masm.movdl(gpr4, temp1);
    masm.movdqu(temp3, temp1);
    masm.psrlq(temp3, 20);
    masm.movdl(gpr3, temp3);
    masm.orl(gpr3, gpr4);
    masm.jcc(ConditionFlag.Equal, bb54);
    masm.addsd(temp1, temp1);
    masm.movdqu(dest, temp1);
    masm.jmp(bb56);
    masm.bind(bb51);
    masm.pextrw(gpr1, temp1, 3);
    masm.pextrw(gpr3, temp2, 3);
    masm.xorl(gpr1, gpr3);
    masm.testl(gpr1, 32768);
    masm.jcc(ConditionFlag.Equal, bb47);
    masm.jmp(bb46);
    masm.bind(bb54);
    masm.pextrw(gpr1, dest, 3);
    masm.andl(gpr1, 32752);
    masm.pextrw(gpr4, temp1, 3);
    masm.xorpd(dest, dest);
    masm.subl(gpr1, 16368);
    masm.xorl(gpr1, gpr4);
    masm.testl(gpr1, 32768);
    masm.jcc(ConditionFlag.Equal, bb55);
    masm.jmp(bb56);
    masm.bind(bb55);
    masm.movl(gpr4, 32752);
    masm.pinsrw(dest, gpr4, 3);
    masm.jmp(bb56);
    masm.bind(bb56);
}
Also used : Register(jdk.vm.ci.code.Register) ValueUtil.asRegister(jdk.vm.ci.code.ValueUtil.asRegister) ArrayDataPointerConstant(org.graalvm.compiler.lir.asm.ArrayDataPointerConstant) Label(org.graalvm.compiler.asm.Label) AMD64Address(org.graalvm.compiler.asm.amd64.AMD64Address)

Example 34 with AMD64Address

use of org.graalvm.compiler.asm.amd64.AMD64Address in project graal by oracle.

the class AMD64ArrayCompareToOp method loadNextElements.

/**
 * Emits the two loads that fetch the next element of each string into {@code elem1} and
 * {@code elem2}. The instruction pair is selected from the {@code kind1}/{@code kind2} values
 * of the enclosing class:
 * <ul>
 * <li>byte/byte: {@code movzbl} for both operands, both addressed with {@code scale};</li>
 * <li>char/char: {@code movzwl} for both operands, both addressed with {@code scale};</li>
 * <li>any other combination: {@code movzbl} from {@code str1} using {@code scale1} and
 * {@code movzwl} from {@code str2} using {@code scale2}.</li>
 * </ul>
 * Every address has the form {@code base + index * scale} with a displacement of zero.
 */
private void loadNextElements(AMD64MacroAssembler masm, Register elem1, Register elem2, Register str1, Register str2, AMD64Address.Scale scale, AMD64Address.Scale scale1, AMD64Address.Scale scale2, Register index) {
    final boolean bothBytes = kind1 == JavaKind.Byte && kind2 == JavaKind.Byte;
    final boolean bothChars = kind1 == JavaKind.Char && kind2 == JavaKind.Char;

    if (bothBytes) {
        // Corresponds to the "ae == StrIntrinsicNode::LL" case noted in the original port.
        AMD64Address firstByte = new AMD64Address(str1, index, scale, 0);
        masm.movzbl(elem1, firstByte);
        AMD64Address secondByte = new AMD64Address(str2, index, scale, 0);
        masm.movzbl(elem2, secondByte);
        return;
    }

    if (bothChars) {
        // Corresponds to the "ae == StrIntrinsicNode::UU" case noted in the original port.
        AMD64Address firstChar = new AMD64Address(str1, index, scale, 0);
        masm.movzwl(elem1, firstChar);
        AMD64Address secondChar = new AMD64Address(str2, index, scale, 0);
        masm.movzwl(elem2, secondChar);
        return;
    }

    // Mixed kinds: each string gets its own scale, and str1 is always the one read as a byte.
    // NOTE(review): presumably the caller orders the operands so that str1 is the byte string
    // -- confirm at the call sites.
    AMD64Address byteSide = new AMD64Address(str1, index, scale1, 0);
    masm.movzbl(elem1, byteSide);
    AMD64Address charSide = new AMD64Address(str2, index, scale2, 0);
    masm.movzwl(elem2, charSide);
}
Also used : AMD64Address(org.graalvm.compiler.asm.amd64.AMD64Address)

Example 35 with AMD64Address

use of org.graalvm.compiler.asm.amd64.AMD64Address in project graal by oracle.

the class AMD64ArrayEqualsOp method emit8ByteCompare.

/**
 * Emits code that uses 8-byte vector compares.
 * <p>
 * Shape of the emitted code, in emission order:
 * <ol>
 * <li>{@code result} is masked down to the tail byte count and {@code length} to the byte count
 * covered by whole vectors. If no whole vector fits, control jumps to a label bound at the very
 * end of this method.</li>
 * <li>A main loop loads {@code VECTOR_SIZE} bytes from {@code array1} into a temporary register
 * and compares them against the same offset in {@code array2}.</li>
 * <li>For float kinds only, an out-of-line block re-checks a mismatching vector element by
 * element via {@code emitFloatCompare} and then rejoins the loop.</li>
 * <li>A final compare uses a load that ends exactly at the last byte, so it overlaps bytes the
 * loop already compared.</li>
 * </ol>
 *
 * @param crb only read for {@code crb.target.wordSize}, which sizes the loop alignment
 * @param masm assembler that receives the emitted instructions
 * @param result read as a byte count on entry and reduced to the tail byte count.
 *            NOTE(review): it is read before being written here, so the caller presumably
 *            copies the byte length into it first -- confirm
 * @param array1 base of the first array; advanced by the vectorized byte count
 * @param array2 base of the second array; advanced by the vectorized byte count
 * @param length byte length on entry; overwritten (masked, negated, used as loop index)
 * @param trueLabel jump target when this code decides the compared bytes are equal
 * @param falseLabel jump target when a mismatch is found
 */
private void emit8ByteCompare(CompilationResultBuilder crb, AMD64MacroAssembler masm, Register result, Register array1, Register array2, Register length, Label trueLabel, Label falseLabel) {
    Label loop = new Label();
    Label compareTail = new Label();
    // Float kinds get an extra per-element check when the raw 8-byte compare mismatches.
    boolean requiresNaNCheck = kind.isNumericFloat();
    Label loopCheck = new Label();
    Label nanCheck = new Label();
    Register temp = asRegister(temp4);
    // tail count (in bytes)
    masm.andl(result, VECTOR_SIZE - 1);
    // vector count (in bytes)
    masm.andl(length, ~(VECTOR_SIZE - 1));
    // No whole vector to compare: skip the loop and the overlapping tail compare entirely.
    masm.jcc(ConditionFlag.Zero, compareTail);
    // Advance both bases past the vectorized region and negate the count, so the loop can index
    // with a negative offset that counts up towards zero.
    masm.leaq(array1, new AMD64Address(array1, length, Scale.Times1, 0));
    masm.leaq(array2, new AMD64Address(array2, length, Scale.Times1, 0));
    masm.negq(length);
    // Align the main loop
    masm.align(crb.target.wordSize * 2);
    masm.bind(loop);
    masm.movq(temp, new AMD64Address(array1, length, Scale.Times1, 0));
    masm.cmpq(temp, new AMD64Address(array2, length, Scale.Times1, 0));
    // A mismatch is final for non-float kinds; float kinds go to the out-of-line check below.
    masm.jcc(ConditionFlag.NotEqual, requiresNaNCheck ? nanCheck : falseLabel);
    // Re-entry point for the out-of-line check once it has accepted the current vector.
    masm.bind(loopCheck);
    masm.addq(length, VECTOR_SIZE);
    // The index reaches zero exactly when the vectorized region has been consumed.
    masm.jccb(ConditionFlag.NotZero, loop);
    // No tail bytes left over: everything compared so far matched.
    masm.testl(result, result);
    masm.jcc(ConditionFlag.Zero, trueLabel);
    if (requiresNaNCheck) {
        // NaN check is slow path and hence placed outside of the main loop.
        Label unalignedCheck = new Label();
        // Straight-line execution must hop over the out-of-line block.
        masm.jmpb(unalignedCheck);
        masm.bind(nanCheck);
        // At most two iterations, unroll in the emitted code.
        // NOTE(review): emitFloatCompare is defined elsewhere; it is handed falseLabel and
        // presumably branches there for a genuine mismatch -- confirm against its definition.
        for (int offset = 0; offset < VECTOR_SIZE; offset += kind.getByteCount()) {
            emitFloatCompare(masm, array1, array2, length, offset, falseLabel, kind.getByteCount() == VECTOR_SIZE);
        }
        masm.jmpb(loopCheck);
        masm.bind(unalignedCheck);
    }
    /*
     * Compare the remaining bytes with an unaligned memory load aligned to the end of the
     * array. The bases already point past the vectorized region, so base + result - VECTOR_SIZE
     * addresses the final VECTOR_SIZE bytes.
     */
    masm.movq(temp, new AMD64Address(array1, result, Scale.Times1, -VECTOR_SIZE));
    masm.cmpq(temp, new AMD64Address(array2, result, Scale.Times1, -VECTOR_SIZE));
    if (requiresNaNCheck) {
        masm.jcc(ConditionFlag.Equal, trueLabel);
        // At most two iterations, unroll in the emitted code.
        for (int offset = 0; offset < VECTOR_SIZE; offset += kind.getByteCount()) {
            emitFloatCompare(masm, array1, array2, result, -VECTOR_SIZE + offset, falseLabel, kind.getByteCount() == VECTOR_SIZE);
        }
    } else {
        masm.jccb(ConditionFlag.NotEqual, falseLabel);
    }
    // Reached when the tail matched (or, for float kinds, passed the per-element check).
    masm.jmpb(trueLabel);
    // Only reached via the early jump above. The remaining byte count is moved back into length
    // and execution falls through to whatever is emitted after this method.
    // NOTE(review): presumably the caller emits a smaller-granularity tail compare next --
    // confirm at the call site.
    masm.bind(compareTail);
    masm.movl(length, result);
}
Also used : Register(jdk.vm.ci.code.Register) ValueUtil.asRegister(jdk.vm.ci.code.ValueUtil.asRegister) Label(org.graalvm.compiler.asm.Label) AMD64Address(org.graalvm.compiler.asm.amd64.AMD64Address)

Aggregations

AMD64Address (org.graalvm.compiler.asm.amd64.AMD64Address): 36, Register (jdk.vm.ci.code.Register): 26, ValueUtil.asRegister (jdk.vm.ci.code.ValueUtil.asRegister): 23, Label (org.graalvm.compiler.asm.Label): 15, ArrayDataPointerConstant (org.graalvm.compiler.lir.asm.ArrayDataPointerConstant): 7, CallingConvention (jdk.vm.ci.code.CallingConvention): 6, RegisterConfig (jdk.vm.ci.code.RegisterConfig): 5, TargetDescription (jdk.vm.ci.code.TargetDescription): 5, Field (java.lang.reflect.Field): 4, AMD64Assembler (org.graalvm.compiler.asm.amd64.AMD64Assembler): 4, AssemblerTest (org.graalvm.compiler.asm.test.AssemblerTest): 4, CompilationResult (org.graalvm.compiler.code.CompilationResult): 4, Test (org.junit.Test): 4, AMD64MacroAssembler (org.graalvm.compiler.asm.amd64.AMD64MacroAssembler): 3, AMD64Kind (jdk.vm.ci.amd64.AMD64Kind): 2, Scale (org.graalvm.compiler.asm.amd64.AMD64Address.Scale): 2, CGlobalDataReference (com.oracle.svm.core.graal.code.CGlobalDataReference): 1, SubstrateRegisterConfig (com.oracle.svm.core.graal.meta.SubstrateRegisterConfig): 1, AMD64.rax (jdk.vm.ci.amd64.AMD64.rax): 1, AMD64.rbx (jdk.vm.ci.amd64.AMD64.rbx): 1