Search in sources :

Example 1 with ArrayDataPointerConstant

use of org.graalvm.compiler.lir.asm.ArrayDataPointerConstant in project graal by oracle.

the class AMD64MathIntrinsicUnaryOp method expIntrinsic.

/**
 * Emits the AMD64 instruction sequence of the {@code exp} math intrinsic into {@code masm}.
 * <p>
 * The method only emits code; it performs no computation itself. The emitted code consists of
 * a main path followed by out-of-line blocks ({@code bb0}..{@code bb12}) that all end at the
 * exit label {@code bb14}, which is bound at the very end. On every path the last value
 * produced is written to {@code dest}.
 * <p>
 * NOTE(review): the hexadecimal {@code //} comments interleaved with the instructions appear to
 * list the 32-bit words of the constant loaded by the neighbouring instruction; their placement
 * looks displaced by automatic formatting - confirm against the constant table definitions.
 *
 * @param dest register that receives the result; when it differs from {@code value} the input
 *            is first copied into it
 * @param value register holding the input operand
 * @param crb builder used to resolve {@code stackTemp} to an address; also handed to
 *            {@code setCrb} (presumably so that {@code externalAddress} can record the data
 *            constants - TODO confirm)
 * @param masm assembler that receives the emitted instructions
 */
public void expIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) {
    // Data constants referenced by the emitted code; the second argument is presumably the
    // required alignment in bytes - TODO confirm against ArrayDataPointerConstant.
    ArrayDataPointerConstant onePtr = new ArrayDataPointerConstant(one, 16);
    ArrayDataPointerConstant cvExpPtr = new ArrayDataPointerConstant(cvExp, 16);
    ArrayDataPointerConstant shifterExpPtr = new ArrayDataPointerConstant(shifterExp, 8);
    ArrayDataPointerConstant mMaskExpPtr = new ArrayDataPointerConstant(mMaskExp, 16);
    ArrayDataPointerConstant biasExpPtr = new ArrayDataPointerConstant(biasExp, 16);
    ArrayDataPointerConstant tblAddrExpPtr = new ArrayDataPointerConstant(tblAddrExp, 16);
    ArrayDataPointerConstant expBiasPtr = new ArrayDataPointerConstant(expBias, 8);
    ArrayDataPointerConstant xMaxExpPtr = new ArrayDataPointerConstant(xMaxExp, 8);
    ArrayDataPointerConstant xMinExpPtr = new ArrayDataPointerConstant(xMinExp, 8);
    ArrayDataPointerConstant infExpPtr = new ArrayDataPointerConstant(infExp, 8);
    ArrayDataPointerConstant zeroExpPtr = new ArrayDataPointerConstant(zeroExp, 8);
    ArrayDataPointerConstant allOnesExpPtr = new ArrayDataPointerConstant(allOnesExp, 8);
    // Branch targets of the emitted code; bb14 is the common exit.
    Label bb0 = new Label();
    Label bb1 = new Label();
    Label bb2 = new Label();
    Label bb3 = new Label();
    Label bb4 = new Label();
    Label bb5 = new Label();
    Label bb7 = new Label();
    Label bb8 = new Label();
    Label bb9 = new Label();
    Label bb10 = new Label();
    Label bb11 = new Label();
    Label bb12 = new Label();
    Label bb14 = new Label();
    // Scratch registers taken from the operation's temp values. Note that gpr3 is bound to
    // rcxTemp rather than to a generic temp.
    Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD);
    Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD);
    Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD);
    Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD);
    Register gpr5 = asRegister(gpr5Temp, AMD64Kind.QWORD);
    Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE);
    Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE);
    Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE);
    Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE);
    Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE);
    Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE);
    Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE);
    Register temp8 = asRegister(xmm8Temp, AMD64Kind.DOUBLE);
    Register temp9 = asRegister(xmm9Temp, AMD64Kind.DOUBLE);
    Register temp10 = asRegister(xmm10Temp, AMD64Kind.DOUBLE);
    AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp);
    setCrb(crb);
    // Keep a copy of the input in memory: the special-case blocks below re-read it through
    // gpr5 and stackSlot.
    masm.movsd(stackSlot, value);
    // All arithmetic below works on dest, so bring the input there if it is not already.
    if (dest.encoding != value.encoding) {
        masm.movdqu(dest, value);
    }
    // 0xffffffc0,
    masm.movdqu(temp9, externalAddress(mMaskExpPtr));
    // 0x00000000,
    // 0xffffffc0,
    // 0x00000000
    // 0x0000ffc0,
    masm.movdqu(temp10, externalAddress(biasExpPtr));
    // 0x00000000,
    // 0x0000ffc0,
    // 0x00000000
    masm.unpcklpd(dest, dest);
    // gpr5 = address of the saved input, gpr2 = base address of the cvExp table.
    masm.leaq(gpr5, stackSlot);
    masm.leaq(gpr2, externalAddress(cvExpPtr));
    // 0x652b82fe,
    masm.movdqu(temp1, new AMD64Address(gpr2, 0));
    // 0x40571547,
    // 0x652b82fe,
    // 0x40571547
    // 0x00000000,
    masm.movdqu(temp6, externalAddress(shifterExpPtr));
    // 0x43380000,
    // 0x00000000,
    // 0x43380000
    // 0xfefa0000,
    masm.movdqu(temp2, new AMD64Address(gpr2, 16));
    // 0x3f862e42,
    // 0xfefa0000,
    // 0x3f862e42
    // 0xbc9e3b3a,
    masm.movdqu(temp3, new AMD64Address(gpr2, 32));
    // 0x3d1cf79a,
    // 0xbc9e3b3a,
    // 0x3d1cf79a
    // Range test on the top 16 bits of the input (word 3 of dest) with bit 15 cleared:
    // (16527 - w) | (w - 15504) has its sign bit set exactly when w is outside
    // [15504, 16527], and the unsigned compare against Integer.MIN_VALUE then branches to bb0.
    masm.pextrw(gpr1, dest, 3);
    masm.andl(gpr1, 32767);
    masm.movl(gpr4, 16527);
    masm.subl(gpr4, gpr1);
    masm.subl(gpr1, 15504);
    masm.orl(gpr4, gpr1);
    masm.cmpl(gpr4, Integer.MIN_VALUE);
    masm.jcc(ConditionFlag.AboveEqual, bb0);
    // Main path. gpr4 = base address of the tblAddrExp table, indexed further below.
    masm.leaq(gpr4, externalAddress(tblAddrExpPtr));
    // 0xfffffffe,
    masm.movdqu(temp8, new AMD64Address(gpr2, 48));
    // 0x3fdfffff,
    // 0xfffffffe,
    // 0x3fdfffff
    // 0xe3289860,
    masm.movdqu(temp4, new AMD64Address(gpr2, 64));
    // 0x3f56c15c,
    // 0x555b9e25,
    // 0x3fa55555
    // 0xc090cf0f,
    masm.movdqu(temp5, new AMD64Address(gpr2, 80));
    // 0x3f811115,
    // 0x55548ba1,
    // 0x3fc55555
    masm.mulpd(temp1, dest);
    masm.addpd(temp1, temp6);
    masm.movapd(temp7, temp1);
    masm.movdl(gpr1, temp1);
    masm.pand(temp7, temp9);
    masm.subpd(temp1, temp6);
    masm.mulpd(temp2, temp1);
    masm.mulpd(temp3, temp1);
    masm.paddq(temp7, temp10);
    masm.subpd(dest, temp2);
    // Table index: low 6 bits of gpr1, scaled by 16 (one 16-byte entry per index).
    masm.movl(gpr3, gpr1);
    masm.andl(gpr3, 63);
    masm.shll(gpr3, 4);
    masm.movdqu(temp2, new AMD64Address(gpr3, gpr4, Scale.Times1, 0));
    masm.sarl(gpr1, 6);
    masm.psllq(temp7, 46);
    masm.subpd(dest, temp3);
    masm.mulpd(temp4, dest);
    masm.movl(gpr4, gpr1);
    masm.movapd(temp6, dest);
    masm.movapd(temp1, dest);
    masm.mulpd(temp6, temp6);
    masm.mulpd(dest, temp6);
    masm.addpd(temp5, temp4);
    masm.mulsd(dest, temp6);
    masm.mulpd(temp6, temp8);
    masm.addsd(temp1, temp2);
    masm.unpckhpd(temp2, temp2);
    masm.mulpd(dest, temp5);
    masm.addsd(temp1, dest);
    masm.por(temp2, temp7);
    masm.unpckhpd(dest, dest);
    masm.addsd(dest, temp1);
    masm.addsd(dest, temp6);
    // gpr4 holds gpr1 >> 6 (arithmetic). The unsigned compare sends values outside
    // [-894, 1022] to bb1; everything else finishes on the two instructions below.
    masm.addl(gpr4, 894);
    masm.cmpl(gpr4, 1916);
    masm.jcc(ConditionFlag.Above, bb1);
    masm.mulsd(dest, temp2);
    masm.addsd(dest, temp2);
    masm.jmp(bb14);
    // bb1: handling for the values rejected by the compare against 1916 above.
    masm.bind(bb1);
    // 0x00000000,
    masm.movdqu(temp6, externalAddress(expBiasPtr));
    // 0x3ff00000,
    // 0x00000000,
    // 0x3ff00000
    masm.xorpd(temp3, temp3);
    // 0xffffffff,
    masm.movdqu(temp4, externalAddress(allOnesExpPtr));
    // 0xffffffff,
    // 0xffffffff,
    // 0xffffffff
    masm.movl(gpr4, -1022);
    masm.subl(gpr4, gpr1);
    masm.movdl(temp5, gpr4);
    masm.psllq(temp4, temp5);
    masm.movl(gpr3, gpr1);
    masm.sarl(gpr1, 1);
    masm.pinsrw(temp3, gpr1, 3);
    masm.psllq(temp3, 4);
    masm.psubd(temp2, temp3);
    masm.mulsd(dest, temp2);
    masm.cmpl(gpr4, 52);
    masm.jcc(ConditionFlag.Greater, bb2);
    masm.pand(temp4, temp2);
    masm.paddd(temp3, temp6);
    masm.subsd(temp2, temp4);
    masm.addsd(dest, temp2);
    masm.cmpl(gpr3, 1023);
    masm.jcc(ConditionFlag.GreaterEqual, bb3);
    masm.pextrw(gpr3, dest, 3);
    masm.andl(gpr3, 32768);
    masm.orl(gpr4, gpr3);
    masm.cmpl(gpr4, 0);
    masm.jcc(ConditionFlag.Equal, bb4);
    masm.movapd(temp6, dest);
    masm.addsd(dest, temp4);
    masm.mulsd(dest, temp3);
    // 32752 == 0x7ff0: tests whether bits 4..14 of the top word of dest are all zero.
    masm.pextrw(gpr3, dest, 3);
    masm.andl(gpr3, 32752);
    masm.cmpl(gpr3, 0);
    masm.jcc(ConditionFlag.Equal, bb5);
    masm.jmp(bb14);
    masm.bind(bb5);
    masm.mulsd(temp6, temp3);
    masm.mulsd(temp4, temp3);
    masm.movdqu(dest, temp6);
    masm.pxor(temp6, temp4);
    masm.psrad(temp6, 31);
    masm.pshufd(temp6, temp6, 85);
    masm.psllq(dest, 1);
    masm.psrlq(dest, 1);
    masm.pxor(dest, temp6);
    masm.psrlq(temp6, 63);
    masm.paddq(dest, temp6);
    masm.paddq(dest, temp4);
    masm.jmp(bb14);
    masm.bind(bb4);
    masm.addsd(dest, temp4);
    masm.mulsd(dest, temp3);
    masm.jmp(bb14);
    masm.bind(bb3);
    masm.addsd(dest, temp4);
    masm.mulsd(dest, temp3);
    // Tests whether bits 4..14 of the top word of dest are all ones (0x7ff0).
    masm.pextrw(gpr3, dest, 3);
    masm.andl(gpr3, 32752);
    masm.cmpl(gpr3, 32752);
    masm.jcc(ConditionFlag.AboveEqual, bb7);
    masm.jmp(bb14);
    masm.bind(bb2);
    masm.paddd(temp3, temp6);
    masm.addpd(dest, temp2);
    masm.mulsd(dest, temp3);
    masm.jmp(bb14);
    // bb8: reached from bb0 with gpr1 = upper 32 bits of the saved input, top bit cleared.
    masm.bind(bb8);
    // 0xffffffff,
    masm.movsd(dest, externalAddress(xMaxExpPtr));
    // 0x7fefffff
    // 0x00000000,
    masm.movsd(temp8, externalAddress(xMinExpPtr));
    // 0x00100000
    // 2146435072 == 0x7ff00000.
    masm.cmpl(gpr1, 2146435072);
    masm.jcc(ConditionFlag.AboveEqual, bb9);
    // Re-read the upper 32 bits of the saved input; unsigned >= 0x80000000 means its top
    // bit is set.
    masm.movl(gpr1, new AMD64Address(gpr5, 4));
    masm.cmpl(gpr1, Integer.MIN_VALUE);
    masm.jcc(ConditionFlag.AboveEqual, bb10);
    masm.mulsd(dest, dest);
    // bb7 adds no instructions of its own; it only forwards to the exit.
    masm.bind(bb7);
    masm.jmp(bb14);
    masm.bind(bb10);
    masm.mulsd(dest, temp8);
    masm.jmp(bb14);
    // bb9: upper word (top bit cleared) is >= 0x7ff00000. gpr4 = lower 32 bits of the saved
    // input.
    masm.bind(bb9);
    masm.movl(gpr4, stackSlot);
    masm.cmpl(gpr1, 2146435072);
    masm.jcc(ConditionFlag.Above, bb11);
    masm.cmpl(gpr4, 0);
    masm.jcc(ConditionFlag.NotEqual, bb11);
    masm.movl(gpr1, new AMD64Address(gpr5, 4));
    masm.cmpl(gpr1, 2146435072);
    masm.jcc(ConditionFlag.NotEqual, bb12);
    // 0x00000000,
    masm.movsd(dest, externalAddress(infExpPtr));
    // 0x7ff00000
    masm.jmp(bb14);
    masm.bind(bb12);
    // 0x00000000,
    masm.movsd(dest, externalAddress(zeroExpPtr));
    // 0x00000000
    masm.jmp(bb14);
    // bb11: reload the saved input and add it to itself.
    masm.bind(bb11);
    masm.movsd(dest, stackSlot);
    masm.addsd(dest, dest);
    masm.jmp(bb14);
    // bb0: the initial range test failed. Inspect the upper 32 bits of the saved input with
    // the top bit cleared; values at or above 1083179008 (0x40900000) go to bb8, the rest
    // simply add the constant at onePtr to dest.
    masm.bind(bb0);
    masm.movl(gpr1, new AMD64Address(gpr5, 4));
    masm.andl(gpr1, 2147483647);
    masm.cmpl(gpr1, 1083179008);
    masm.jcc(ConditionFlag.AboveEqual, bb8);
    // 0x00000000,
    masm.addsd(dest, externalAddress(onePtr));
    // 0x3ff00000
    // Common exit of all paths.
    masm.bind(bb14);
}
Also used : Register(jdk.vm.ci.code.Register) ValueUtil.asRegister(jdk.vm.ci.code.ValueUtil.asRegister) ArrayDataPointerConstant(org.graalvm.compiler.lir.asm.ArrayDataPointerConstant) Label(org.graalvm.compiler.asm.Label) AMD64Address(org.graalvm.compiler.asm.amd64.AMD64Address)

Example 2 with ArrayDataPointerConstant

use of org.graalvm.compiler.lir.asm.ArrayDataPointerConstant in project graal by oracle.

the class AMD64MathIntrinsicUnaryOp method logIntrinsic.

/*
 * Copyright (c) 2014, 2016, Intel Corporation. All rights reserved. Intel Math Library (LIBM)
 * Source Code
 *
 * ALGORITHM DESCRIPTION - LOG()
 * ---------------------
 *
 * x = 2^k * mx, mx in [1,2)
 *
 * Get B ~ 1/mx based on the output of the rcpps instruction (B0):
 *   B = int((B0*2^7+0.5))/2^7
 *
 * Reduced argument: r = B*mx - 1.0 (computed accurately in high and low parts)
 *
 * Result: k*log(2) - log(B) + p(r) if |x-1| >= small value (2^-6), where p(r) is a degree 7
 * polynomial and -log(B) is read from a data table (high, low parts). The result is formed
 * from high and low parts.
 *
 * Special cases:
 *   log(NaN)  = quiet NaN, and raise invalid exception
 *   log(+INF) = that INF
 *   log(0)    = -INF with divide-by-zero exception raised
 *   log(1)    = +0
 *   log(x)    = NaN with invalid exception raised if x < -0, including -INF
 */
/**
 * Emits the AMD64 instruction sequence of the {@code log} math intrinsic into {@code masm}.
 * <p>
 * The method only emits code. The emitted code has a main path (entered at {@code bb1}),
 * out-of-line blocks {@code bb0}..{@code bb7} and a common exit label {@code bb8} bound at the
 * very end. On every path the last value produced is written to {@code dest}.
 * <p>
 * NOTE(review): the hexadecimal {@code //} comments interleaved with the instructions appear to
 * list 32-bit words of the constant loaded by the neighbouring instruction; their placement
 * looks displaced by automatic formatting - confirm against the constant table definitions.
 *
 * @param dest register that receives the result; when it differs from {@code value} the input
 *            is first copied into it
 * @param value register holding the input operand
 * @param crb builder used to resolve {@code stackTemp} to an address; also handed to
 *            {@code setCrb} (presumably so that {@code externalAddress} can record the data
 *            constants - TODO confirm)
 * @param masm assembler that receives the emitted instructions
 */
public void logIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) {
    ArrayDataPointerConstant logTwoTablePtr = new ArrayDataPointerConstant(logTwoTable, 16);
    ArrayDataPointerConstant logTwoDataPtr = new ArrayDataPointerConstant(logTwoData, 16);
    ArrayDataPointerConstant coeffLogTwoDataPtr = new ArrayDataPointerConstant(coeffLogTwoData, 16);
    // Branch targets of the emitted code; bb8 is the common exit.
    Label bb0 = new Label();
    Label bb1 = new Label();
    Label bb2 = new Label();
    Label bb3 = new Label();
    Label bb4 = new Label();
    Label bb5 = new Label();
    Label bb6 = new Label();
    Label bb7 = new Label();
    Label bb8 = new Label();
    // Scratch registers taken from the operation's temp values. Note that gpr3 is bound to
    // rcxTemp rather than to a generic temp.
    Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD);
    Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD);
    Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD);
    Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD);
    Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE);
    Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE);
    Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE);
    Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE);
    Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE);
    Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE);
    Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE);
    AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp);
    setCrb(crb);
    // Keep a copy of the input in memory; bb0 reloads it from there.
    masm.movdq(stackSlot, value);
    // All arithmetic below works on dest, so bring the input there if it is not already.
    if (dest.encoding != value.encoding) {
        masm.movdqu(dest, value);
    }
    // Materialise 64-bit/32-bit immediates in XMM temps by going through general registers.
    masm.movq(gpr1, 0x3ff0000000000000L);
    masm.movdq(temp2, gpr1);
    masm.movq(gpr3, 0x77f0000000000000L);
    masm.movdq(temp3, gpr3);
    masm.movl(gpr2, 32768);
    masm.movdl(temp4, gpr2);
    masm.movq(gpr2, 0xffffe00000000000L);
    masm.movdq(temp5, gpr2);
    masm.movdqu(temp1, value);
    // gpr1 = top 16 bits of the input (word 3 of dest).
    masm.pextrw(gpr1, dest, 3);
    masm.por(dest, temp2);
    masm.movl(gpr2, 16352);
    masm.psrlq(dest, 27);
    masm.leaq(gpr4, externalAddress(logTwoTablePtr));
    masm.psrld(dest, 2);
    masm.rcpps(dest, dest);
    masm.psllq(temp1, 12);
    masm.pshufd(temp6, temp5, 0xE4);
    masm.psrlq(temp1, 12);
    // Unsigned range test: after subtracting 16, values >= 32736 correspond to a top word
    // below 16 or at/above 32752 (0x7ff0); those inputs are handled at bb0.
    masm.subl(gpr1, 16);
    masm.cmpl(gpr1, 32736);
    masm.jcc(ConditionFlag.AboveEqual, bb0);
    // bb1: main path. Also re-entered from bb3 after that block has rescaled the input.
    masm.bind(bb1);
    masm.paddd(dest, temp4);
    masm.por(temp1, temp3);
    masm.movdl(gpr3, dest);
    masm.psllq(dest, 29);
    masm.pand(temp5, temp1);
    masm.pand(dest, temp6);
    masm.subsd(temp1, temp5);
    masm.mulpd(temp5, dest);
    masm.andl(gpr1, 32752);
    masm.subl(gpr1, gpr2);
    masm.cvtsi2sdl(temp7, gpr1);
    masm.mulsd(temp1, dest);
    // 0xfefa3800,
    masm.movdq(temp6, externalAddress(logTwoDataPtr));
    // 0x3fa62e42
    // 0x92492492,
    masm.movdqu(temp3, externalAddress(coeffLogTwoDataPtr));
    // 0x3fc24924,
    // 0x00000000,
    // 0xbfd00000
    masm.subsd(temp5, temp2);
    // Byte offset into the logTwoTable: bits 16..23 of gpr3 shifted down by 12.
    masm.andl(gpr3, 16711680);
    masm.shrl(gpr3, 12);
    masm.movdqu(dest, new AMD64Address(gpr4, gpr3, Scale.Times1, 0));
    // gpr4 is re-pointed to the coefficient table for the two loads below.
    masm.leaq(gpr4, externalAddress(coeffLogTwoDataPtr));
    // 0x3d6fb175,
    masm.movdqu(temp4, new AMD64Address(gpr4, 16));
    // 0xbfc5555e,
    // 0x55555555,
    // 0x3fd55555
    masm.addsd(temp1, temp5);
    // 0x9999999a,
    masm.movdqu(temp2, new AMD64Address(gpr4, 32));
    // 0x3fc99999,
    // 0x00000000,
    // 0xbfe00000
    masm.mulsd(temp6, temp7);
    // Copy the low double of temp1 into both halves of temp5; without SSE3 the same effect
    // is obtained with a full copy followed by movlhps.
    if (masm.supports(CPUFeature.SSE3)) {
        masm.movddup(temp5, temp1);
    } else {
        masm.movdqu(temp5, temp1);
        masm.movlhps(temp5, temp5);
    }
    // gpr4 is re-pointed to logTwoData to reach its second 8-byte entry.
    masm.leaq(gpr4, externalAddress(logTwoDataPtr));
    // 0x93c76730,
    masm.mulsd(temp7, new AMD64Address(gpr4, 8));
    // 0x3ceef357
    masm.mulsd(temp3, temp1);
    masm.addsd(dest, temp6);
    masm.mulpd(temp4, temp5);
    masm.mulpd(temp5, temp5);
    // Same duplication as above, this time from dest into temp6.
    if (masm.supports(CPUFeature.SSE3)) {
        masm.movddup(temp6, dest);
    } else {
        masm.movdqu(temp6, dest);
        masm.movlhps(temp6, temp6);
    }
    masm.addsd(dest, temp1);
    masm.addpd(temp4, temp2);
    masm.mulpd(temp3, temp5);
    masm.subsd(temp6, dest);
    masm.mulsd(temp4, temp1);
    masm.pshufd(temp2, dest, 0xEE);
    masm.addsd(temp1, temp6);
    masm.mulsd(temp5, temp5);
    masm.addsd(temp7, temp2);
    masm.addpd(temp4, temp3);
    masm.addsd(temp1, temp7);
    masm.mulpd(temp4, temp5);
    masm.addsd(temp1, temp4);
    masm.pshufd(temp5, temp4, 0xEE);
    masm.addsd(temp1, temp5);
    masm.addsd(dest, temp1);
    masm.jmp(bb8);
    // bb0: inputs rejected by the range test. Reload the original input and undo the
    // subtraction of 16 so gpr1 is the plain top word again.
    masm.bind(bb0);
    masm.movdq(dest, stackSlot);
    masm.movdq(temp1, stackSlot);
    masm.addl(gpr1, 16);
    // Top word >= 32768 means bit 15 (the top bit of the input) is set.
    masm.cmpl(gpr1, 32768);
    masm.jcc(ConditionFlag.AboveEqual, bb2);
    masm.cmpl(gpr1, 16);
    masm.jcc(ConditionFlag.Below, bb3);
    // bb4: add the input to itself and exit.
    masm.bind(bb4);
    masm.addsd(dest, dest);
    masm.jmp(bb8);
    // bb5: only reached through the jcc in bb2, so the flags tested by the first jcc here
    // are still those of bb2's cmpl(gpr2, -2097152).
    masm.bind(bb5);
    masm.jcc(ConditionFlag.Above, bb4);
    masm.cmpl(gpr3, 0);
    masm.jcc(ConditionFlag.Above, bb4);
    masm.jmp(bb6);
    // bb3: top word below 16. gpr3 | gpr2 combines the low and high 32 bits of the value;
    // if all are zero go to bb7, otherwise rescale and rejoin the main path at bb1.
    masm.bind(bb3);
    masm.xorpd(temp1, temp1);
    masm.addsd(temp1, dest);
    masm.movdl(gpr3, temp1);
    masm.psrlq(temp1, 32);
    masm.movdl(gpr2, temp1);
    masm.orl(gpr3, gpr2);
    masm.cmpl(gpr3, 0);
    masm.jcc(ConditionFlag.Equal, bb7);
    // Build a double whose top 16 bits are 18416 (0x47f0) and multiply the input by it.
    masm.xorpd(temp1, temp1);
    masm.movl(gpr1, 18416);
    masm.pinsrw(temp1, gpr1, 3);
    masm.mulsd(dest, temp1);
    // Repeat the set-up sequence of the method entry on the rescaled value; gpr2 gets 18416
    // here instead of the 16352 used on the direct path.
    masm.movdqu(temp1, dest);
    masm.pextrw(gpr1, dest, 3);
    masm.por(dest, temp2);
    masm.psrlq(dest, 27);
    masm.movl(gpr2, 18416);
    masm.psrld(dest, 2);
    masm.rcpps(dest, dest);
    masm.psllq(temp1, 12);
    masm.pshufd(temp6, temp5, 0xE4);
    masm.psrlq(temp1, 12);
    masm.jmp(bb1);
    // bb2: top bit of the input is set. gpr3 = low 32 bits, gpr2 = high 32 bits doubled
    // (which shifts the top bit out).
    masm.bind(bb2);
    masm.movdl(gpr3, temp1);
    masm.psrlq(temp1, 32);
    masm.movdl(gpr2, temp1);
    masm.addl(gpr2, gpr2);
    // -2097152 == 0xffe00000.
    masm.cmpl(gpr2, -2097152);
    masm.jcc(ConditionFlag.AboveEqual, bb5);
    masm.orl(gpr3, gpr2);
    masm.cmpl(gpr3, 0);
    masm.jcc(ConditionFlag.Equal, bb7);
    // bb6: multiply zero by a double whose top 16 bits are 32752 (0x7ff0).
    masm.bind(bb6);
    masm.xorpd(temp1, temp1);
    masm.xorpd(dest, dest);
    masm.movl(gpr1, 32752);
    masm.pinsrw(temp1, gpr1, 3);
    masm.mulsd(dest, temp1);
    masm.jmp(bb8);
    // bb7: divide a double whose top 16 bits are 49136 (0xbff0) by zero.
    masm.bind(bb7);
    masm.xorpd(temp1, temp1);
    masm.xorpd(dest, dest);
    masm.movl(gpr1, 49136);
    masm.pinsrw(dest, gpr1, 3);
    masm.divsd(dest, temp1);
    // Common exit of all paths.
    masm.bind(bb8);
}
Also used : Register(jdk.vm.ci.code.Register) ValueUtil.asRegister(jdk.vm.ci.code.ValueUtil.asRegister) ArrayDataPointerConstant(org.graalvm.compiler.lir.asm.ArrayDataPointerConstant) Label(org.graalvm.compiler.asm.Label) AMD64Address(org.graalvm.compiler.asm.amd64.AMD64Address)

Example 3 with ArrayDataPointerConstant

use of org.graalvm.compiler.lir.asm.ArrayDataPointerConstant in project graal by oracle.

the class AMD64MathIntrinsicUnaryOp method cosIntrinsic.

/**
 * Emits the AMD64 instruction sequence of the {@code cos} math intrinsic into {@code masm}.
 * <p>
 * The method only emits code. The emitted code has a main path, out-of-line blocks
 * ({@code bb0}..{@code bb12}, {@code bb14}) and a common exit label {@code bb13} bound at the
 * very end. On every path the last value produced is written to {@code dest}.
 * <p>
 * NOTE(review): the hexadecimal {@code //} comments interleaved with the instructions appear to
 * list 32-bit words of the constant loaded by the neighbouring instruction; their placement
 * looks displaced by automatic formatting - confirm against the constant table definitions.
 *
 * @param dest register that receives the result; when it differs from {@code value} the input
 *            is first copied into it
 * @param value register holding the input operand
 * @param crb builder used to resolve {@code stackTemp} to an address; also handed to
 *            {@code setCrb} (presumably so that {@code externalAddress} can record the data
 *            constants - TODO confirm)
 * @param masm assembler that receives the emitted instructions
 */
public void cosIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) {
    ArrayDataPointerConstant oneHalfPtr = new ArrayDataPointerConstant(oneHalf, 16);
    ArrayDataPointerConstant pTwoPtr = new ArrayDataPointerConstant(pTwo, 16);
    ArrayDataPointerConstant scFourPtr = new ArrayDataPointerConstant(scFour, 16);
    ArrayDataPointerConstant cTablePtr = new ArrayDataPointerConstant(cTable, 16);
    ArrayDataPointerConstant scTwoPtr = new ArrayDataPointerConstant(scTwo, 16);
    ArrayDataPointerConstant scThreePtr = new ArrayDataPointerConstant(scThree, 16);
    ArrayDataPointerConstant scOnePtr = new ArrayDataPointerConstant(scOne, 16);
    ArrayDataPointerConstant piInvTablePtr = new ArrayDataPointerConstant(piInvTable, 16);
    ArrayDataPointerConstant piFourPtr = new ArrayDataPointerConstant(piFour, 16);
    ArrayDataPointerConstant piThirtyTwoInvPtr = new ArrayDataPointerConstant(piThirtyTwoInv, 8);
    ArrayDataPointerConstant signMaskPtr = new ArrayDataPointerConstant(signMask, 8);
    ArrayDataPointerConstant pThreePtr = new ArrayDataPointerConstant(pThree, 8);
    ArrayDataPointerConstant pOnePtr = new ArrayDataPointerConstant(pOne, 8);
    ArrayDataPointerConstant onePtr = new ArrayDataPointerConstant(one, 8);
    // Branch targets of the emitted code; bb13 is the common exit.
    Label bb0 = new Label();
    Label bb1 = new Label();
    Label bb3 = new Label();
    Label bb4 = new Label();
    Label bb5 = new Label();
    Label bb6 = new Label();
    Label bb7 = new Label();
    Label bb8 = new Label();
    Label bb9 = new Label();
    Label bb10 = new Label();
    Label bb11 = new Label();
    Label bb12 = new Label();
    Label bb13 = new Label();
    Label bb14 = new Label();
    // Scratch registers taken from the operation's temp values. Note that gpr3 is bound to
    // rcxTemp rather than to a generic temp.
    Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD);
    Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD);
    Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD);
    Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD);
    Register gpr5 = asRegister(gpr5Temp, AMD64Kind.QWORD);
    Register gpr6 = asRegister(gpr6Temp, AMD64Kind.QWORD);
    Register gpr7 = asRegister(gpr7Temp, AMD64Kind.QWORD);
    Register gpr8 = asRegister(gpr8Temp, AMD64Kind.QWORD);
    Register gpr9 = asRegister(gpr9Temp, AMD64Kind.QWORD);
    Register gpr10 = asRegister(gpr10Temp, AMD64Kind.QWORD);
    Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE);
    Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE);
    Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE);
    Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE);
    Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE);
    Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE);
    Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE);
    Register temp8 = asRegister(xmm8Temp, AMD64Kind.DOUBLE);
    Register temp9 = asRegister(xmm9Temp, AMD64Kind.DOUBLE);
    AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp);
    setCrb(crb);
    // Spill the input so that its upper 32 bits can be read into a general register.
    masm.movdq(stackSlot, value);
    // All arithmetic below works on dest, so bring the input there if it is not already.
    if (dest.encoding != value.encoding) {
        masm.movdqu(dest, value);
    }
    // gpr1 = upper 32 bits of the saved input (offset 4 from the slot address).
    masm.leaq(gpr1, stackSlot);
    masm.movl(gpr1, new AMD64Address(gpr1, 4));
    // 0x6dc9c883,
    masm.movdq(temp1, externalAddress(piThirtyTwoInvPtr));
    // 0x40245f30
    // Range test on the upper word masked with 2147418112 (0x7fff0000): after subtracting
    // 808452096 (0x30300000), anything unsigned-above 281346048 (0x10c50000) leaves the main
    // path for bb0.
    masm.andl(gpr1, 2147418112);
    masm.subl(gpr1, 808452096);
    masm.cmpl(gpr1, 281346048);
    masm.jcc(ConditionFlag.Above, bb0);
    // Main path.
    masm.mulsd(temp1, dest);
    // 0x00000000,
    masm.movdqu(temp5, externalAddress(oneHalfPtr));
    // 0x3fe00000,
    // 0x00000000,
    // 0x3fe00000
    // 0x00000000,
    masm.movdq(temp4, externalAddress(signMaskPtr));
    // 0x80000000
    masm.pand(temp4, dest);
    masm.por(temp5, temp4);
    masm.addpd(temp1, temp5);
    masm.cvttsd2sil(gpr4, temp1);
    masm.cvtsi2sdl(temp1, gpr4);
    // 0x1a600000,
    masm.movdqu(temp2, externalAddress(pTwoPtr));
    // 0x3d90b461,
    // 0x1a600000,
    // 0x3d90b461
    // 0x54400000,
    masm.movdq(temp3, externalAddress(pOnePtr));
    // 0x3fb921fb
    masm.mulsd(temp3, temp1);
    masm.unpcklpd(temp1, temp1);
    masm.addq(gpr4, 1865232);
    masm.movdqu(temp4, dest);
    masm.andq(gpr4, 63);
    // 0xa556c734,
    masm.movdqu(temp5, externalAddress(scFourPtr));
    // 0x3ec71de3,
    // 0x1a01a01a,
    // 0x3efa01a0
    // gpr1 = address of the cTable entry selected by the low 6 bits of gpr4 (32 bytes per
    // entry).
    masm.leaq(gpr1, externalAddress(cTablePtr));
    masm.shlq(gpr4, 5);
    masm.addq(gpr1, gpr4);
    masm.movdqu(temp8, new AMD64Address(gpr1, 0));
    masm.mulpd(temp2, temp1);
    masm.subsd(dest, temp3);
    // 0x2e037073,
    masm.mulsd(temp1, externalAddress(pThreePtr));
    // 0x3b63198a
    masm.subsd(temp4, temp3);
    masm.unpcklpd(dest, dest);
    masm.movdqu(temp3, temp4);
    masm.subsd(temp4, temp2);
    masm.mulpd(temp5, dest);
    masm.subpd(dest, temp2);
    masm.pshufd(temp7, temp8, 0xE);
    // 0x11111111,
    masm.movdqu(temp6, externalAddress(scTwoPtr));
    // 0x3f811111,
    // 0x55555555,
    // 0x3fa55555
    masm.mulsd(temp7, temp4);
    masm.subsd(temp3, temp4);
    masm.mulpd(temp5, dest);
    masm.mulpd(dest, dest);
    masm.subsd(temp3, temp2);
    masm.movdqu(temp2, temp8);
    masm.subsd(temp1, temp3);
    masm.movdq(temp3, new AMD64Address(gpr1, 24));
    masm.addsd(temp2, temp3);
    masm.subsd(temp7, temp2);
    masm.mulsd(temp2, temp4);
    masm.mulpd(temp6, dest);
    masm.mulsd(temp3, temp4);
    masm.mulpd(temp2, dest);
    masm.mulpd(dest, dest);
    // 0x1a01a01a,
    masm.addpd(temp5, externalAddress(scThreePtr));
    // 0xbf2a01a0,
    // 0x16c16c17,
    // 0xbf56c16c
    masm.mulsd(temp4, temp8);
    masm.pshufd(temp9, temp8, 0xE);
    // 0x55555555,
    masm.addpd(temp6, externalAddress(scOnePtr));
    // 0xbfc55555,
    // 0x00000000,
    // 0xbfe00000
    masm.mulpd(temp5, dest);
    masm.movdqu(dest, temp3);
    masm.addsd(temp3, temp9);
    masm.mulpd(temp1, temp7);
    masm.movdqu(temp7, temp4);
    masm.addsd(temp4, temp3);
    masm.addpd(temp6, temp5);
    masm.subsd(temp9, temp3);
    masm.subsd(temp3, temp4);
    masm.addsd(temp1, new AMD64Address(gpr1, 16));
    masm.mulpd(temp6, temp2);
    masm.addsd(dest, temp9);
    masm.addsd(temp3, temp7);
    masm.addsd(dest, temp1);
    masm.addsd(dest, temp3);
    masm.addsd(dest, temp6);
    masm.unpckhpd(temp6, temp6);
    masm.addsd(dest, temp6);
    masm.addsd(dest, temp4);
    masm.jmp(bb13);
    // bb14: reached from bb1 when bits 4..14 of the input's top word are all ones; computes
    // 0.0 / 0.0 into dest.
    masm.bind(bb14);
    masm.xorpd(temp1, temp1);
    masm.xorpd(dest, dest);
    masm.divsd(dest, temp1);
    masm.jmp(bb13);
    // bb0: only reached through the jcc after the initial range test, so the flags tested by
    // the first jcc here are still those of that cmpl.
    masm.bind(bb0);
    masm.jcc(ConditionFlag.Greater, bb1);
    // Clear bit 15 of the top word of dest, then dest = (constant at onePtr) - dest.
    masm.pextrw(gpr1, dest, 3);
    masm.andl(gpr1, 32767);
    masm.pinsrw(dest, gpr1, 3);
    // 0x00000000,
    masm.movdq(temp1, externalAddress(onePtr));
    // 0x3ff00000
    masm.subsd(temp1, dest);
    masm.movdqu(dest, temp1);
    masm.jmp(bb13);
    // bb1: large-argument path. Everything up to bb11 is integer arithmetic on the raw bits
    // of the input and words of the piInvTable; the result is converted back to doubles at
    // bb10 (presumably an extended-precision argument reduction - TODO confirm).
    masm.bind(bb1);
    masm.pextrw(gpr3, dest, 3);
    masm.andl(gpr3, 32752);
    masm.cmpl(gpr3, 32752);
    masm.jcc(ConditionFlag.Equal, bb14);
    // gpr3 becomes a 4-byte-aligned offset (mask 65532) and then an absolute address into
    // the piInvTable.
    masm.subl(gpr3, 16224);
    masm.shrl(gpr3, 7);
    masm.andl(gpr3, 65532);
    masm.leaq(gpr10, externalAddress(piInvTablePtr));
    masm.addq(gpr3, gpr10);
    // gpr1 = raw 64 bits of the input; it is split into gpr4 (low 32 bits) and gpr1
    // (bits 32..52 with bit 20 of the result forced to one).
    masm.movdq(gpr1, dest);
    masm.movl(gpr9, new AMD64Address(gpr3, 20));
    masm.movl(gpr7, new AMD64Address(gpr3, 24));
    masm.movl(gpr4, gpr1);
    masm.shrq(gpr1, 21);
    masm.orl(gpr1, Integer.MIN_VALUE);
    masm.shrl(gpr1, 11);
    // Multiply the two input halves by successive 32-bit table words, propagating the upper
    // 32 bits of each partial product into the next sum.
    masm.movl(gpr8, gpr9);
    masm.imulq(gpr9, gpr4);
    masm.imulq(gpr8, gpr1);
    masm.imulq(gpr7, gpr1);
    masm.movl(gpr5, new AMD64Address(gpr3, 16));
    masm.movl(gpr6, new AMD64Address(gpr3, 12));
    masm.movl(gpr10, gpr9);
    masm.shrq(gpr9, 32);
    masm.addq(gpr8, gpr9);
    masm.addq(gpr10, gpr7);
    masm.movl(gpr7, gpr10);
    masm.shrq(gpr10, 32);
    masm.addq(gpr8, gpr10);
    masm.movl(gpr9, gpr5);
    masm.imulq(gpr5, gpr4);
    masm.imulq(gpr9, gpr1);
    masm.movl(gpr10, gpr6);
    masm.imulq(gpr6, gpr4);
    masm.movl(gpr2, gpr5);
    masm.shrq(gpr5, 32);
    masm.addq(gpr8, gpr2);
    masm.movl(gpr2, gpr8);
    masm.shrq(gpr8, 32);
    masm.addq(gpr9, gpr5);
    masm.addq(gpr9, gpr8);
    masm.shlq(gpr2, 32);
    masm.orq(gpr7, gpr2);
    masm.imulq(gpr10, gpr1);
    masm.movl(gpr8, new AMD64Address(gpr3, 8));
    masm.movl(gpr5, new AMD64Address(gpr3, 4));
    masm.movl(gpr2, gpr6);
    masm.shrq(gpr6, 32);
    masm.addq(gpr9, gpr2);
    masm.movl(gpr2, gpr9);
    masm.shrq(gpr9, 32);
    masm.addq(gpr10, gpr6);
    masm.addq(gpr10, gpr9);
    masm.movq(gpr6, gpr8);
    masm.imulq(gpr8, gpr4);
    masm.imulq(gpr6, gpr1);
    masm.movl(gpr9, gpr8);
    masm.shrq(gpr8, 32);
    masm.addq(gpr10, gpr9);
    masm.movl(gpr9, gpr10);
    masm.shrq(gpr10, 32);
    masm.addq(gpr6, gpr8);
    masm.addq(gpr6, gpr10);
    masm.movq(gpr8, gpr5);
    masm.imulq(gpr5, gpr4);
    masm.imulq(gpr8, gpr1);
    masm.shlq(gpr9, 32);
    masm.orq(gpr9, gpr2);
    masm.movl(gpr1, new AMD64Address(gpr3, 0));
    masm.movl(gpr10, gpr5);
    masm.shrq(gpr5, 32);
    masm.addq(gpr6, gpr10);
    masm.movl(gpr10, gpr6);
    masm.shrq(gpr6, 32);
    masm.addq(gpr8, gpr5);
    masm.addq(gpr8, gpr6);
    masm.imulq(gpr4, gpr1);
    masm.pextrw(gpr2, dest, 3);
    // Turn gpr3 back from an absolute table address into an offset, then scale it by 8 via
    // three doublings.
    masm.leaq(gpr6, externalAddress(piInvTablePtr));
    masm.subq(gpr3, gpr6);
    masm.addl(gpr3, gpr3);
    masm.addl(gpr3, gpr3);
    masm.addl(gpr3, gpr3);
    masm.addl(gpr3, 19);
    // gpr5 = bit 15 of the input's top word; gpr2 = its bits 4..14 minus 1023.
    masm.movl(gpr5, 32768);
    masm.andl(gpr5, gpr2);
    masm.shrl(gpr2, 4);
    masm.andl(gpr2, 2047);
    masm.subl(gpr2, 1023);
    masm.subl(gpr3, gpr2);
    masm.addq(gpr8, gpr4);
    masm.movl(gpr4, gpr3);
    masm.addl(gpr4, 32);
    masm.cmpl(gpr3, 1);
    masm.jcc(ConditionFlag.Less, bb3);
    masm.negl(gpr3);
    masm.addl(gpr3, 29);
    // NOTE(review): the shift emitters called without a count here and below presumably take
    // the count from CL, which would be why gpr3 is bound to rcxTemp - confirm against the
    // assembler.
    masm.shll(gpr8);
    masm.movl(gpr6, gpr8);
    masm.andl(gpr8, 536870911);
    masm.testl(gpr8, 268435456);
    masm.jcc(ConditionFlag.NotEqual, bb4);
    masm.shrl(gpr8);
    masm.movl(gpr2, 0);
    masm.shlq(gpr8, 32);
    masm.orq(gpr8, gpr10);
    // bb5 and bb6 mark the same position; bb4 returns to bb5, bb3 and bb12 return to bb6.
    masm.bind(bb5);
    masm.bind(bb6);
    masm.cmpq(gpr8, 0);
    masm.jcc(ConditionFlag.Equal, bb7);
    // bb8: gpr8 is non-zero here. gpr3 = 29 - (index of its highest set bit); the sign of
    // that difference selects a left shift (fall through), a right shift (bb9) or no shift.
    masm.bind(bb8);
    masm.bsrq(gpr10, gpr8);
    masm.movl(gpr3, 29);
    masm.subl(gpr3, gpr10);
    masm.jcc(ConditionFlag.LessEqual, bb9);
    masm.shlq(gpr8);
    masm.movq(gpr1, gpr9);
    masm.shlq(gpr9);
    masm.addl(gpr4, gpr3);
    masm.negl(gpr3);
    masm.addl(gpr3, 64);
    masm.shrq(gpr1);
    masm.shrq(gpr7);
    masm.orq(gpr8, gpr1);
    masm.orq(gpr9, gpr7);
    // bb10: convert the integer words in gpr8 and gpr9 to doubles and scale them with
    // factors whose top 16 bits are assembled in gpr4.
    masm.bind(bb10);
    masm.cvtsi2sdq(dest, gpr8);
    masm.shrq(gpr9, 1);
    masm.cvtsi2sdq(temp3, gpr9);
    masm.xorpd(temp4, temp4);
    masm.shll(gpr4, 4);
    masm.negl(gpr4);
    masm.addl(gpr4, 16368);
    masm.orl(gpr4, gpr5);
    masm.xorl(gpr4, gpr2);
    masm.pinsrw(temp4, gpr4, 3);
    masm.leaq(gpr2, externalAddress(piFourPtr));
    // 0x40000000,
    masm.movdqu(temp2, new AMD64Address(gpr2, 0));
    // 0x3fe921fb,
    // 0x18469899,
    // 0x3e64442d
    masm.xorpd(temp5, temp5);
    masm.subl(gpr4, 1008);
    masm.pinsrw(temp5, gpr4, 3);
    masm.mulsd(dest, temp4);
    // Turn bit 15 of gpr5 into an all-ones/all-zeros mask.
    masm.shll(gpr5, 16);
    masm.sarl(gpr5, 31);
    masm.mulsd(temp3, temp5);
    masm.movdqu(temp1, dest);
    masm.mulsd(dest, temp2);
    masm.pshufd(temp6, temp2, 0xE);
    masm.shrl(gpr6, 29);
    masm.addsd(temp1, temp3);
    masm.mulsd(temp3, temp2);
    // (gpr6 + mask) ^ mask negates gpr6 when the mask is all ones.
    masm.addl(gpr6, gpr5);
    masm.xorl(gpr6, gpr5);
    masm.mulsd(temp6, temp1);
    masm.movl(gpr1, gpr6);
    masm.addsd(temp6, temp3);
    masm.movdqu(temp2, dest);
    masm.addsd(dest, temp6);
    masm.subsd(temp2, dest);
    masm.addsd(temp6, temp2);
    // bb11: same overall instruction pattern as the main path, applied to dest with the
    // extra term in temp6 and the table index adjusted by gpr1 << 3.
    masm.bind(bb11);
    // 0x6dc9c883,
    masm.movq(temp1, externalAddress(piThirtyTwoInvPtr));
    // 0x40245f30
    masm.mulsd(temp1, dest);
    // 0x00000000,
    masm.movdq(temp5, externalAddress(oneHalfPtr));
    // 0x3fe00000,
    // 0x00000000,
    // 0x3fe00000
    // 0x00000000,
    masm.movdq(temp4, externalAddress(signMaskPtr));
    // 0x80000000
    masm.pand(temp4, dest);
    masm.por(temp5, temp4);
    masm.addpd(temp1, temp5);
    masm.cvttsd2siq(gpr4, temp1);
    masm.cvtsi2sdq(temp1, gpr4);
    // 0x54400000,
    masm.movdq(temp3, externalAddress(pOnePtr));
    // 0x3fb921fb
    // 0x1a600000,
    masm.movdqu(temp2, externalAddress(pTwoPtr));
    // 0x3d90b461,
    // 0x1a600000,
    // 0x3d90b461
    masm.mulsd(temp3, temp1);
    masm.unpcklpd(temp1, temp1);
    masm.shll(gpr1, 3);
    masm.addl(gpr4, 1865232);
    masm.movdqu(temp4, dest);
    masm.addl(gpr4, gpr1);
    masm.andl(gpr4, 63);
    // 0xa556c734,
    masm.movdqu(temp5, externalAddress(scFourPtr));
    // 0x3ec71de3,
    // 0x1a01a01a,
    // 0x3efa01a0
    masm.leaq(gpr1, externalAddress(cTablePtr));
    masm.shll(gpr4, 5);
    masm.addq(gpr1, gpr4);
    masm.movdqu(temp8, new AMD64Address(gpr1, 0));
    masm.mulpd(temp2, temp1);
    masm.subsd(dest, temp3);
    // 0x2e037073,
    masm.mulsd(temp1, externalAddress(pThreePtr));
    // 0x3b63198a
    masm.subsd(temp4, temp3);
    masm.unpcklpd(dest, dest);
    masm.movdqu(temp3, temp4);
    masm.subsd(temp4, temp2);
    masm.mulpd(temp5, dest);
    masm.pshufd(temp7, temp8, 0xE);
    masm.movdqu(temp9, temp7);
    masm.subpd(dest, temp2);
    masm.mulsd(temp7, temp4);
    masm.subsd(temp3, temp4);
    masm.mulpd(temp5, dest);
    masm.mulpd(dest, dest);
    masm.subsd(temp3, temp2);
    masm.movdqu(temp2, temp8);
    masm.subsd(temp1, temp3);
    masm.movdq(temp3, new AMD64Address(gpr1, 24));
    masm.addsd(temp2, temp3);
    masm.subsd(temp7, temp2);
    masm.subsd(temp1, temp6);
    // 0x11111111,
    masm.movdqu(temp6, externalAddress(scTwoPtr));
    // 0x3f811111,
    // 0x55555555,
    // 0x3fa55555
    masm.mulsd(temp2, temp4);
    masm.mulpd(temp6, dest);
    masm.mulsd(temp3, temp4);
    masm.mulpd(temp2, dest);
    masm.mulpd(dest, dest);
    // 0x1a01a01a,
    masm.addpd(temp5, externalAddress(scThreePtr));
    // 0xbf2a01a0,
    // 0x16c16c17,
    // 0xbf56c16c
    masm.mulsd(temp4, temp8);
    // 0x55555555,
    masm.addpd(temp6, externalAddress(scOnePtr));
    // 0xbfc55555,
    // 0x00000000,
    // 0xbfe00000
    masm.mulpd(temp5, dest);
    masm.movdqu(dest, temp3);
    masm.addsd(temp3, temp9);
    masm.mulpd(temp1, temp7);
    masm.movdqu(temp7, temp4);
    masm.addsd(temp4, temp3);
    masm.addpd(temp6, temp5);
    masm.subsd(temp9, temp3);
    masm.subsd(temp3, temp4);
    masm.addsd(temp1, new AMD64Address(gpr1, 16));
    masm.mulpd(temp6, temp2);
    masm.addsd(temp9, dest);
    masm.addsd(temp3, temp7);
    masm.addsd(temp1, temp9);
    masm.addsd(temp1, temp3);
    masm.addsd(temp1, temp6);
    masm.unpckhpd(temp6, temp6);
    masm.movdqu(dest, temp4);
    masm.addsd(temp1, temp6);
    masm.addsd(dest, temp1);
    masm.jmp(bb13);
    // bb7: gpr8 is zero. Shift the word chain gpr8 <- gpr9 <- gpr7 up by one word (adding
    // 64 to gpr4 each time) until gpr8 is non-zero; if all words are zero, continue at bb11
    // with dest and temp6 cleared.
    masm.bind(bb7);
    masm.addl(gpr4, 64);
    masm.movq(gpr8, gpr9);
    masm.movq(gpr9, gpr7);
    masm.movl(gpr7, 0);
    masm.cmpq(gpr8, 0);
    masm.jcc(ConditionFlag.NotEqual, bb8);
    masm.addl(gpr4, 64);
    masm.movq(gpr8, gpr9);
    masm.movq(gpr9, gpr7);
    masm.cmpq(gpr8, 0);
    masm.jcc(ConditionFlag.NotEqual, bb8);
    masm.xorpd(dest, dest);
    masm.xorpd(temp6, temp6);
    masm.jmp(bb11);
    // bb9: only reached through the jcc in bb8, so the flags tested by the first jcc here
    // are still those of bb8's subl(gpr3, gpr10); a zero difference needs no shift.
    masm.bind(bb9);
    masm.jcc(ConditionFlag.Equal, bb10);
    masm.negl(gpr3);
    masm.shrq(gpr9);
    masm.movq(gpr1, gpr8);
    masm.shrq(gpr8);
    masm.subl(gpr4, gpr3);
    masm.negl(gpr3);
    masm.addl(gpr3, 64);
    masm.shlq(gpr1);
    masm.orq(gpr9, gpr1);
    masm.jmp(bb10);
    // bb3: taken when gpr3 < 1 at the compare before the first operand-less shift.
    masm.bind(bb3);
    masm.negl(gpr3);
    masm.shlq(gpr8, 32);
    masm.orq(gpr8, gpr10);
    masm.shlq(gpr8);
    masm.movq(gpr6, gpr8);
    masm.testl(gpr8, Integer.MIN_VALUE);
    masm.jcc(ConditionFlag.NotEqual, bb12);
    masm.shrl(gpr8);
    masm.movl(gpr2, 0);
    masm.shrq(gpr6, 3);
    masm.jmp(bb6);
    // bb4: replaces the three-word value gpr8:gpr9:gpr7 by (gpr2:0:0) minus it, using a
    // subtract-with-borrow chain, and sets gpr2 to 32768 before rejoining at bb5.
    masm.bind(bb4);
    masm.shrl(gpr8);
    masm.movl(gpr2, 536870912);
    masm.shrl(gpr2);
    masm.shlq(gpr8, 32);
    masm.orq(gpr8, gpr10);
    masm.shlq(gpr2, 32);
    masm.addl(gpr6, 536870912);
    masm.movl(gpr3, 0);
    masm.movl(gpr10, 0);
    masm.subq(gpr3, gpr7);
    masm.sbbq(gpr10, gpr9);
    masm.sbbq(gpr2, gpr8);
    masm.movq(gpr7, gpr3);
    masm.movq(gpr9, gpr10);
    masm.movq(gpr8, gpr2);
    masm.movl(gpr2, 32768);
    masm.jmp(bb5);
    // bb12: counterpart of bb4 for the bb3 path; same subtract-with-borrow chain, rejoining
    // at bb6.
    masm.bind(bb12);
    masm.shrl(gpr8);
    masm.movq(gpr2, 0x100000000L);
    masm.shrq(gpr2);
    masm.movl(gpr3, 0);
    masm.movl(gpr10, 0);
    masm.subq(gpr3, gpr7);
    masm.sbbq(gpr10, gpr9);
    masm.sbbq(gpr2, gpr8);
    masm.movq(gpr7, gpr3);
    masm.movq(gpr9, gpr10);
    masm.movq(gpr8, gpr2);
    masm.movl(gpr2, 32768);
    masm.shrq(gpr6, 3);
    masm.addl(gpr6, 536870912);
    masm.jmp(bb6);
    // Common exit of all paths.
    masm.bind(bb13);
}
Also used : Register(jdk.vm.ci.code.Register) ValueUtil.asRegister(jdk.vm.ci.code.ValueUtil.asRegister) ArrayDataPointerConstant(org.graalvm.compiler.lir.asm.ArrayDataPointerConstant) Label(org.graalvm.compiler.asm.Label) AMD64Address(org.graalvm.compiler.asm.amd64.AMD64Address)

Example 4 with ArrayDataPointerConstant

Use of org.graalvm.compiler.lir.asm.ArrayDataPointerConstant in the Graal project by Oracle.

Class AMD64MathIntrinsicUnaryOp, method tanIntrinsic.

/**
 * Emits the AMD64 instruction sequence for the tangent intrinsic into {@code masm}.
 *
 * <p>The argument is first copied from {@code value} into {@code dest} (when the two registers
 * differ). Every code path ends by jumping to {@code bb15}, which is bound on the last line of
 * this method; the final arithmetic of each path targets {@code dest}.
 *
 * <p>Paths, selected from the 16-bit word 3 of {@code dest} (sign, exponent and top mantissa bits
 * of the low double):
 * <ul>
 * <li>in-range path (falls through after the first {@code jcc}): table-driven evaluation using
 * {@code cTableTan};</li>
 * <li>{@code bb0}: arguments below the in-range window - either left untouched ({@code bb2}),
 * rescaled ({@code bb3}) or evaluated with the {@code qElevenTan .. qThreeTan} polynomial;</li>
 * <li>{@code bb1}: arguments above the in-range window - integer multi-word arithmetic with
 * {@code piInvTableTan}, followed at {@code bb12} by the same table-driven evaluation;</li>
 * <li>{@code bb14}: exponent field all ones - produces {@code 0.0 / 0.0}.</li>
 * </ul>
 *
 * <p>NOTE(review): the hexadecimal comments scattered around the constant loads appear to list
 * the 32-bit words of the constant being loaded, split over several lines by a formatter (the
 * first word sits above the instruction, the remaining ones below it) - confirm against the
 * constant definitions.
 *
 * <p>The emitted instruction order is significant: several conditional jumps consume flags set
 * by an instruction emitted before the preceding label. Do not reorder statements.
 *
 * @param dest register that receives the argument copy and is the target of each path's final
 *            arithmetic
 * @param value register holding the argument
 * @param crb compilation result builder; handed to {@code setCrb} and nothing else in this method
 * @param masm assembler that receives every emitted instruction
 */
public void tanIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) {
    // Pointers to the constant arrays referenced below. The second constructor argument is
    // presumably the required alignment in bytes - TODO confirm against ArrayDataPointerConstant.
    ArrayDataPointerConstant oneHalfTanPtr = new ArrayDataPointerConstant(oneHalfTan, 16);
    ArrayDataPointerConstant mulSixteenPtr = new ArrayDataPointerConstant(mulSixteen, 16);
    ArrayDataPointerConstant signMaskTanPtr = new ArrayDataPointerConstant(signMaskTan, 16);
    ArrayDataPointerConstant piThirtyTwoInvTanPtr = new ArrayDataPointerConstant(piThirtyTwoInvTan, 16);
    ArrayDataPointerConstant pOneTanPtr = new ArrayDataPointerConstant(pOneTan, 16);
    ArrayDataPointerConstant pTwoTanPtr = new ArrayDataPointerConstant(pTwoTan, 16);
    ArrayDataPointerConstant pThreeTanPtr = new ArrayDataPointerConstant(pThreeTan, 16);
    ArrayDataPointerConstant cTableTanPtr = new ArrayDataPointerConstant(cTableTan, 16);
    ArrayDataPointerConstant maskThirtyFiveTanPtr = new ArrayDataPointerConstant(maskThirtyFiveTan, 16);
    ArrayDataPointerConstant qElevenTanPtr = new ArrayDataPointerConstant(qElevenTan, 16);
    ArrayDataPointerConstant qNineTanPtr = new ArrayDataPointerConstant(qNineTan, 16);
    ArrayDataPointerConstant qSevenTanPtr = new ArrayDataPointerConstant(qSevenTan, 8);
    ArrayDataPointerConstant qFiveTanPtr = new ArrayDataPointerConstant(qFiveTan, 16);
    ArrayDataPointerConstant qThreeTanPtr = new ArrayDataPointerConstant(qThreeTan, 16);
    ArrayDataPointerConstant piInvTableTanPtr = new ArrayDataPointerConstant(piInvTableTan, 16);
    ArrayDataPointerConstant piFourTanPtr = new ArrayDataPointerConstant(piFourTan, 8);
    ArrayDataPointerConstant qqTwoTanPtr = new ArrayDataPointerConstant(qqTwoTan, 8);
    ArrayDataPointerConstant onePtr = new ArrayDataPointerConstant(one, 8);
    ArrayDataPointerConstant twoPowFiftyFiveTanPtr = new ArrayDataPointerConstant(twoPowFiftyFiveTan, 8);
    ArrayDataPointerConstant twoPowMFiftyFiveTanPtr = new ArrayDataPointerConstant(twoPowMFiftyFiveTan, 8);
    // Branch targets; each of them is bound exactly once further down.
    Label bb0 = new Label();
    Label bb1 = new Label();
    Label bb2 = new Label();
    Label bb3 = new Label();
    Label bb5 = new Label();
    Label bb6 = new Label();
    Label bb8 = new Label();
    Label bb9 = new Label();
    Label bb10 = new Label();
    Label bb11 = new Label();
    Label bb12 = new Label();
    Label bb13 = new Label();
    Label bb14 = new Label();
    Label bb15 = new Label();
    // Scratch registers. gpr3 is taken from rcxTemp; the shift instructions emitted below without
    // an explicit count (e.g. shlq(reg), shrl(reg)) presumably shift by the value in that
    // register - TODO confirm against AMD64MacroAssembler.
    Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD);
    Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD);
    Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD);
    Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD);
    Register gpr5 = asRegister(gpr5Temp, AMD64Kind.QWORD);
    Register gpr6 = asRegister(gpr6Temp, AMD64Kind.QWORD);
    Register gpr7 = asRegister(gpr7Temp, AMD64Kind.QWORD);
    Register gpr8 = asRegister(gpr8Temp, AMD64Kind.QWORD);
    Register gpr9 = asRegister(gpr9Temp, AMD64Kind.QWORD);
    Register gpr10 = asRegister(gpr10Temp, AMD64Kind.QWORD);
    Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE);
    Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE);
    Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE);
    Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE);
    Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE);
    Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE);
    Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE);
    setCrb(crb);
    // Work on the argument in dest; the copy is skipped when both are the same register.
    if (dest.encoding != value.encoding) {
        masm.movdqu(dest, value);
    }
    // Range check on 16-bit word 3 of dest: drop the sign bit (32767 == 0x7fff), rebase by 16314
    // and compare unsigned against 270. Anything outside that window is handled at bb0.
    masm.pextrw(gpr1, dest, 3);
    masm.andl(gpr1, 32767);
    masm.subl(gpr1, 16314);
    masm.cmpl(gpr1, 270);
    masm.jcc(ConditionFlag.Above, bb0);
    // In-range path: table-driven evaluation.
    // 0x00000000,
    masm.movdqu(temp5, externalAddress(oneHalfTanPtr));
    // 0x3fe00000,
    // 0x00000000,
    // 0x3fe00000
    // 0x00000000,
    masm.movdqu(temp6, externalAddress(mulSixteenPtr));
    // 0x40300000,
    // 0x00000000,
    // 0x3ff00000
    masm.unpcklpd(dest, dest);
    // 0x00000000,
    masm.movdqu(temp4, externalAddress(signMaskTanPtr));
    // 0x80000000,
    // 0x00000000,
    // 0x80000000
    masm.andpd(temp4, dest);
    // 0x6dc9c883,
    masm.movdqu(temp1, externalAddress(piThirtyTwoInvTanPtr));
    // 0x3fe45f30,
    // 0x6dc9c883,
    // 0x40245f30
    masm.mulpd(temp1, dest);
    masm.por(temp5, temp4);
    masm.addpd(temp1, temp5);
    masm.movdqu(temp7, temp1);
    masm.unpckhpd(temp7, temp7);
    // gpr4 = truncated integer taken from the high lane; it selects the cTableTan entry below.
    masm.cvttsd2sil(gpr4, temp7);
    masm.cvttpd2dq(temp1, temp1);
    masm.cvtdq2pd(temp1, temp1);
    masm.mulpd(temp1, temp6);
    // 0x54444000,
    masm.movdqu(temp3, externalAddress(pOneTanPtr));
    // 0x3fb921fb,
    // 0x54440000,
    // 0x3fb921fb
    // 0x676733af,
    masm.movdq(temp5, externalAddress(qqTwoTanPtr));
    // 0x3d32e7b9
    masm.addq(gpr4, 469248);
    // 0x67674000,
    masm.movdqu(temp4, externalAddress(pTwoTanPtr));
    // 0xbd32e7b9,
    // 0x4c4c0000,
    // 0x3d468c23
    masm.mulpd(temp3, temp1);
    // Table index i = gpr4 & 31. The add/shift sequence interleaved below turns it into the byte
    // offset i * 176: gpr3 = 2i, gpr4 = 3i, gpr3 = 8i, gpr4 = 11i, then gpr4 <<= 4.
    masm.andq(gpr4, 31);
    masm.mulsd(temp5, temp1);
    masm.movq(gpr3, gpr4);
    masm.mulpd(temp4, temp1);
    masm.shlq(gpr3, 1);
    masm.subpd(dest, temp3);
    // 0x3707344a,
    masm.mulpd(temp1, externalAddress(pThreeTanPtr));
    // 0x3aa8a2e0,
    // 0x03707345,
    // 0x3ae98a2e
    masm.addq(gpr4, gpr3);
    masm.shlq(gpr3, 2);
    masm.addq(gpr4, gpr3);
    masm.addsd(temp5, dest);
    masm.movdqu(temp2, dest);
    masm.subpd(dest, temp4);
    // 0x00000000,
    masm.movdq(temp6, externalAddress(onePtr));
    // 0x3ff00000
    masm.shlq(gpr4, 4);
    masm.leaq(gpr1, externalAddress(cTableTanPtr));
    // 0xfffc0000,
    masm.andpd(temp5, externalAddress(maskThirtyFiveTanPtr));
    // 0xffffffff,
    // 0x00000000,
    // 0x00000000
    masm.movdqu(temp3, dest);
    // gpr1 = address of the selected cTableTan entry; it is read at offsets 0..168 below.
    masm.addq(gpr1, gpr4);
    masm.subpd(temp2, dest);
    masm.unpckhpd(dest, dest);
    masm.divsd(temp6, temp5);
    masm.subpd(temp2, temp4);
    masm.movdqu(temp7, new AMD64Address(gpr1, 16));
    masm.subsd(temp3, temp5);
    masm.mulpd(temp7, dest);
    masm.subpd(temp2, temp1);
    masm.movdqu(temp1, new AMD64Address(gpr1, 48));
    masm.mulpd(temp1, dest);
    masm.movdqu(temp4, new AMD64Address(gpr1, 96));
    masm.mulpd(temp4, dest);
    masm.addsd(temp2, temp3);
    masm.movdqu(temp3, dest);
    masm.mulpd(dest, dest);
    masm.addpd(temp7, new AMD64Address(gpr1, 0));
    masm.addpd(temp1, new AMD64Address(gpr1, 32));
    masm.mulpd(temp1, dest);
    masm.addpd(temp4, new AMD64Address(gpr1, 80));
    masm.addpd(temp7, temp1);
    masm.movdqu(temp1, new AMD64Address(gpr1, 112));
    masm.mulpd(temp1, dest);
    masm.mulpd(dest, dest);
    masm.addpd(temp4, temp1);
    masm.movdqu(temp1, new AMD64Address(gpr1, 64));
    masm.mulpd(temp1, dest);
    masm.addpd(temp7, temp1);
    masm.movdqu(temp1, temp3);
    masm.mulpd(temp3, dest);
    masm.mulsd(dest, dest);
    masm.mulpd(temp1, new AMD64Address(gpr1, 144));
    masm.mulpd(temp4, temp3);
    masm.movdqu(temp3, temp1);
    masm.addpd(temp7, temp4);
    masm.movdqu(temp4, temp1);
    masm.mulsd(dest, temp7);
    masm.unpckhpd(temp7, temp7);
    masm.addsd(dest, temp7);
    masm.unpckhpd(temp1, temp1);
    masm.addsd(temp3, temp1);
    masm.subsd(temp4, temp3);
    masm.addsd(temp1, temp4);
    masm.movdqu(temp4, temp2);
    masm.movdq(temp7, new AMD64Address(gpr1, 144));
    masm.unpckhpd(temp2, temp2);
    masm.addsd(temp7, new AMD64Address(gpr1, 152));
    masm.mulsd(temp7, temp2);
    masm.addsd(temp7, new AMD64Address(gpr1, 136));
    masm.addsd(temp7, temp1);
    masm.addsd(dest, temp7);
    // 0x00000000,
    masm.movdq(temp7, externalAddress(onePtr));
    // 0x3ff00000
    masm.mulsd(temp4, temp6);
    masm.movdq(temp2, new AMD64Address(gpr1, 168));
    masm.andpd(temp2, temp6);
    masm.mulsd(temp5, temp2);
    masm.mulsd(temp6, new AMD64Address(gpr1, 160));
    masm.subsd(temp7, temp5);
    masm.subsd(temp2, new AMD64Address(gpr1, 128));
    masm.subsd(temp7, temp4);
    masm.mulsd(temp7, temp6);
    masm.movdqu(temp4, temp3);
    masm.subsd(temp3, temp2);
    masm.addsd(temp2, temp3);
    masm.subsd(temp4, temp2);
    masm.addsd(dest, temp4);
    masm.subsd(dest, temp7);
    masm.addsd(dest, temp3);
    masm.jmp(bb15);
    // bb0: outside the in-range window. This label is only reached from the jcc(Above) above, so
    // the jump below still tests the flags of cmpl(gpr1, 270): signed-greater goes to the
    // large-argument path (bb1); otherwise the rebased word was negative (small argument).
    masm.bind(bb0);
    masm.jcc(ConditionFlag.Greater, bb1);
    masm.pextrw(gpr1, dest, 3);
    masm.movl(gpr4, gpr1);
    // Exponent field (32752 == 0x7ff0) is zero -> bb2.
    masm.andl(gpr1, 32752);
    masm.jcc(ConditionFlag.Equal, bb2);
    // Word 3 without its sign bit is below 15904 (unsigned) -> bb3.
    masm.andl(gpr4, 32767);
    masm.cmpl(gpr4, 15904);
    masm.jcc(ConditionFlag.Below, bb3);
    // Polynomial path, with x = dest and s = x * x:
    // dest = x + x * s * ((((qElevenTan * s + qNineTan) * s + qSevenTan) * s + qFiveTan) * s + qThreeTan)
    masm.movdqu(temp2, dest);
    masm.movdqu(temp3, dest);
    // 0xb8fe4d77,
    masm.movdq(temp1, externalAddress(qElevenTanPtr));
    // 0x3f82609a
    masm.mulsd(temp2, dest);
    masm.mulsd(temp3, temp2);
    masm.mulsd(temp1, temp2);
    // 0xbf847a43,
    masm.addsd(temp1, externalAddress(qNineTanPtr));
    // 0x3f9664a0
    masm.mulsd(temp1, temp2);
    // 0x52c4c8ab,
    masm.addsd(temp1, externalAddress(qSevenTanPtr));
    // 0x3faba1ba
    masm.mulsd(temp1, temp2);
    // 0x11092746,
    masm.addsd(temp1, externalAddress(qFiveTanPtr));
    // 0x3fc11111
    masm.mulsd(temp1, temp2);
    // 0x55555612,
    masm.addsd(temp1, externalAddress(qThreeTanPtr));
    // 0x3fd55555
    masm.mulsd(temp1, temp3);
    masm.addsd(dest, temp1);
    masm.jmp(bb15);
    // bb3: dest = (dest + dest * twoPowFiftyFiveTan) * twoPowMFiftyFiveTan.
    masm.bind(bb3);
    // 0x00000000,
    masm.movdq(temp3, externalAddress(twoPowFiftyFiveTanPtr));
    // 0x43600000
    masm.mulsd(temp3, dest);
    masm.addsd(dest, temp3);
    // 0x00000000,
    masm.mulsd(dest, externalAddress(twoPowMFiftyFiveTanPtr));
    // 0x3c800000
    masm.jmp(bb15);
    // bb14: reached from bb1 when the exponent field is all ones; dest = 0.0 / 0.0.
    masm.bind(bb14);
    masm.xorpd(temp1, temp1);
    masm.xorpd(dest, dest);
    masm.divsd(dest, temp1);
    masm.jmp(bb15);
    // bb2: exponent field is zero. dest is left untouched; the square computed in temp1 is not
    // read again on this path (presumably emitted only for its floating-point status side
    // effect - TODO confirm).
    masm.bind(bb2);
    masm.movdqu(temp1, dest);
    masm.mulsd(temp1, temp1);
    masm.jmp(bb15);
    // bb1: large arguments. Integer arithmetic on the raw bits of dest and piInvTableTan.
    masm.bind(bb1);
    masm.pextrw(gpr3, dest, 3);
    masm.andl(gpr3, 32752);
    masm.cmpl(gpr3, 32752);
    masm.jcc(ConditionFlag.Equal, bb14);
    // gpr3 = address inside piInvTableTan: a multiple-of-4 byte offset (mask 65532) derived from
    // the exponent field, plus the table base.
    masm.subl(gpr3, 16224);
    masm.shrl(gpr3, 7);
    masm.andl(gpr3, 65532);
    masm.leaq(gpr10, externalAddress(piInvTableTanPtr));
    masm.addq(gpr3, gpr10);
    // gpr1 = raw 64 bits of dest, split into gpr4 (low 32 bits) and gpr1 (the bits above them,
    // with the bit just above the mantissa forced to one by the orl).
    masm.movdq(gpr1, dest);
    masm.movl(gpr9, new AMD64Address(gpr3, 20));
    masm.movl(gpr7, new AMD64Address(gpr3, 24));
    masm.movl(gpr4, gpr1);
    masm.shrq(gpr1, 21);
    masm.orl(gpr1, Integer.MIN_VALUE);
    masm.shrl(gpr1, 11);
    // Partial products of gpr1/gpr4 with the 32-bit table words at offsets 24 down to 0; the
    // 32-bit halves are accumulated with carries into gpr8 (most significant), gpr9 and gpr7.
    // NOTE(review): looks like a multi-word multiplication of the significand by a slice of the
    // table - confirm.
    masm.movl(gpr8, gpr9);
    masm.imulq(gpr9, gpr4);
    masm.imulq(gpr8, gpr1);
    masm.imulq(gpr7, gpr1);
    masm.movl(gpr5, new AMD64Address(gpr3, 16));
    masm.movl(gpr6, new AMD64Address(gpr3, 12));
    masm.movl(gpr10, gpr9);
    masm.shrq(gpr9, 32);
    masm.addq(gpr8, gpr9);
    masm.addq(gpr10, gpr7);
    masm.movl(gpr7, gpr10);
    masm.shrq(gpr10, 32);
    masm.addq(gpr8, gpr10);
    masm.movl(gpr9, gpr5);
    masm.imulq(gpr5, gpr4);
    masm.imulq(gpr9, gpr1);
    masm.movl(gpr10, gpr6);
    masm.imulq(gpr6, gpr4);
    masm.movl(gpr2, gpr5);
    masm.shrq(gpr5, 32);
    masm.addq(gpr8, gpr2);
    masm.movl(gpr2, gpr8);
    masm.shrq(gpr8, 32);
    masm.addq(gpr9, gpr5);
    masm.addq(gpr9, gpr8);
    masm.shlq(gpr2, 32);
    masm.orq(gpr7, gpr2);
    masm.imulq(gpr10, gpr1);
    masm.movl(gpr8, new AMD64Address(gpr3, 8));
    masm.movl(gpr5, new AMD64Address(gpr3, 4));
    masm.movl(gpr2, gpr6);
    masm.shrq(gpr6, 32);
    masm.addq(gpr9, gpr2);
    masm.movl(gpr2, gpr9);
    masm.shrq(gpr9, 32);
    masm.addq(gpr10, gpr6);
    masm.addq(gpr10, gpr9);
    masm.movq(gpr6, gpr8);
    masm.imulq(gpr8, gpr4);
    masm.imulq(gpr6, gpr1);
    masm.movl(gpr9, gpr8);
    masm.shrq(gpr8, 32);
    masm.addq(gpr10, gpr9);
    masm.movl(gpr9, gpr10);
    masm.shrq(gpr10, 32);
    masm.addq(gpr6, gpr8);
    masm.addq(gpr6, gpr10);
    masm.movq(gpr8, gpr5);
    masm.imulq(gpr5, gpr4);
    masm.imulq(gpr8, gpr1);
    masm.shlq(gpr9, 32);
    masm.orq(gpr9, gpr2);
    masm.movl(gpr1, new AMD64Address(gpr3, 0));
    masm.movl(gpr10, gpr5);
    masm.shrq(gpr5, 32);
    masm.addq(gpr6, gpr10);
    masm.movl(gpr10, gpr6);
    masm.shrq(gpr6, 32);
    masm.addq(gpr8, gpr5);
    masm.addq(gpr8, gpr6);
    masm.imulq(gpr4, gpr1);
    // Shift bookkeeping: gpr3 = 8 * (byte offset into the table) + 19 - unbiased exponent,
    // gpr5 = sign bit of word 3 (32768), gpr2 = unbiased exponent, gpr4 = gpr3 + 32.
    masm.pextrw(gpr2, dest, 3);
    masm.leaq(gpr6, externalAddress(piInvTableTanPtr));
    masm.subq(gpr3, gpr6);
    masm.addl(gpr3, gpr3);
    masm.addl(gpr3, gpr3);
    masm.addl(gpr3, gpr3);
    masm.addl(gpr3, 19);
    masm.movl(gpr5, 32768);
    masm.andl(gpr5, gpr2);
    masm.shrl(gpr2, 4);
    masm.andl(gpr2, 2047);
    masm.subl(gpr2, 1023);
    masm.subl(gpr3, gpr2);
    masm.addq(gpr8, gpr4);
    masm.movl(gpr4, gpr3);
    masm.addl(gpr4, 32);
    // Negative shift amount is handled at bb5.
    masm.cmpl(gpr3, 0);
    masm.jcc(ConditionFlag.Less, bb5);
    masm.negl(gpr3);
    masm.addl(gpr3, 29);
    masm.shll(gpr8);
    masm.movl(gpr6, gpr8);
    masm.andl(gpr8, 1073741823);
    // Bit 29 set -> bb6 (negates the three-word value before continuing at bb8).
    masm.testl(gpr8, 536870912);
    masm.jcc(ConditionFlag.NotEqual, bb6);
    masm.shrl(gpr8);
    masm.movl(gpr2, 0);
    masm.shlq(gpr8, 32);
    masm.orq(gpr8, gpr10);
    // bb8: if the most significant word is zero, bb9 moves the lower words up first.
    masm.bind(bb8);
    masm.cmpq(gpr8, 0);
    masm.jcc(ConditionFlag.Equal, bb9);
    // bb10: gpr3 = 29 - (index of the highest set bit of gpr8). Positive: shift the words left
    // (falls through); zero or negative: handled at bb11.
    masm.bind(bb10);
    masm.bsrq(gpr10, gpr8);
    masm.movl(gpr3, 29);
    masm.subl(gpr3, gpr10);
    masm.jcc(ConditionFlag.LessEqual, bb11);
    masm.shlq(gpr8);
    masm.movq(gpr1, gpr9);
    masm.shlq(gpr9);
    masm.addl(gpr4, gpr3);
    masm.negl(gpr3);
    masm.addl(gpr3, 64);
    masm.shrq(gpr1);
    masm.shrq(gpr7);
    masm.orq(gpr8, gpr1);
    masm.orq(gpr9, gpr7);
    // bb12: back to floating point. gpr8 and gpr9 >> 1 are converted to doubles and scaled by
    // factors whose word 3 is assembled in gpr4 (pinsrw); the products are combined with the two
    // doubles read from piFourTan.
    masm.bind(bb12);
    masm.cvtsi2sdq(dest, gpr8);
    masm.shrq(gpr9, 1);
    masm.cvtsi2sdq(temp3, gpr9);
    masm.xorpd(temp4, temp4);
    masm.shll(gpr4, 4);
    masm.negl(gpr4);
    masm.addl(gpr4, 16368);
    masm.orl(gpr4, gpr5);
    masm.xorl(gpr4, gpr2);
    masm.pinsrw(temp4, gpr4, 3);
    masm.leaq(gpr1, externalAddress(piFourTanPtr));
    // 0x00000000,
    masm.movdq(temp2, new AMD64Address(gpr1, 0));
    // 0x3fe921fb,
    // 0x4611a626,
    masm.movdq(temp7, new AMD64Address(gpr1, 8));
    // 0x3e85110b
    masm.xorpd(temp5, temp5);
    masm.subl(gpr4, 1008);
    masm.pinsrw(temp5, gpr4, 3);
    masm.mulsd(dest, temp4);
    // gpr5 becomes 0 or -1 depending on the sign bit saved earlier (shift to bit 31, then
    // arithmetic shift right by 31).
    masm.shll(gpr5, 16);
    masm.sarl(gpr5, 31);
    masm.mulsd(temp3, temp5);
    masm.movdqu(temp1, dest);
    masm.mulsd(dest, temp2);
    masm.shrl(gpr6, 30);
    masm.addsd(temp1, temp3);
    masm.mulsd(temp3, temp2);
    // (gpr6 + gpr5) ^ gpr5 with gpr5 in {0, -1}: leaves gpr6 unchanged or negates it.
    masm.addl(gpr6, gpr5);
    masm.xorl(gpr6, gpr5);
    masm.mulsd(temp7, temp1);
    masm.movl(gpr1, gpr6);
    masm.addsd(temp7, temp3);
    masm.movdqu(temp2, dest);
    masm.addsd(dest, temp7);
    masm.subsd(temp2, dest);
    masm.addsd(temp7, temp2);
    // From here on the sequence mirrors the in-range path above, with two additions: gpr1 << 4
    // is added to the table index and temp7 is added into temp2.
    // 0x6dc9c883,
    masm.movdqu(temp1, externalAddress(piThirtyTwoInvTanPtr));
    // 0x40245f30
    // Copy the low double of dest into both lanes; movddup is only emitted when SSE3 is
    // available, movlhps otherwise.
    if (masm.supports(CPUFeature.SSE3)) {
        masm.movddup(dest, dest);
    } else {
        masm.movlhps(dest, dest);
    }
    // 0x00000000,
    masm.movdqu(temp4, externalAddress(signMaskTanPtr));
    // 0x80000000,
    // 0x00000000,
    // 0x80000000
    masm.andpd(temp4, dest);
    masm.mulpd(temp1, dest);
    // Same lane duplication for temp7.
    if (masm.supports(CPUFeature.SSE3)) {
        masm.movddup(temp7, temp7);
    } else {
        masm.movlhps(temp7, temp7);
    }
    // 0x00000000,
    masm.movdqu(temp5, externalAddress(oneHalfTanPtr));
    // 0x3fe00000,
    // 0x00000000,
    // 0x3fe00000
    // 0x00000000,
    masm.movdqu(temp6, externalAddress(mulSixteenPtr));
    // 0x40300000,
    // 0x00000000,
    // 0x3ff00000
    masm.por(temp5, temp4);
    masm.addpd(temp1, temp5);
    masm.movdqu(temp5, temp1);
    masm.unpckhpd(temp5, temp5);
    masm.cvttsd2sil(gpr4, temp5);
    masm.cvttpd2dq(temp1, temp1);
    masm.cvtdq2pd(temp1, temp1);
    masm.mulpd(temp1, temp6);
    // 0x54444000,
    masm.movdqu(temp3, externalAddress(pOneTanPtr));
    // 0x3fb921fb,
    // 0x54440000,
    // 0x3fb921fb
    // 0x676733af,
    masm.movdq(temp5, externalAddress(qqTwoTanPtr));
    // 0x3d32e7b9
    masm.shll(gpr1, 4);
    masm.addl(gpr4, 469248);
    // 0x67674000,
    masm.movdqu(temp4, externalAddress(pTwoTanPtr));
    // 0xbd32e7b9,
    // 0x4c4c0000,
    // 0x3d468c23
    masm.mulpd(temp3, temp1);
    // Table index i = (gpr4 + gpr1) & 31, turned into the byte offset i * 176 exactly as in the
    // in-range path (2i, 3i, 8i, 11i, then << 4).
    masm.addl(gpr4, gpr1);
    masm.andl(gpr4, 31);
    masm.mulsd(temp5, temp1);
    masm.movl(gpr3, gpr4);
    masm.mulpd(temp4, temp1);
    masm.shll(gpr3, 1);
    masm.subpd(dest, temp3);
    // 0x3707344a,
    masm.mulpd(temp1, externalAddress(pThreeTanPtr));
    // 0x3aa8a2e0,
    // 0x03707345,
    // 0x3ae98a2e
    masm.addl(gpr4, gpr3);
    masm.shll(gpr3, 2);
    masm.addl(gpr4, gpr3);
    masm.addsd(temp5, dest);
    masm.movdqu(temp2, dest);
    masm.subpd(dest, temp4);
    // 0x00000000,
    masm.movdq(temp6, externalAddress(onePtr));
    // 0x3ff00000
    masm.shll(gpr4, 4);
    masm.leaq(gpr1, externalAddress(cTableTanPtr));
    // 0xfffc0000,
    masm.andpd(temp5, externalAddress(maskThirtyFiveTanPtr));
    // 0xffffffff,
    // 0x00000000,
    // 0x00000000
    masm.movdqu(temp3, dest);
    // gpr1 = address of the selected cTableTan entry.
    masm.addq(gpr1, gpr4);
    masm.subpd(temp2, dest);
    masm.unpckhpd(dest, dest);
    masm.divsd(temp6, temp5);
    masm.subpd(temp2, temp4);
    masm.subsd(temp3, temp5);
    masm.subpd(temp2, temp1);
    masm.movdqu(temp1, new AMD64Address(gpr1, 48));
    masm.addpd(temp2, temp7);
    masm.movdqu(temp7, new AMD64Address(gpr1, 16));
    masm.mulpd(temp7, dest);
    masm.movdqu(temp4, new AMD64Address(gpr1, 96));
    masm.mulpd(temp1, dest);
    masm.mulpd(temp4, dest);
    masm.addsd(temp2, temp3);
    masm.movdqu(temp3, dest);
    masm.mulpd(dest, dest);
    masm.addpd(temp7, new AMD64Address(gpr1, 0));
    masm.addpd(temp1, new AMD64Address(gpr1, 32));
    masm.mulpd(temp1, dest);
    masm.addpd(temp4, new AMD64Address(gpr1, 80));
    masm.addpd(temp7, temp1);
    masm.movdqu(temp1, new AMD64Address(gpr1, 112));
    masm.mulpd(temp1, dest);
    masm.mulpd(dest, dest);
    masm.addpd(temp4, temp1);
    masm.movdqu(temp1, new AMD64Address(gpr1, 64));
    masm.mulpd(temp1, dest);
    masm.addpd(temp7, temp1);
    masm.movdqu(temp1, temp3);
    masm.mulpd(temp3, dest);
    masm.mulsd(dest, dest);
    masm.mulpd(temp1, new AMD64Address(gpr1, 144));
    masm.mulpd(temp4, temp3);
    masm.movdqu(temp3, temp1);
    masm.addpd(temp7, temp4);
    masm.movdqu(temp4, temp1);
    masm.mulsd(dest, temp7);
    masm.unpckhpd(temp7, temp7);
    masm.addsd(dest, temp7);
    masm.unpckhpd(temp1, temp1);
    masm.addsd(temp3, temp1);
    masm.subsd(temp4, temp3);
    masm.addsd(temp1, temp4);
    masm.movdqu(temp4, temp2);
    masm.movdq(temp7, new AMD64Address(gpr1, 144));
    masm.unpckhpd(temp2, temp2);
    masm.addsd(temp7, new AMD64Address(gpr1, 152));
    masm.mulsd(temp7, temp2);
    masm.addsd(temp7, new AMD64Address(gpr1, 136));
    masm.addsd(temp7, temp1);
    masm.addsd(dest, temp7);
    // 0x00000000,
    masm.movdq(temp7, externalAddress(onePtr));
    // 0x3ff00000
    masm.mulsd(temp4, temp6);
    masm.movdq(temp2, new AMD64Address(gpr1, 168));
    masm.andpd(temp2, temp6);
    masm.mulsd(temp5, temp2);
    masm.mulsd(temp6, new AMD64Address(gpr1, 160));
    masm.subsd(temp7, temp5);
    masm.subsd(temp2, new AMD64Address(gpr1, 128));
    masm.subsd(temp7, temp4);
    masm.mulsd(temp7, temp6);
    masm.movdqu(temp4, temp3);
    masm.subsd(temp3, temp2);
    masm.addsd(temp2, temp3);
    masm.subsd(temp4, temp2);
    masm.addsd(dest, temp4);
    masm.subsd(dest, temp7);
    masm.addsd(dest, temp3);
    masm.jmp(bb15);
    // bb9: the most significant word (gpr8) is zero. Move gpr9 -> gpr8 and gpr7 -> gpr9, clear
    // gpr7 and account for the 64-bit move in gpr4; repeated once if gpr8 is still zero. If it
    // is zero even then, continue at bb12 without normalising.
    masm.bind(bb9);
    masm.addl(gpr4, 64);
    masm.movq(gpr8, gpr9);
    masm.movq(gpr9, gpr7);
    masm.movl(gpr7, 0);
    masm.cmpq(gpr8, 0);
    masm.jcc(ConditionFlag.NotEqual, bb10);
    masm.addl(gpr4, 64);
    masm.movq(gpr8, gpr9);
    masm.movq(gpr9, gpr7);
    masm.cmpq(gpr8, 0);
    masm.jcc(ConditionFlag.NotEqual, bb10);
    masm.jmp(bb12);
    // bb11: only reached from the jcc(LessEqual) at bb10, so the jump below still tests the
    // flags of subl(gpr3, gpr10): zero means no shift is needed; otherwise shift the words right
    // by the negated amount.
    masm.bind(bb11);
    masm.jcc(ConditionFlag.Equal, bb12);
    masm.negl(gpr3);
    masm.shrq(gpr9);
    masm.movq(gpr1, gpr8);
    masm.shrq(gpr8);
    masm.subl(gpr4, gpr3);
    masm.negl(gpr3);
    masm.addl(gpr3, 64);
    masm.shlq(gpr1);
    masm.orq(gpr9, gpr1);
    masm.jmp(bb12);
    // bb5: the shift amount computed before bb8 was negative. Bit 31 of the shifted word selects
    // between the plain variant (falls through) and the negating variant at bb13.
    masm.bind(bb5);
    masm.notl(gpr3);
    masm.shlq(gpr8, 32);
    masm.orq(gpr8, gpr10);
    masm.shlq(gpr8);
    masm.movq(gpr6, gpr8);
    masm.testl(gpr8, Integer.MIN_VALUE);
    masm.jcc(ConditionFlag.NotEqual, bb13);
    masm.shrl(gpr8);
    masm.movl(gpr2, 0);
    masm.shrq(gpr6, 2);
    masm.jmp(bb8);
    // bb6: replaces the three-word value gpr8:gpr9:gpr7 by (gpr2:0:0) - (gpr8:gpr9:gpr7) using a
    // subq/sbbq borrow chain, then sets gpr2 = 32768, which is later xor-ed into the word 3 built
    // at bb12.
    masm.bind(bb6);
    masm.shrl(gpr8);
    masm.movl(gpr2, 1073741824);
    masm.shrl(gpr2);
    masm.shlq(gpr8, 32);
    masm.orq(gpr8, gpr10);
    masm.shlq(gpr2, 32);
    masm.addl(gpr6, 1073741824);
    masm.movl(gpr3, 0);
    masm.movl(gpr10, 0);
    masm.subq(gpr3, gpr7);
    masm.sbbq(gpr10, gpr9);
    masm.sbbq(gpr2, gpr8);
    masm.movq(gpr7, gpr3);
    masm.movq(gpr9, gpr10);
    masm.movq(gpr8, gpr2);
    masm.movl(gpr2, 32768);
    masm.jmp(bb8);
    // bb13: same three-word negation as bb6, for the negative-shift case entered via bb5.
    masm.bind(bb13);
    masm.shrl(gpr8);
    masm.movq(gpr2, 0x100000000L);
    masm.shrq(gpr2);
    masm.movl(gpr3, 0);
    masm.movl(gpr10, 0);
    masm.subq(gpr3, gpr7);
    masm.sbbq(gpr10, gpr9);
    masm.sbbq(gpr2, gpr8);
    masm.movq(gpr7, gpr3);
    masm.movq(gpr9, gpr10);
    masm.movq(gpr8, gpr2);
    masm.movl(gpr2, 32768);
    masm.shrq(gpr6, 2);
    masm.addl(gpr6, 1073741824);
    masm.jmp(bb8);
    // bb15: common exit of every path.
    masm.bind(bb15);
}
Also used : Register(jdk.vm.ci.code.Register) ValueUtil.asRegister(jdk.vm.ci.code.ValueUtil.asRegister) ArrayDataPointerConstant(org.graalvm.compiler.lir.asm.ArrayDataPointerConstant) Label(org.graalvm.compiler.asm.Label) AMD64Address(org.graalvm.compiler.asm.amd64.AMD64Address)

Example 5 with ArrayDataPointerConstant

Use of org.graalvm.compiler.lir.asm.ArrayDataPointerConstant in the Graal project by Oracle.

Class AMD64MathIntrinsicUnaryOp, method sinIntrinsic.

public void sinIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) {
    ArrayDataPointerConstant oneHalfPtr = new ArrayDataPointerConstant(oneHalf, 16);
    ArrayDataPointerConstant pTwoPtr = new ArrayDataPointerConstant(pTwo, 16);
    ArrayDataPointerConstant scFourPtr = new ArrayDataPointerConstant(scFour, 16);
    ArrayDataPointerConstant cTablePtr = new ArrayDataPointerConstant(cTable, 16);
    ArrayDataPointerConstant scTwoPtr = new ArrayDataPointerConstant(scTwo, 16);
    ArrayDataPointerConstant scThreePtr = new ArrayDataPointerConstant(scThree, 16);
    ArrayDataPointerConstant scOnePtr = new ArrayDataPointerConstant(scOne, 16);
    ArrayDataPointerConstant piInvTablePtr = new ArrayDataPointerConstant(piInvTable, 16);
    ArrayDataPointerConstant piFourPtr = new ArrayDataPointerConstant(piFour, 16);
    ArrayDataPointerConstant piThirtyTwoInvPtr = new ArrayDataPointerConstant(piThirtyTwoInv, 8);
    ArrayDataPointerConstant shifterPtr = new ArrayDataPointerConstant(shifter, 8);
    ArrayDataPointerConstant signMaskPtr = new ArrayDataPointerConstant(signMask, 8);
    ArrayDataPointerConstant pThreePtr = new ArrayDataPointerConstant(pThree, 8);
    ArrayDataPointerConstant allOnesPtr = new ArrayDataPointerConstant(allOnes, 8);
    ArrayDataPointerConstant twoPowFiftyFivePtr = new ArrayDataPointerConstant(twoPowFiftyFive, 8);
    ArrayDataPointerConstant twoPowFiftyFiveMPtr = new ArrayDataPointerConstant(twoPowFiftyFiveM, 8);
    ArrayDataPointerConstant pOnePtr = new ArrayDataPointerConstant(pOne, 8);
    Label bb0 = new Label();
    Label bb1 = new Label();
    Label bb2 = new Label();
    Label bb4 = new Label();
    Label bb5 = new Label();
    Label bb6 = new Label();
    Label bb8 = new Label();
    Label bb9 = new Label();
    Label bb10 = new Label();
    Label bb11 = new Label();
    Label bb12 = new Label();
    Label bb13 = new Label();
    Label bb14 = new Label();
    Label bb15 = new Label();
    Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD);
    Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD);
    Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD);
    Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD);
    Register gpr5 = asRegister(gpr5Temp, AMD64Kind.QWORD);
    Register gpr6 = asRegister(gpr6Temp, AMD64Kind.QWORD);
    Register gpr7 = asRegister(gpr7Temp, AMD64Kind.QWORD);
    Register gpr8 = asRegister(gpr8Temp, AMD64Kind.QWORD);
    Register gpr9 = asRegister(gpr9Temp, AMD64Kind.QWORD);
    Register gpr10 = asRegister(gpr10Temp, AMD64Kind.QWORD);
    Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE);
    Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE);
    Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE);
    Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE);
    Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE);
    Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE);
    Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE);
    Register temp8 = asRegister(xmm8Temp, AMD64Kind.DOUBLE);
    Register temp9 = asRegister(xmm9Temp, AMD64Kind.DOUBLE);
    AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp);
    setCrb(crb);
    masm.movsd(stackSlot, value);
    if (dest.encoding != value.encoding) {
        masm.movdqu(dest, value);
    }
    masm.leaq(gpr1, stackSlot);
    masm.movl(gpr1, new AMD64Address(gpr1, 4));
    // 0x6dc9c883,
    masm.movdq(temp1, externalAddress(piThirtyTwoInvPtr));
    // 0x40245f30
    // 0x00000000,
    masm.movdq(temp2, externalAddress(shifterPtr));
    // 0x43380000
    masm.andl(gpr1, 2147418112);
    masm.subl(gpr1, 808452096);
    masm.cmpl(gpr1, 281346048);
    masm.jcc(ConditionFlag.Above, bb0);
    masm.mulsd(temp1, dest);
    // 0x00000000,
    masm.movdqu(temp5, externalAddress(oneHalfPtr));
    // 0x3fe00000,
    // 0x00000000,
    // 0x3fe00000
    // 0x00000000,
    masm.movdq(temp4, externalAddress(signMaskPtr));
    // 0x80000000
    masm.pand(temp4, dest);
    masm.por(temp5, temp4);
    masm.addpd(temp1, temp5);
    masm.cvttsd2sil(gpr4, temp1);
    masm.cvtsi2sdl(temp1, gpr4);
    // 0x1a600000,
    masm.movdqu(temp6, externalAddress(pTwoPtr));
    // 0x3d90b461,
    // 0x1a600000,
    // 0x3d90b461
    masm.movq(gpr7, 0x3fb921fb54400000L);
    masm.movdq(temp3, gpr7);
    // 0xa556c734,
    masm.movdqu(temp5, externalAddress(scFourPtr));
    // 0x3ec71de3,
    // 0x1a01a01a,
    // 0x3efa01a0
    masm.pshufd(temp4, dest, 0x44);
    masm.mulsd(temp3, temp1);
    if (masm.supports(CPUFeature.SSE3)) {
        masm.movddup(temp1, temp1);
    } else {
        masm.movlhps(temp1, temp1);
    }
    masm.andl(gpr4, 63);
    masm.shll(gpr4, 5);
    masm.leaq(gpr1, externalAddress(cTablePtr));
    masm.addq(gpr1, gpr4);
    masm.movdqu(temp8, new AMD64Address(gpr1, 0));
    masm.mulpd(temp6, temp1);
    // 0x2e037073,
    masm.mulsd(temp1, externalAddress(pThreePtr));
    // 0x3b63198a
    masm.subsd(temp4, temp3);
    masm.subsd(dest, temp3);
    if (masm.supports(CPUFeature.SSE3)) {
        masm.movddup(temp3, temp4);
    } else {
        masm.movdqu(temp3, temp4);
        masm.movlhps(temp3, temp3);
    }
    masm.subsd(temp4, temp6);
    masm.pshufd(dest, dest, 0x44);
    masm.pshufd(temp7, temp8, 0xE);
    masm.movdqu(temp2, temp8);
    masm.movdqu(temp9, temp7);
    masm.mulpd(temp5, dest);
    masm.subpd(dest, temp6);
    masm.mulsd(temp7, temp4);
    masm.subsd(temp3, temp4);
    masm.mulpd(temp5, dest);
    masm.mulpd(dest, dest);
    masm.subsd(temp3, temp6);
    // 0x11111111,
    masm.movdqu(temp6, externalAddress(scTwoPtr));
    // 0x3f811111,
    // 0x55555555,
    // 0x3fa55555
    masm.subsd(temp1, temp3);
    masm.movdq(temp3, new AMD64Address(gpr1, 24));
    masm.addsd(temp2, temp3);
    masm.subsd(temp7, temp2);
    masm.mulsd(temp2, temp4);
    masm.mulpd(temp6, dest);
    masm.mulsd(temp3, temp4);
    masm.mulpd(temp2, dest);
    masm.mulpd(dest, dest);
    // 0x1a01a01a,
    masm.addpd(temp5, externalAddress(scThreePtr));
    // 0xbf2a01a0,
    // 0x16c16c17,
    // 0xbf56c16c
    masm.mulsd(temp4, temp8);
    // 0x55555555,
    masm.addpd(temp6, externalAddress(scOnePtr));
    // 0xbfc55555,
    // 0x00000000,
    // 0xbfe00000
    masm.mulpd(temp5, dest);
    masm.movdqu(dest, temp3);
    masm.addsd(temp3, temp9);
    masm.mulpd(temp1, temp7);
    masm.movdqu(temp7, temp4);
    masm.addsd(temp4, temp3);
    masm.addpd(temp6, temp5);
    masm.subsd(temp9, temp3);
    masm.subsd(temp3, temp4);
    masm.addsd(temp1, new AMD64Address(gpr1, 16));
    masm.mulpd(temp6, temp2);
    masm.addsd(temp9, dest);
    masm.addsd(temp3, temp7);
    masm.addsd(temp1, temp9);
    masm.addsd(temp1, temp3);
    masm.addsd(temp1, temp6);
    masm.unpckhpd(temp6, temp6);
    masm.movdqu(dest, temp4);
    masm.addsd(temp1, temp6);
    masm.addsd(dest, temp1);
    masm.jmp(bb15);
    masm.bind(bb14);
    masm.xorpd(temp1, temp1);
    masm.xorpd(dest, dest);
    masm.divsd(dest, temp1);
    masm.jmp(bb15);
    masm.bind(bb0);
    masm.jcc(ConditionFlag.Greater, bb1);
    masm.shrl(gpr1, 20);
    masm.cmpl(gpr1, 3325);
    masm.jcc(ConditionFlag.NotEqual, bb2);
    // 0xffffffff,
    masm.mulsd(dest, externalAddress(allOnesPtr));
    // 0x3fefffff
    masm.jmp(bb15);
    masm.bind(bb2);
    // 0x00000000,
    masm.movdq(temp3, externalAddress(twoPowFiftyFivePtr));
    // 0x43600000
    masm.mulsd(temp3, dest);
    masm.subsd(temp3, dest);
    // 0x00000000,
    masm.mulsd(temp3, externalAddress(twoPowFiftyFiveMPtr));
    // 0x3c800000
    masm.jmp(bb15);
    masm.bind(bb1);
    masm.pextrw(gpr3, dest, 3);
    masm.andl(gpr3, 32752);
    masm.cmpl(gpr3, 32752);
    masm.jcc(ConditionFlag.Equal, bb14);
    masm.subl(gpr3, 16224);
    masm.shrl(gpr3, 7);
    masm.andl(gpr3, 65532);
    masm.leaq(gpr10, externalAddress(piInvTablePtr));
    masm.addq(gpr3, gpr10);
    masm.movdq(gpr1, dest);
    masm.movl(gpr9, new AMD64Address(gpr3, 20));
    masm.movl(gpr7, new AMD64Address(gpr3, 24));
    masm.movl(gpr4, gpr1);
    masm.shrq(gpr1, 21);
    masm.orl(gpr1, Integer.MIN_VALUE);
    masm.shrl(gpr1, 11);
    masm.movl(gpr8, gpr9);
    masm.imulq(gpr9, gpr4);
    masm.imulq(gpr8, gpr1);
    masm.imulq(gpr7, gpr1);
    masm.movl(gpr5, new AMD64Address(gpr3, 16));
    masm.movl(gpr6, new AMD64Address(gpr3, 12));
    masm.movl(gpr10, gpr9);
    masm.shrq(gpr9, 32);
    masm.addq(gpr8, gpr9);
    masm.addq(gpr10, gpr7);
    masm.movl(gpr7, gpr10);
    masm.shrq(gpr10, 32);
    masm.addq(gpr8, gpr10);
    masm.movl(gpr9, gpr5);
    masm.imulq(gpr5, gpr4);
    masm.imulq(gpr9, gpr1);
    masm.movl(gpr10, gpr6);
    masm.imulq(gpr6, gpr4);
    masm.movl(gpr2, gpr5);
    masm.shrq(gpr5, 32);
    masm.addq(gpr8, gpr2);
    masm.movl(gpr2, gpr8);
    masm.shrq(gpr8, 32);
    masm.addq(gpr9, gpr5);
    masm.addq(gpr9, gpr8);
    masm.shlq(gpr2, 32);
    masm.orq(gpr7, gpr2);
    masm.imulq(gpr10, gpr1);
    masm.movl(gpr8, new AMD64Address(gpr3, 8));
    masm.movl(gpr5, new AMD64Address(gpr3, 4));
    masm.movl(gpr2, gpr6);
    masm.shrq(gpr6, 32);
    masm.addq(gpr9, gpr2);
    masm.movl(gpr2, gpr9);
    masm.shrq(gpr9, 32);
    masm.addq(gpr10, gpr6);
    masm.addq(gpr10, gpr9);
    masm.movq(gpr6, gpr8);
    masm.imulq(gpr8, gpr4);
    masm.imulq(gpr6, gpr1);
    masm.movl(gpr9, gpr8);
    masm.shrq(gpr8, 32);
    masm.addq(gpr10, gpr9);
    masm.movl(gpr9, gpr10);
    masm.shrq(gpr10, 32);
    masm.addq(gpr6, gpr8);
    masm.addq(gpr6, gpr10);
    masm.movq(gpr8, gpr5);
    masm.imulq(gpr5, gpr4);
    masm.imulq(gpr8, gpr1);
    masm.shlq(gpr9, 32);
    masm.orq(gpr9, gpr2);
    masm.movl(gpr1, new AMD64Address(gpr3, 0));
    masm.movl(gpr10, gpr5);
    masm.shrq(gpr5, 32);
    masm.addq(gpr6, gpr10);
    masm.movl(gpr10, gpr6);
    masm.shrq(gpr6, 32);
    masm.addq(gpr8, gpr5);
    masm.addq(gpr8, gpr6);
    masm.imulq(gpr4, gpr1);
    masm.pextrw(gpr2, dest, 3);
    masm.leaq(gpr6, externalAddress(piInvTablePtr));
    masm.subq(gpr3, gpr6);
    masm.addl(gpr3, gpr3);
    masm.addl(gpr3, gpr3);
    masm.addl(gpr3, gpr3);
    masm.addl(gpr3, 19);
    masm.movl(gpr5, 32768);
    masm.andl(gpr5, gpr2);
    masm.shrl(gpr2, 4);
    masm.andl(gpr2, 2047);
    masm.subl(gpr2, 1023);
    masm.subl(gpr3, gpr2);
    masm.addq(gpr8, gpr4);
    masm.movl(gpr4, gpr3);
    masm.addl(gpr4, 32);
    masm.cmpl(gpr3, 1);
    masm.jcc(ConditionFlag.Less, bb4);
    masm.negl(gpr3);
    masm.addl(gpr3, 29);
    masm.shll(gpr8);
    masm.movl(gpr6, gpr8);
    masm.andl(gpr8, 536870911);
    masm.testl(gpr8, 268435456);
    masm.jcc(ConditionFlag.NotEqual, bb5);
    masm.shrl(gpr8);
    masm.movl(gpr2, 0);
    masm.shlq(gpr8, 32);
    masm.orq(gpr8, gpr10);
    masm.bind(bb6);
    masm.cmpq(gpr8, 0);
    masm.jcc(ConditionFlag.Equal, bb8);
    masm.bind(bb9);
    masm.bsrq(gpr10, gpr8);
    masm.movl(gpr3, 29);
    masm.subl(gpr3, gpr10);
    masm.jcc(ConditionFlag.LessEqual, bb10);
    masm.shlq(gpr8);
    masm.movq(gpr1, gpr9);
    masm.shlq(gpr9);
    masm.addl(gpr4, gpr3);
    masm.negl(gpr3);
    masm.addl(gpr3, 64);
    masm.shrq(gpr1);
    masm.shrq(gpr7);
    masm.orq(gpr8, gpr1);
    masm.orq(gpr9, gpr7);
    masm.bind(bb11);
    masm.cvtsi2sdq(dest, gpr8);
    masm.shrq(gpr9, 1);
    masm.cvtsi2sdq(temp3, gpr9);
    masm.xorpd(temp4, temp4);
    masm.shll(gpr4, 4);
    masm.negl(gpr4);
    masm.addl(gpr4, 16368);
    masm.orl(gpr4, gpr5);
    masm.xorl(gpr4, gpr2);
    masm.pinsrw(temp4, gpr4, 3);
    masm.leaq(gpr1, externalAddress(piFourPtr));
    // 0x40000000,
    masm.movdqu(temp2, new AMD64Address(gpr1, 0));
    // 0x3fe921fb,
    // 0x18469899,
    // 0x3e64442d
    masm.xorpd(temp5, temp5);
    masm.subl(gpr4, 1008);
    masm.pinsrw(temp5, gpr4, 3);
    masm.mulsd(dest, temp4);
    masm.shll(gpr5, 16);
    masm.sarl(gpr5, 31);
    masm.mulsd(temp3, temp5);
    masm.movdqu(temp1, dest);
    masm.pshufd(temp6, temp2, 0xE);
    masm.mulsd(dest, temp2);
    masm.shrl(gpr6, 29);
    masm.addsd(temp1, temp3);
    masm.mulsd(temp3, temp2);
    masm.addl(gpr6, gpr5);
    masm.xorl(gpr6, gpr5);
    masm.mulsd(temp6, temp1);
    masm.movl(gpr1, gpr6);
    masm.addsd(temp6, temp3);
    masm.movdqu(temp2, dest);
    masm.addsd(dest, temp6);
    masm.subsd(temp2, dest);
    masm.addsd(temp6, temp2);
    masm.bind(bb12);
    // 0x6dc9c883,
    masm.movdq(temp1, externalAddress(piThirtyTwoInvPtr));
    // 0x40245f30
    masm.mulsd(temp1, dest);
    // 0x00000000,
    masm.movdq(temp5, externalAddress(oneHalfPtr));
    // 0x3fe00000,
    // 0x00000000,
    // 0x3fe00000
    // 0x00000000,
    masm.movdq(temp4, externalAddress(signMaskPtr));
    // 0x80000000
    masm.pand(temp4, dest);
    masm.por(temp5, temp4);
    masm.addpd(temp1, temp5);
    masm.cvttsd2sil(gpr4, temp1);
    masm.cvtsi2sdl(temp1, gpr4);
    // 0x54400000,
    masm.movdq(temp3, externalAddress(pOnePtr));
    // 0x3fb921fb
    // 0x1a600000,
    masm.movdqu(temp2, externalAddress(pTwoPtr));
    // 0x3d90b461,
    // 0x1a600000,
    // 0x3d90b461
    masm.mulsd(temp3, temp1);
    masm.unpcklpd(temp1, temp1);
    masm.shll(gpr1, 3);
    masm.addl(gpr4, 1865216);
    masm.movdqu(temp4, dest);
    masm.addl(gpr4, gpr1);
    masm.andl(gpr4, 63);
    // 0x54400000,
    masm.movdqu(temp5, externalAddress(scFourPtr));
    // 0x3fb921fb
    masm.leaq(gpr1, externalAddress(cTablePtr));
    masm.shll(gpr4, 5);
    masm.addq(gpr1, gpr4);
    masm.movdqu(temp8, new AMD64Address(gpr1, 0));
    masm.mulpd(temp2, temp1);
    masm.subsd(dest, temp3);
    // 0x2e037073,
    masm.mulsd(temp1, externalAddress(pThreePtr));
    // 0x3b63198a
    masm.subsd(temp4, temp3);
    masm.unpcklpd(dest, dest);
    masm.movdqu(temp3, temp4);
    masm.subsd(temp4, temp2);
    masm.mulpd(temp5, dest);
    masm.subpd(dest, temp2);
    masm.pshufd(temp7, temp8, 0xE);
    masm.movdqu(temp9, temp7);
    masm.mulsd(temp7, temp4);
    masm.subsd(temp3, temp4);
    masm.mulpd(temp5, dest);
    masm.mulpd(dest, dest);
    masm.subsd(temp3, temp2);
    masm.movdqu(temp2, temp8);
    masm.subsd(temp1, temp3);
    masm.movdq(temp3, new AMD64Address(gpr1, 24));
    masm.addsd(temp2, temp3);
    masm.subsd(temp7, temp2);
    masm.subsd(temp1, temp6);
    // 0x11111111,
    masm.movdqu(temp6, externalAddress(scTwoPtr));
    // 0x3f811111,
    // 0x55555555,
    // 0x3fa55555
    masm.mulsd(temp2, temp4);
    masm.mulpd(temp6, dest);
    masm.mulsd(temp3, temp4);
    masm.mulpd(temp2, dest);
    masm.mulpd(dest, dest);
    // 0x1a01a01a,
    masm.addpd(temp5, externalAddress(scThreePtr));
    // 0xbf2a01a0,
    // 0x16c16c17,
    // 0xbf56c16c
    masm.mulsd(temp4, temp8);
    // 0x55555555,
    masm.addpd(temp6, externalAddress(scOnePtr));
    // 0xbfc55555,
    // 0x00000000,
    // 0xbfe00000
    masm.mulpd(temp5, dest);
    masm.movdqu(dest, temp3);
    masm.addsd(temp3, temp9);
    masm.mulpd(temp1, temp7);
    masm.movdqu(temp7, temp4);
    masm.addsd(temp4, temp3);
    masm.addpd(temp6, temp5);
    masm.subsd(temp9, temp3);
    masm.subsd(temp3, temp4);
    masm.addsd(temp1, new AMD64Address(gpr1, 16));
    masm.mulpd(temp6, temp2);
    masm.addsd(temp9, dest);
    masm.addsd(temp3, temp7);
    masm.addsd(temp1, temp9);
    masm.addsd(temp1, temp3);
    masm.addsd(temp1, temp6);
    masm.unpckhpd(temp6, temp6);
    masm.movdqu(dest, temp4);
    masm.addsd(temp1, temp6);
    masm.addsd(dest, temp1);
    masm.jmp(bb15);
    masm.bind(bb8);
    masm.addl(gpr4, 64);
    masm.movq(gpr8, gpr9);
    masm.movq(gpr9, gpr7);
    masm.movl(gpr7, 0);
    masm.cmpq(gpr8, 0);
    masm.jcc(ConditionFlag.NotEqual, bb9);
    masm.addl(gpr4, 64);
    masm.movq(gpr8, gpr9);
    masm.movq(gpr9, gpr7);
    masm.cmpq(gpr8, 0);
    masm.jcc(ConditionFlag.NotEqual, bb9);
    masm.xorpd(dest, dest);
    masm.xorpd(temp6, temp6);
    masm.jmp(bb12);
    masm.bind(bb10);
    masm.jcc(ConditionFlag.Equal, bb11);
    masm.negl(gpr3);
    masm.shrq(gpr9);
    masm.movq(gpr1, gpr8);
    masm.shrq(gpr8);
    masm.subl(gpr4, gpr3);
    masm.negl(gpr3);
    masm.addl(gpr3, 64);
    masm.shlq(gpr1);
    masm.orq(gpr9, gpr1);
    masm.jmp(bb11);
    masm.bind(bb4);
    masm.negl(gpr3);
    masm.shlq(gpr8, 32);
    masm.orq(gpr8, gpr10);
    masm.shlq(gpr8);
    masm.movq(gpr6, gpr8);
    masm.testl(gpr8, Integer.MIN_VALUE);
    masm.jcc(ConditionFlag.NotEqual, bb13);
    masm.shrl(gpr8);
    masm.movl(gpr2, 0);
    masm.shrq(gpr6, 3);
    masm.jmp(bb6);
    masm.bind(bb5);
    masm.shrl(gpr8);
    masm.movl(gpr2, 536870912);
    masm.shrl(gpr2);
    masm.shlq(gpr8, 32);
    masm.orq(gpr8, gpr10);
    masm.shlq(gpr2, 32);
    masm.addl(gpr6, 536870912);
    masm.movl(gpr3, 0);
    masm.movl(gpr10, 0);
    masm.subq(gpr3, gpr7);
    masm.sbbq(gpr10, gpr9);
    masm.sbbq(gpr2, gpr8);
    masm.movq(gpr7, gpr3);
    masm.movq(gpr9, gpr10);
    masm.movq(gpr8, gpr2);
    masm.movl(gpr2, 32768);
    masm.jmp(bb6);
    masm.bind(bb13);
    masm.shrl(gpr8);
    masm.movq(gpr2, 0x100000000L);
    masm.shrq(gpr2);
    masm.movl(gpr3, 0);
    masm.movl(gpr10, 0);
    masm.subq(gpr3, gpr7);
    masm.sbbq(gpr10, gpr9);
    masm.sbbq(gpr2, gpr8);
    masm.movq(gpr7, gpr3);
    masm.movq(gpr9, gpr10);
    masm.movq(gpr8, gpr2);
    masm.movl(gpr2, 32768);
    masm.shrq(gpr6, 3);
    masm.addl(gpr6, 536870912);
    masm.jmp(bb6);
    masm.bind(bb15);
}
Also used: Register (jdk.vm.ci.code.Register), ValueUtil.asRegister (jdk.vm.ci.code.ValueUtil.asRegister), ArrayDataPointerConstant (org.graalvm.compiler.lir.asm.ArrayDataPointerConstant), Label (org.graalvm.compiler.asm.Label), AMD64Address (org.graalvm.compiler.asm.amd64.AMD64Address)

Aggregations

Register (jdk.vm.ci.code.Register): 7, ValueUtil.asRegister (jdk.vm.ci.code.ValueUtil.asRegister): 7, Label (org.graalvm.compiler.asm.Label): 7, AMD64Address (org.graalvm.compiler.asm.amd64.AMD64Address): 7, ArrayDataPointerConstant (org.graalvm.compiler.lir.asm.ArrayDataPointerConstant): 7