Search in sources :

Example 6 with ArrayDataPointerConstant

use of org.graalvm.compiler.lir.asm.ArrayDataPointerConstant in project graal by oracle.

In the class AMD64MathIntrinsicUnaryOp, method log10Intrinsic.

/*
 * Copyright (c) 2014, 2016, Intel Corporation. All rights reserved.
 * Intel Math Library (LIBM) Source Code
 *
 * ALGORITHM DESCRIPTION - LOG10()
 * -------------------------------
 *
 * Let x = 2^k * mx, mx in [1,2)
 *
 * Get B ~ 1/mx based on the output of the rcpss instruction (B0):
 *     B = int((B0*LH*2^7+0.5))/2^7
 * where LH is a short approximation for log10(e).
 * (Note: the code below emits rcpps rather than rcpss.)
 *
 * Reduced argument: r = B*mx - LH (computed accurately in high and low parts)
 *
 * Result: k*log10(2) - log(B) + p(r)
 *     p(r) is a degree 7 polynomial
 *     -log(B) is read from a data table (high, low parts)
 *     The result is formed from high and low parts.
 *
 * Special cases:
 *     log10(0)    = -INF with divide-by-zero exception raised
 *     log10(1)    = +0
 *     log10(x)    = NaN with invalid exception raised if x < -0, including -INF
 *     log10(+INF) = +INF
 *
 * Emits the AMD64 instruction sequence for log10 through the given assembler.
 *
 * Parameters:
 *     dest  - register that holds the result when the emitted code reaches bb8
 *     value - register holding the input x; it is spilled to stackSlot and copied into dest
 *     crb   - compilation result builder; used to resolve stackTemp to an address and passed to setCrb
 *     masm  - assembler through which every instruction below is emitted
 *
 * The emission order is significant (registers are reused and one branch depends on flags
 * set by an earlier compare), so statements must not be reordered.
 */
public void log10Intrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) {
    // Pointer constants for the data tables addressed below through externalAddress(...).
    // NOTE(review): the second constructor argument (16) is presumably the alignment - confirm.
    ArrayDataPointerConstant highmaskLogTenPtr = new ArrayDataPointerConstant(highmaskLogTen, 16);
    ArrayDataPointerConstant logTenEPtr = new ArrayDataPointerConstant(logTenE, 16);
    ArrayDataPointerConstant logTenTablePtr = new ArrayDataPointerConstant(logTenTable, 16);
    ArrayDataPointerConstant logTwoLogTenDataPtr = new ArrayDataPointerConstant(logTwoLogTenData, 16);
    ArrayDataPointerConstant coeffLogTenDataPtr = new ArrayDataPointerConstant(coeffLogTenData, 16);
    // Branch targets:
    // bb0 - entry of the special-input handling
    // bb1 - main computation (also re-entered after a denormal input has been rescaled)
    // bb2 - input with the sign bit set
    // bb3 - input whose exponent field is zero (zero or denormal)
    // bb4 - returns x + x
    // bb5 - sign bit set and exponent field all ones (NaN or -INF)
    // bb6 - produces NaN via 0 * INF
    // bb7 - produces -INF via -1 / 0
    // bb8 - common exit
    Label bb0 = new Label();
    Label bb1 = new Label();
    Label bb2 = new Label();
    Label bb3 = new Label();
    Label bb4 = new Label();
    Label bb5 = new Label();
    Label bb6 = new Label();
    Label bb7 = new Label();
    Label bb8 = new Label();
    // Scratch general-purpose registers; note that gpr3 is taken from rcxTemp.
    Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD);
    Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD);
    Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD);
    Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD);
    // Scratch XMM registers.
    Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE);
    Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE);
    Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE);
    Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE);
    Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE);
    Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE);
    Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE);
    // Stack slot that keeps the unmodified input for the special-case paths.
    AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp);
    setCrb(crb);
    // Spill the input, then make sure dest holds a copy of it.
    masm.movdq(stackSlot, value);
    if (dest.encoding != value.encoding) {
        masm.movdqu(dest, value);
    }
    // Data words noted for this load (presumably the contents of highmaskLogTen):
    // 0xf8000000, 0xffffffff, 0x00000000, 0xffffe000
    masm.movdqu(temp5, externalAddress(highmaskLogTenPtr));
    // temp2 = 0 with 16368 (0x3FF0) in its top 16-bit word, i.e. the bit pattern of 1.0.
    masm.xorpd(temp2, temp2);
    masm.movl(gpr1, 16368);
    masm.pinsrw(temp2, gpr1, 3);
    masm.movl(gpr2, 1054736384);
    masm.movdl(temp7, gpr2);
    // temp3 = 0 with 30704 (0x77F0) in its top 16-bit word.
    masm.xorpd(temp3, temp3);
    masm.movl(gpr3, 30704);
    masm.pinsrw(temp3, gpr3, 3);
    masm.movl(gpr3, 32768);
    masm.movdl(temp4, gpr3);
    masm.movdqu(temp1, value);
    // gpr1 = top 16 bits of x (sign, exponent and the 4 leading mantissa bits).
    masm.pextrw(gpr1, dest, 3);
    masm.por(dest, temp2);
    masm.movl(gpr2, 16352);
    masm.psrlq(dest, 27);
    // Data words noted for this load (presumably the contents of logTenE):
    // 0x00000000, 0x3fdbc000, 0xbf2e4108, 0x3f5a7a6c
    masm.movdqu(temp2, externalAddress(logTenEPtr));
    masm.psrld(dest, 2);
    // Reciprocal approximation (B0 in the algorithm description).
    masm.rcpps(dest, dest);
    // Shifting left then right by 12 clears the top 12 bits (sign and exponent) of temp1.
    masm.psllq(temp1, 12);
    masm.pshufd(temp6, temp5, 0x4E);
    masm.psrlq(temp1, 12);
    // Unsigned range check on the top word: after subtracting 16, values >= 32736 (0x7FE0)
    // correspond to an original top word below 16 (exponent field zero) or at/above 0x7FF0
    // (exponent all ones, or sign bit set). Those inputs take the special path at bb0.
    masm.subl(gpr1, 16);
    masm.cmpl(gpr1, 32736);
    masm.jcc(ConditionFlag.AboveEqual, bb0);
    // Main path: reduced argument, table lookup and polynomial (see algorithm description).
    masm.bind(bb1);
    masm.mulss(dest, temp7);
    masm.por(temp1, temp3);
    masm.andpd(temp5, temp1);
    masm.paddd(dest, temp4);
    // Data words noted for this load (presumably coeffLogTenData at offset 0):
    // 0xc1a5f12e, 0x40358874, 0x64d4ef0d, 0xc0089309
    masm.movdqu(temp3, externalAddress(coeffLogTenDataPtr));
    masm.leaq(gpr4, externalAddress(coeffLogTenDataPtr));
    // Data words noted for this load (presumably coeffLogTenData at offset 16):
    // 0x385593b1, 0xc025c917, 0xdc963467, 0x3ffc6a02
    masm.movdqu(temp4, new AMD64Address(gpr4, 16));
    masm.subsd(temp1, temp5);
    masm.movdl(gpr3, dest);
    masm.psllq(dest, 29);
    masm.andpd(dest, temp6);
    // Data words noted for this load (presumably logTwoLogTenData at offset 0):
    // 0x509f7800, 0x3f934413
    masm.movdq(temp6, externalAddress(logTwoLogTenDataPtr));
    // Keep only the exponent bits (32752 = 0x7FF0) of the top word, subtract the offset held
    // in gpr2 and convert to double. NOTE(review): presumably this yields a scaled k - confirm.
    masm.andl(gpr1, 32752);
    masm.subl(gpr1, gpr2);
    masm.cvtsi2sdl(temp7, gpr1);
    masm.mulpd(temp5, dest);
    masm.mulsd(temp1, dest);
    masm.subsd(temp5, temp2);
    // Data words noted for this load (presumably coeffLogTenData at offset 32):
    // 0x7f9d3aa1, 0x4016ab9f, 0xdc77b115, 0xbff27af2
    masm.movdqu(temp2, new AMD64Address(gpr4, 32));
    // Table lookup: the index is gpr3 masked with 16711680 (0xFF0000) and shifted right by 12,
    // applied to logTenTable with a displacement of -1504.
    masm.leaq(gpr4, externalAddress(logTenTablePtr));
    masm.andl(gpr3, 16711680);
    masm.shrl(gpr3, 12);
    masm.movdqu(dest, new AMD64Address(gpr4, gpr3, Scale.Times1, -1504));
    masm.addsd(temp1, temp5);
    masm.mulsd(temp6, temp7);
    masm.pshufd(temp5, temp1, 0x44);
    masm.leaq(gpr4, externalAddress(logTwoLogTenDataPtr));
    // Data words noted for this operand (presumably logTwoLogTenData at offset 8):
    // 0x1f12b358, 0x3cdfef31
    masm.mulsd(temp7, new AMD64Address(gpr4, 8));
    masm.mulsd(temp3, temp1);
    masm.addsd(dest, temp6);
    masm.mulpd(temp4, temp5);
    masm.leaq(gpr4, externalAddress(logTenEPtr));
    // Data words noted for this load (presumably logTenE at offset 8):
    // 0xbf2e4108, 0x3f5a7a6c
    masm.movdq(temp6, new AMD64Address(gpr4, 8));
    masm.mulpd(temp5, temp5);
    masm.addpd(temp4, temp2);
    masm.mulpd(temp3, temp5);
    masm.pshufd(temp2, dest, 0xE4);
    masm.addsd(dest, temp1);
    masm.mulsd(temp4, temp1);
    masm.subsd(temp2, dest);
    masm.mulsd(temp6, temp1);
    masm.addsd(temp1, temp2);
    masm.pshufd(temp2, dest, 0xEE);
    masm.mulsd(temp5, temp5);
    masm.addsd(temp7, temp2);
    masm.addsd(temp1, temp6);
    masm.addpd(temp4, temp3);
    masm.addsd(temp1, temp7);
    masm.mulpd(temp4, temp5);
    masm.addsd(temp1, temp4);
    masm.pshufd(temp5, temp4, 0xEE);
    masm.addsd(temp1, temp5);
    // Final sum of the accumulated parts into dest, then leave through the common exit.
    masm.addsd(dest, temp1);
    masm.jmp(bb8);
    // Special inputs: reload the original x from the stack and undo the earlier subtraction
    // of 16 so gpr1 again holds the top word of x.
    masm.bind(bb0);
    masm.movdq(dest, stackSlot);
    masm.movdq(temp1, stackSlot);
    masm.addl(gpr1, 16);
    // Top word >= 32768 (0x8000): the sign bit is set.
    masm.cmpl(gpr1, 32768);
    masm.jcc(ConditionFlag.AboveEqual, bb2);
    // Top word < 16: the exponent field is zero (zero or denormal).
    masm.cmpl(gpr1, 16);
    masm.jcc(ConditionFlag.Below, bb3);
    // Remaining case here is a non-negative x with exponent all ones; return x + x.
    masm.bind(bb4);
    masm.addsd(dest, dest);
    masm.jmp(bb8);
    // Reached only from the AboveEqual branch in bb2, so the first jcc below still tests the
    // flags of that compare: "above" means mantissa bits are set in the high word (NaN).
    masm.bind(bb5);
    masm.jcc(ConditionFlag.Above, bb4);
    // High word has no mantissa bits; a non-zero low word still makes it a NaN.
    masm.cmpl(gpr3, 0);
    masm.jcc(ConditionFlag.Above, bb4);
    // Otherwise x is -INF: produce NaN.
    masm.jmp(bb6);
    // Exponent field is zero: distinguish zero from a denormal.
    masm.bind(bb3);
    masm.xorpd(temp1, temp1);
    masm.addsd(temp1, dest);
    // OR the low and high 32-bit halves together; the result is 0 only when all bits are 0.
    masm.movdl(gpr3, temp1);
    masm.psrlq(temp1, 32);
    masm.movdl(gpr2, temp1);
    masm.orl(gpr3, gpr2);
    masm.cmpl(gpr3, 0);
    masm.jcc(ConditionFlag.Equal, bb7);
    // Denormal: multiply x by the constant whose top word is 18416 (0x47F0, i.e. 2^128),
    // then repeat the setup done before bb1 on the scaled value, with gpr2 = 18416 as the
    // exponent offset instead of 16352.
    masm.xorpd(temp1, temp1);
    masm.xorpd(temp2, temp2);
    masm.movl(gpr1, 18416);
    masm.pinsrw(temp1, gpr1, 3);
    masm.mulsd(dest, temp1);
    masm.movl(gpr1, 16368);
    masm.pinsrw(temp2, gpr1, 3);
    masm.movdqu(temp1, dest);
    masm.pextrw(gpr1, dest, 3);
    masm.por(dest, temp2);
    masm.movl(gpr2, 18416);
    masm.psrlq(dest, 27);
    // Data words noted for this load (presumably the contents of logTenE):
    // 0x00000000, 0x3fdbc000, 0xbf2e4108, 0x3f5a7a6c
    masm.movdqu(temp2, externalAddress(logTenEPtr));
    masm.psrld(dest, 2);
    masm.rcpps(dest, dest);
    masm.psllq(temp1, 12);
    masm.pshufd(temp6, temp5, 0x4E);
    masm.psrlq(temp1, 12);
    masm.jmp(bb1);
    // Sign bit set: gpr3 = low 32 bits, gpr2 = high 32 bits doubled (which drops the sign bit).
    masm.bind(bb2);
    masm.movdl(gpr3, temp1);
    masm.psrlq(temp1, 32);
    masm.movdl(gpr2, temp1);
    masm.addl(gpr2, gpr2);
    // -2097152 is 0xFFE00000: at or above it (unsigned) the exponent field is all ones.
    masm.cmpl(gpr2, -2097152);
    masm.jcc(ConditionFlag.AboveEqual, bb5);
    // All remaining bits zero means x is -0, handled like zero.
    masm.orl(gpr3, gpr2);
    masm.cmpl(gpr3, 0);
    masm.jcc(ConditionFlag.Equal, bb7);
    // Negative input: dest = 0 * (value with top word 32752 = 0x7FF0, i.e. +INF), giving NaN.
    masm.bind(bb6);
    masm.xorpd(temp1, temp1);
    masm.xorpd(dest, dest);
    masm.movl(gpr1, 32752);
    masm.pinsrw(temp1, gpr1, 3);
    masm.mulsd(dest, temp1);
    masm.jmp(bb8);
    // Zero input: dest = (value with top word 49136 = 0xBFF0, i.e. -1.0) / 0, giving -INF.
    masm.bind(bb7);
    masm.xorpd(temp1, temp1);
    masm.xorpd(dest, dest);
    masm.movl(gpr1, 49136);
    masm.pinsrw(dest, gpr1, 3);
    masm.divsd(dest, temp1);
    // Common exit; the result is in dest.
    masm.bind(bb8);
}
Also used : Register(jdk.vm.ci.code.Register) ValueUtil.asRegister(jdk.vm.ci.code.ValueUtil.asRegister) ArrayDataPointerConstant(org.graalvm.compiler.lir.asm.ArrayDataPointerConstant) Label(org.graalvm.compiler.asm.Label) AMD64Address(org.graalvm.compiler.asm.amd64.AMD64Address)

Example 7 with ArrayDataPointerConstant

use of org.graalvm.compiler.lir.asm.ArrayDataPointerConstant in project graal by oracle.

In the class AMD64MathIntrinsicBinaryOp, method powIntrinsic.

public void powIntrinsic(Register dest, Register value1, Register value2, CompilationResultBuilder crb, AMD64MacroAssembler masm) {
    ArrayDataPointerConstant highSigMaskPtr = new ArrayDataPointerConstant(highSigMask, 16);
    ArrayDataPointerConstant logTwoEPtr = new ArrayDataPointerConstant(logTwoE, 16);
    ArrayDataPointerConstant highmaskYPtr = new ArrayDataPointerConstant(highmaskY, 16);
    ArrayDataPointerConstant tExpPtr = new ArrayDataPointerConstant(tExp, 16);
    ArrayDataPointerConstant eCoeffPtr = new ArrayDataPointerConstant(eCoeff, 16);
    ArrayDataPointerConstant coeffHPtr = new ArrayDataPointerConstant(coeffH, 16);
    ArrayDataPointerConstant highmaskLogXPtr = new ArrayDataPointerConstant(highmaskLogX, 16);
    ArrayDataPointerConstant halfmaskPtr = new ArrayDataPointerConstant(halfmask, 8);
    ArrayDataPointerConstant coeffPowPtr = new ArrayDataPointerConstant(coeffPow, 16);
    ArrayDataPointerConstant lTblPowPtr = new ArrayDataPointerConstant(lTblPow, 16);
    ArrayDataPointerConstant logTwoPowPtr = new ArrayDataPointerConstant(logTwoPow, 8);
    Label bb0 = new Label();
    Label bb1 = new Label();
    Label bb2 = new Label();
    Label bb3 = new Label();
    Label bb4 = new Label();
    Label bb5 = new Label();
    Label bb6 = new Label();
    Label bb7 = new Label();
    Label bb8 = new Label();
    Label bb9 = new Label();
    Label bb10 = new Label();
    Label bb11 = new Label();
    Label bb12 = new Label();
    Label bb13 = new Label();
    Label bb14 = new Label();
    Label bb15 = new Label();
    Label bb16 = new Label();
    Label bb18 = new Label();
    Label bb19 = new Label();
    Label bb20 = new Label();
    Label bb21 = new Label();
    Label bb22 = new Label();
    Label bb23 = new Label();
    Label bb24 = new Label();
    Label bb25 = new Label();
    Label bb26 = new Label();
    Label bb27 = new Label();
    Label bb28 = new Label();
    Label bb29 = new Label();
    Label bb30 = new Label();
    Label bb31 = new Label();
    Label bb32 = new Label();
    Label bb33 = new Label();
    Label bb34 = new Label();
    Label bb35 = new Label();
    Label bb36 = new Label();
    Label bb37 = new Label();
    Label bb38 = new Label();
    Label bb39 = new Label();
    Label bb40 = new Label();
    Label bb41 = new Label();
    Label bb42 = new Label();
    Label bb43 = new Label();
    Label bb44 = new Label();
    Label bb45 = new Label();
    Label bb46 = new Label();
    Label bb47 = new Label();
    Label bb48 = new Label();
    Label bb49 = new Label();
    Label bb50 = new Label();
    Label bb51 = new Label();
    Label bb53 = new Label();
    Label bb54 = new Label();
    Label bb55 = new Label();
    Label bb56 = new Label();
    Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD);
    Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD);
    Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD);
    Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD);
    Register gpr5 = asRegister(gpr5Temp, AMD64Kind.QWORD);
    Register gpr6 = asRegister(gpr6Temp, AMD64Kind.QWORD);
    Register gpr7 = asRegister(gpr7Temp, AMD64Kind.QWORD);
    Register gpr8 = asRegister(gpr8Temp, AMD64Kind.QWORD);
    Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE);
    Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE);
    Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE);
    Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE);
    Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE);
    Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE);
    Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE);
    Register temp8 = asRegister(xmm8Temp, AMD64Kind.DOUBLE);
    Register temp9 = asRegister(xmm9Temp, AMD64Kind.DOUBLE);
    Register temp10 = asRegister(xmm10Temp, AMD64Kind.DOUBLE);
    setCrb(crb);
    masm.movdqu(temp10, value1);
    masm.movsd(temp8, value2);
    if (dest.encoding != value1.encoding) {
        masm.movdqu(dest, value1);
    }
    // 0x00000000,
    masm.movq(temp9, externalAddress(logTwoEPtr));
    // 0x3ff72000
    masm.pextrw(gpr1, dest, 3);
    masm.xorpd(temp2, temp2);
    masm.movq(gpr2, 0x3ff0000000000000L);
    masm.movdq(temp2, gpr2);
    masm.movl(gpr5, 1069088768);
    masm.movdq(temp7, gpr5);
    masm.xorpd(temp1, temp1);
    masm.movq(gpr6, 0x77f0000000000000L);
    masm.movdq(temp1, gpr6);
    masm.movdqu(temp3, dest);
    masm.movl(gpr4, 32752);
    masm.andl(gpr4, gpr1);
    masm.subl(gpr4, 16368);
    masm.movl(gpr3, gpr4);
    masm.sarl(gpr4, 31);
    masm.addl(gpr3, gpr4);
    masm.xorl(gpr3, gpr4);
    masm.por(dest, temp2);
    // 0x00000000,
    masm.movdqu(temp6, externalAddress(highSigMaskPtr));
    // 0xfffff800,
    // 0x00000000,
    // 0xfffff800
    masm.psrlq(dest, 27);
    masm.psrld(dest, 2);
    masm.addl(gpr3, 16);
    masm.bsrl(gpr3, gpr3);
    masm.rcpps(dest, dest);
    masm.psllq(temp3, 12);
    masm.movl(gpr7, 8192);
    masm.movdq(temp4, gpr7);
    masm.psrlq(temp3, 12);
    masm.subl(gpr1, 16);
    masm.cmpl(gpr1, 32736);
    masm.jcc(ConditionFlag.AboveEqual, bb0);
    masm.movl(gpr5, 0);
    masm.bind(bb1);
    masm.mulss(dest, temp7);
    masm.movl(gpr4, -1);
    masm.subl(gpr3, 4);
    masm.shll(gpr4);
    masm.shlq(gpr4, 32);
    masm.movdq(temp5, gpr4);
    masm.por(temp3, temp1);
    masm.subl(gpr1, 16351);
    masm.cmpl(gpr1, 1);
    masm.jcc(ConditionFlag.BelowEqual, bb2);
    masm.paddd(dest, temp4);
    masm.pand(temp5, temp3);
    masm.movdl(gpr4, dest);
    masm.psllq(dest, 29);
    masm.bind(bb3);
    masm.subsd(temp3, temp5);
    masm.pand(dest, temp6);
    masm.subl(gpr1, 1);
    masm.sarl(gpr1, 4);
    masm.cvtsi2sdl(temp7, gpr1);
    masm.mulpd(temp5, dest);
    masm.bind(bb4);
    masm.mulsd(temp3, dest);
    masm.leaq(gpr8, externalAddress(coeffPowPtr));
    // 0x6dc96112,
    masm.movdqu(temp1, new AMD64Address(gpr8, 0));
    // 0xbf836578,
    // 0xee241472,
    // 0xbf9b0301
    // 0x9f95985a,
    masm.movdqu(temp4, new AMD64Address(gpr8, 16));
    // 0xbfb528db,
    // 0xb3841d2a,
    // 0xbfd619b6
    // 0x518775e3,
    masm.movdqu(temp6, new AMD64Address(gpr8, 32));
    // 0x3f9004f2,
    // 0xac8349bb,
    // 0x3fa76c9b
    // 0x486ececc,
    masm.movdqu(dest, new AMD64Address(gpr8, 48));
    // 0x3fc4635e,
    // 0x161bb241,
    // 0xbf5dabe1
    masm.subsd(temp5, temp9);
    masm.movl(gpr3, gpr1);
    masm.sarl(gpr1, 31);
    masm.addl(gpr3, gpr1);
    masm.xorl(gpr1, gpr3);
    masm.addl(gpr1, 1);
    masm.bsrl(gpr1, gpr1);
    masm.unpcklpd(temp5, temp3);
    masm.addsd(temp3, temp5);
    masm.leaq(gpr7, externalAddress(lTblPowPtr));
    masm.andl(gpr4, 16760832);
    masm.shrl(gpr4, 10);
    masm.addpd(temp5, new AMD64Address(gpr7, gpr4, Scale.Times1, -3648));
    masm.pshufd(temp2, temp3, 0x44);
    masm.mulsd(temp3, temp3);
    masm.mulpd(temp1, temp2);
    masm.mulpd(temp4, temp2);
    masm.addsd(temp5, temp7);
    masm.mulsd(temp2, temp3);
    masm.addpd(temp6, temp1);
    masm.mulsd(temp3, temp3);
    masm.addpd(dest, temp4);
    masm.movdqu(temp1, temp8);
    masm.pextrw(gpr3, temp8, 3);
    masm.pshufd(temp7, temp5, 0xEE);
    // 0x00000000,
    masm.movq(temp4, externalAddress(highmaskYPtr));
    // 0xfffffff8
    masm.mulpd(temp6, temp2);
    masm.pshufd(temp3, temp3, 0x44);
    masm.mulpd(dest, temp2);
    masm.shll(gpr1, 4);
    masm.subl(gpr1, 15872);
    masm.andl(gpr3, 32752);
    masm.addl(gpr1, gpr3);
    masm.mulpd(temp3, temp6);
    masm.cmpl(gpr1, 624);
    masm.jcc(ConditionFlag.AboveEqual, bb5);
    masm.xorpd(temp6, temp6);
    masm.movl(gpr4, 17080);
    masm.pinsrw(temp6, gpr4, 3);
    masm.movdqu(temp2, temp1);
    masm.pand(temp4, temp1);
    masm.subsd(temp1, temp4);
    masm.mulsd(temp4, temp5);
    masm.addsd(dest, temp7);
    masm.mulsd(temp1, temp5);
    masm.movdqu(temp7, temp6);
    masm.addsd(temp6, temp4);
    masm.leaq(gpr7, externalAddress(tExpPtr));
    masm.addpd(temp3, dest);
    masm.movdl(gpr4, temp6);
    masm.movl(gpr3, gpr4);
    masm.andl(gpr4, 255);
    masm.addl(gpr4, gpr4);
    masm.movdqu(temp5, new AMD64Address(gpr7, gpr4, Scale.Times8, 0));
    masm.subsd(temp6, temp7);
    masm.pshufd(dest, temp3, 0xEE);
    masm.subsd(temp4, temp6);
    masm.addsd(dest, temp3);
    masm.addsd(temp4, temp1);
    masm.mulsd(temp2, dest);
    masm.leaq(gpr8, externalAddress(eCoeffPtr));
    // 0xe78a6731,
    masm.movdqu(temp7, new AMD64Address(gpr8, 0));
    // 0x3f55d87f,
    // 0xd704a0c0,
    // 0x3fac6b08
    // 0x6fba4e77,
    masm.movdqu(temp3, new AMD64Address(gpr8, 16));
    // 0x3f83b2ab,
    // 0xff82c58f,
    // 0x3fcebfbd
    masm.shll(gpr3, 12);
    masm.xorl(gpr3, gpr5);
    masm.andl(gpr3, -1048576);
    masm.movdq(temp6, gpr3);
    masm.addsd(temp2, temp4);
    masm.movq(gpr2, 0x3fe62e42fefa39efL);
    masm.movdq(temp1, gpr2);
    masm.pshufd(dest, temp2, 0x44);
    masm.pshufd(temp4, temp2, 0x44);
    masm.mulsd(temp1, temp2);
    masm.pshufd(temp6, temp6, 0x11);
    masm.mulpd(dest, dest);
    masm.mulpd(temp7, temp4);
    masm.paddd(temp5, temp6);
    masm.mulsd(temp1, temp5);
    masm.pshufd(temp6, temp5, 0xEE);
    masm.mulsd(dest, dest);
    masm.addpd(temp3, temp7);
    masm.addsd(temp1, temp6);
    masm.mulpd(dest, temp3);
    masm.pshufd(temp3, dest, 0xEE);
    masm.mulsd(dest, temp5);
    masm.mulsd(temp3, temp5);
    masm.addsd(dest, temp1);
    masm.addsd(dest, temp3);
    masm.addsd(dest, temp5);
    masm.jmp(bb56);
    masm.bind(bb0);
    masm.addl(gpr1, 16);
    masm.movl(gpr4, 32752);
    masm.andl(gpr4, gpr1);
    masm.cmpl(gpr4, 32752);
    masm.jcc(ConditionFlag.Equal, bb6);
    masm.testl(gpr1, 32768);
    masm.jcc(ConditionFlag.NotEqual, bb7);
    masm.bind(bb8);
    masm.movdqu(dest, temp10);
    masm.movdqu(temp3, temp10);
    masm.movdl(gpr4, temp3);
    masm.psrlq(temp3, 32);
    masm.movdl(gpr3, temp3);
    masm.orl(gpr4, gpr3);
    masm.cmpl(gpr4, 0);
    masm.jcc(ConditionFlag.Equal, bb9);
    masm.xorpd(temp3, temp3);
    masm.movl(gpr1, 18416);
    masm.pinsrw(temp3, gpr1, 3);
    masm.mulsd(dest, temp3);
    masm.xorpd(temp2, temp2);
    masm.movl(gpr1, 16368);
    masm.pinsrw(temp2, gpr1, 3);
    masm.movdqu(temp3, dest);
    masm.pextrw(gpr1, dest, 3);
    masm.por(dest, temp2);
    masm.movl(gpr3, 18416);
    masm.psrlq(dest, 27);
    masm.psrld(dest, 2);
    masm.rcpps(dest, dest);
    masm.psllq(temp3, 12);
    // 0x00000000,
    masm.movdqu(temp6, externalAddress(highSigMaskPtr));
    // 0xfffff800,
    // 0x00000000,
    // 0xfffff800
    masm.psrlq(temp3, 12);
    masm.mulss(dest, temp7);
    masm.movl(gpr4, -1024);
    masm.movdl(temp5, gpr4);
    masm.por(temp3, temp1);
    masm.paddd(dest, temp4);
    masm.psllq(temp5, 32);
    masm.movdl(gpr4, dest);
    masm.psllq(dest, 29);
    masm.pand(temp5, temp3);
    masm.movl(gpr5, 0);
    masm.pand(dest, temp6);
    masm.subsd(temp3, temp5);
    masm.andl(gpr1, 32752);
    masm.subl(gpr1, 18416);
    masm.sarl(gpr1, 4);
    masm.cvtsi2sdl(temp7, gpr1);
    masm.mulpd(temp5, dest);
    masm.jmp(bb4);
    masm.bind(bb10);
    masm.movdqu(dest, temp10);
    masm.movdqu(temp3, temp10);
    masm.movdl(gpr4, temp3);
    masm.psrlq(temp3, 32);
    masm.movdl(gpr3, temp3);
    masm.orl(gpr4, gpr3);
    masm.cmpl(gpr4, 0);
    masm.jcc(ConditionFlag.Equal, bb9);
    masm.xorpd(temp3, temp3);
    masm.movl(gpr1, 18416);
    masm.pinsrw(temp3, gpr1, 3);
    masm.mulsd(dest, temp3);
    masm.xorpd(temp2, temp2);
    masm.movl(gpr1, 16368);
    masm.pinsrw(temp2, gpr1, 3);
    masm.movdqu(temp3, dest);
    masm.pextrw(gpr1, dest, 3);
    masm.por(dest, temp2);
    masm.movl(gpr3, 18416);
    masm.psrlq(dest, 27);
    masm.psrld(dest, 2);
    masm.rcpps(dest, dest);
    masm.psllq(temp3, 12);
    // 0x00000000,
    masm.movdqu(temp6, externalAddress(highSigMaskPtr));
    // 0xfffff800,
    // 0x00000000,
    // 0xfffff800
    masm.psrlq(temp3, 12);
    masm.mulss(dest, temp7);
    masm.movl(gpr4, -1024);
    masm.movdl(temp5, gpr4);
    masm.por(temp3, temp1);
    masm.paddd(dest, temp4);
    masm.psllq(temp5, 32);
    masm.movdl(gpr4, dest);
    masm.psllq(dest, 29);
    masm.pand(temp5, temp3);
    masm.movl(gpr5, Integer.MIN_VALUE);
    masm.pand(dest, temp6);
    masm.subsd(temp3, temp5);
    masm.andl(gpr1, 32752);
    masm.subl(gpr1, 18416);
    masm.sarl(gpr1, 4);
    masm.cvtsi2sdl(temp7, gpr1);
    masm.mulpd(temp5, dest);
    masm.jmp(bb4);
    masm.bind(bb5);
    masm.cmpl(gpr1, 0);
    masm.jcc(ConditionFlag.Less, bb11);
    masm.cmpl(gpr1, 752);
    masm.jcc(ConditionFlag.AboveEqual, bb12);
    masm.addsd(dest, temp7);
    // 0xf8000000,
    masm.movq(temp4, externalAddress(halfmaskPtr));
    // 0xffffffff
    masm.addpd(temp3, dest);
    masm.xorpd(temp6, temp6);
    masm.movl(gpr1, 17080);
    masm.pinsrw(temp6, gpr1, 3);
    masm.pshufd(dest, temp3, 0xEE);
    masm.addsd(dest, temp3);
    masm.movdqu(temp3, temp5);
    masm.addsd(temp5, dest);
    masm.subsd(temp3, temp5);
    masm.movdqu(temp7, temp5);
    masm.pand(temp5, temp4);
    masm.movdqu(temp2, temp1);
    masm.pand(temp4, temp1);
    masm.subsd(temp7, temp5);
    masm.addsd(dest, temp3);
    masm.subsd(temp1, temp4);
    masm.mulsd(temp4, temp5);
    masm.addsd(dest, temp7);
    masm.mulsd(temp2, dest);
    masm.movdqu(temp7, temp6);
    masm.mulsd(temp1, temp5);
    masm.addsd(temp6, temp4);
    masm.movdl(gpr1, temp6);
    masm.subsd(temp6, temp7);
    masm.leaq(gpr7, externalAddress(tExpPtr));
    masm.movl(gpr3, gpr1);
    masm.andl(gpr1, 255);
    masm.addl(gpr1, gpr1);
    masm.movdqu(temp5, new AMD64Address(gpr7, gpr1, Scale.Times8, 0));
    masm.addsd(temp2, temp1);
    masm.leaq(gpr8, externalAddress(eCoeffPtr));
    // 0xe78a6731,
    masm.movdqu(temp7, new AMD64Address(gpr8, 0));
    // 0x3f55d87f,
    // 0xd704a0c0,
    // 0x3fac6b08
    // 0x6fba4e77,
    masm.movdqu(temp3, new AMD64Address(gpr8, 16));
    // 0x3f83b2ab,
    // 0xff82c58f,
    // 0x3fcebfbd
    masm.subsd(temp4, temp6);
    masm.pextrw(gpr4, temp6, 3);
    masm.addsd(temp2, temp4);
    masm.sarl(gpr3, 8);
    masm.movl(gpr1, gpr3);
    masm.sarl(gpr3, 1);
    masm.subl(gpr1, gpr3);
    masm.shll(gpr3, 20);
    masm.xorl(gpr3, gpr5);
    masm.movdl(temp6, gpr3);
    // 0xfefa39ef,
    masm.movq(temp1, new AMD64Address(gpr8, 32));
    // 0x3fe62e42
    masm.andl(gpr4, 32767);
    masm.cmpl(gpr4, 16529);
    masm.jcc(ConditionFlag.Above, bb12);
    masm.pshufd(dest, temp2, 0x44);
    masm.pshufd(temp4, temp2, 0x44);
    masm.mulpd(dest, dest);
    masm.mulpd(temp7, temp4);
    masm.pshufd(temp6, temp6, 0x11);
    masm.mulsd(temp1, temp2);
    masm.mulsd(dest, dest);
    masm.paddd(temp5, temp6);
    masm.addpd(temp3, temp7);
    masm.mulsd(temp1, temp5);
    masm.pshufd(temp6, temp5, 0xEE);
    masm.mulpd(dest, temp3);
    masm.addsd(temp1, temp6);
    masm.pshufd(temp3, dest, 0xEE);
    masm.mulsd(dest, temp5);
    masm.mulsd(temp3, temp5);
    masm.shll(gpr1, 4);
    masm.xorpd(temp4, temp4);
    masm.addl(gpr1, 16368);
    masm.pinsrw(temp4, gpr1, 3);
    masm.addsd(dest, temp1);
    masm.addsd(dest, temp3);
    masm.movdqu(temp1, dest);
    masm.addsd(dest, temp5);
    masm.mulsd(dest, temp4);
    masm.pextrw(gpr1, dest, 3);
    masm.andl(gpr1, 32752);
    masm.jcc(ConditionFlag.Equal, bb13);
    masm.cmpl(gpr1, 32752);
    masm.jcc(ConditionFlag.Equal, bb14);
    masm.jmp(bb56);
    masm.bind(bb6);
    masm.movdqu(temp1, temp8);
    masm.movdqu(dest, temp10);
    masm.movdqu(temp2, dest);
    masm.movdl(gpr1, temp2);
    masm.psrlq(temp2, 20);
    masm.movdl(gpr4, temp2);
    masm.orl(gpr1, gpr4);
    masm.jcc(ConditionFlag.Equal, bb15);
    masm.movdl(gpr1, temp1);
    masm.psrlq(temp1, 32);
    masm.movdl(gpr4, temp1);
    masm.movl(gpr3, gpr4);
    masm.addl(gpr4, gpr4);
    masm.orl(gpr1, gpr4);
    masm.jcc(ConditionFlag.Equal, bb16);
    masm.addsd(dest, dest);
    masm.jmp(bb56);
    masm.bind(bb16);
    masm.xorpd(dest, dest);
    masm.movl(gpr1, 16368);
    masm.pinsrw(dest, gpr1, 3);
    masm.jmp(bb56);
    masm.bind(bb18);
    masm.addpd(dest, temp8);
    masm.jmp(bb56);
    masm.bind(bb15);
    masm.movdl(gpr1, temp1);
    masm.movdqu(temp2, temp1);
    masm.psrlq(temp1, 32);
    masm.movdl(gpr4, temp1);
    masm.movl(gpr3, gpr4);
    masm.addl(gpr4, gpr4);
    masm.orl(gpr1, gpr4);
    masm.jcc(ConditionFlag.Equal, bb19);
    masm.pextrw(gpr1, temp2, 3);
    masm.andl(gpr1, 32752);
    masm.cmpl(gpr1, 32752);
    masm.jcc(ConditionFlag.NotEqual, bb20);
    masm.movdl(gpr1, temp2);
    masm.psrlq(temp2, 20);
    masm.movdl(gpr4, temp2);
    masm.orl(gpr1, gpr4);
    masm.jcc(ConditionFlag.NotEqual, bb18);
    masm.bind(bb20);
    masm.pextrw(gpr1, dest, 3);
    masm.testl(gpr1, 32768);
    masm.jcc(ConditionFlag.NotEqual, bb21);
    masm.testl(gpr3, Integer.MIN_VALUE);
    masm.jcc(ConditionFlag.NotZero, bb22);
    masm.jmp(bb56);
    masm.bind(bb23);
    masm.movdl(gpr1, temp8);
    masm.testl(gpr1, 1);
    masm.jcc(ConditionFlag.NotEqual, bb24);
    masm.testl(gpr1, 2);
    masm.jcc(ConditionFlag.NotEqual, bb25);
    masm.jmp(bb24);
    masm.bind(bb21);
    masm.shrl(gpr3, 20);
    masm.andl(gpr3, 2047);
    masm.cmpl(gpr3, 1075);
    masm.jcc(ConditionFlag.Above, bb24);
    masm.jcc(ConditionFlag.Equal, bb26);
    masm.cmpl(gpr3, 1074);
    masm.jcc(ConditionFlag.Above, bb23);
    masm.cmpl(gpr3, 1023);
    masm.jcc(ConditionFlag.Below, bb24);
    masm.movdqu(temp1, temp8);
    masm.movl(gpr1, 17208);
    masm.xorpd(temp3, temp3);
    masm.pinsrw(temp3, gpr1, 3);
    masm.movdqu(temp4, temp3);
    masm.addsd(temp3, temp1);
    masm.subsd(temp4, temp3);
    masm.addsd(temp1, temp4);
    masm.pextrw(gpr1, temp1, 3);
    masm.andl(gpr1, 32752);
    masm.jcc(ConditionFlag.NotEqual, bb24);
    masm.movdl(gpr1, temp3);
    masm.andl(gpr1, 1);
    masm.jcc(ConditionFlag.Equal, bb24);
    masm.bind(bb25);
    masm.pextrw(gpr1, temp8, 3);
    masm.andl(gpr1, 32768);
    masm.jcc(ConditionFlag.NotEqual, bb27);
    masm.jmp(bb56);
    masm.bind(bb27);
    masm.xorpd(dest, dest);
    masm.movl(gpr1, 32768);
    masm.pinsrw(dest, gpr1, 3);
    masm.jmp(bb56);
    masm.bind(bb24);
    masm.pextrw(gpr1, temp8, 3);
    masm.andl(gpr1, 32768);
    masm.jcc(ConditionFlag.NotEqual, bb22);
    masm.xorpd(dest, dest);
    masm.movl(gpr1, 32752);
    masm.pinsrw(dest, gpr1, 3);
    masm.jmp(bb56);
    masm.bind(bb26);
    masm.movdl(gpr1, temp8);
    masm.andl(gpr1, 1);
    masm.jcc(ConditionFlag.Equal, bb24);
    masm.jmp(bb25);
    masm.bind(bb28);
    masm.movdl(gpr1, temp1);
    masm.psrlq(temp1, 20);
    masm.movdl(gpr4, temp1);
    masm.orl(gpr1, gpr4);
    masm.jcc(ConditionFlag.Equal, bb29);
    masm.addsd(dest, temp8);
    masm.jmp(bb56);
    masm.bind(bb29);
    masm.movdqu(dest, temp10);
    masm.pextrw(gpr1, dest, 3);
    masm.cmpl(gpr1, 49136);
    masm.jcc(ConditionFlag.NotEqual, bb30);
    masm.movdl(gpr3, dest);
    masm.psrlq(dest, 20);
    masm.movdl(gpr4, dest);
    masm.orl(gpr3, gpr4);
    masm.jcc(ConditionFlag.NotEqual, bb30);
    masm.xorpd(dest, dest);
    masm.movl(gpr1, 32760);
    masm.pinsrw(dest, gpr1, 3);
    masm.jmp(bb56);
    masm.bind(bb30);
    masm.andl(gpr1, 32752);
    masm.subl(gpr1, 16368);
    masm.pextrw(gpr4, temp8, 3);
    masm.xorpd(dest, dest);
    masm.xorl(gpr1, gpr4);
    masm.andl(gpr1, 32768);
    masm.jcc(ConditionFlag.Equal, bb31);
    masm.jmp(bb56);
    masm.bind(bb31);
    masm.movl(gpr3, 32752);
    masm.pinsrw(dest, gpr3, 3);
    masm.jmp(bb56);
    masm.bind(bb32);
    masm.movdl(gpr1, temp1);
    masm.cmpl(gpr4, 17184);
    masm.jcc(ConditionFlag.Above, bb33);
    masm.testl(gpr1, 1);
    masm.jcc(ConditionFlag.NotEqual, bb34);
    masm.testl(gpr1, 2);
    masm.jcc(ConditionFlag.Equal, bb35);
    masm.jmp(bb36);
    masm.bind(bb33);
    masm.testl(gpr1, 1);
    masm.jcc(ConditionFlag.Equal, bb35);
    masm.jmp(bb36);
    masm.bind(bb7);
    masm.movdqu(temp2, temp10);
    masm.movdl(gpr1, temp2);
    masm.psrlq(temp2, 31);
    masm.movdl(gpr3, temp2);
    masm.orl(gpr1, gpr3);
    masm.jcc(ConditionFlag.Equal, bb9);
    masm.pextrw(gpr4, temp8, 3);
    masm.movdl(gpr1, temp8);
    masm.movdqu(temp2, temp8);
    masm.psrlq(temp2, 32);
    masm.movdl(gpr3, temp2);
    masm.addl(gpr3, gpr3);
    masm.orl(gpr3, gpr1);
    masm.jcc(ConditionFlag.Equal, bb37);
    masm.andl(gpr4, 32752);
    masm.cmpl(gpr4, 32752);
    masm.jcc(ConditionFlag.Equal, bb28);
    masm.cmpl(gpr4, 17200);
    masm.jcc(ConditionFlag.Above, bb35);
    masm.cmpl(gpr4, 17184);
    masm.jcc(ConditionFlag.AboveEqual, bb32);
    masm.cmpl(gpr4, 16368);
    masm.jcc(ConditionFlag.Below, bb34);
    masm.movl(gpr1, 17208);
    masm.xorpd(temp2, temp2);
    masm.pinsrw(temp2, gpr1, 3);
    masm.movdqu(temp4, temp2);
    masm.addsd(temp2, temp1);
    masm.subsd(temp4, temp2);
    masm.addsd(temp1, temp4);
    masm.pextrw(gpr1, temp1, 3);
    masm.andl(gpr1, 32767);
    masm.jcc(ConditionFlag.NotEqual, bb34);
    masm.movdl(gpr1, temp2);
    masm.andl(gpr1, 1);
    masm.jcc(ConditionFlag.Equal, bb35);
    masm.bind(bb36);
    masm.xorpd(temp1, temp1);
    masm.movl(gpr4, 30704);
    masm.pinsrw(temp1, gpr4, 3);
    masm.pextrw(gpr1, temp10, 3);
    masm.movl(gpr4, 8192);
    masm.movdl(temp4, gpr4);
    masm.andl(gpr1, 32767);
    masm.subl(gpr1, 16);
    masm.jcc(ConditionFlag.Less, bb10);
    masm.movl(gpr4, gpr1);
    masm.andl(gpr4, 32752);
    masm.subl(gpr4, 16368);
    masm.movl(gpr3, gpr4);
    masm.sarl(gpr4, 31);
    masm.addl(gpr3, gpr4);
    masm.xorl(gpr3, gpr4);
    masm.addl(gpr3, 16);
    masm.bsrl(gpr3, gpr3);
    masm.movl(gpr5, Integer.MIN_VALUE);
    masm.jmp(bb1);
    masm.bind(bb34);
    masm.xorpd(temp1, temp1);
    masm.movl(gpr1, 32752);
    masm.pinsrw(temp1, gpr1, 3);
    masm.xorpd(dest, dest);
    masm.mulsd(dest, temp1);
    masm.jmp(bb56);
    masm.bind(bb35);
    masm.xorpd(temp1, temp1);
    masm.movl(gpr4, 30704);
    masm.pinsrw(temp1, gpr4, 3);
    masm.pextrw(gpr1, temp10, 3);
    masm.movl(gpr4, 8192);
    masm.movdl(temp4, gpr4);
    masm.andl(gpr1, 32767);
    masm.subl(gpr1, 16);
    masm.jcc(ConditionFlag.Less, bb8);
    masm.movl(gpr4, gpr1);
    masm.andl(gpr4, 32752);
    masm.subl(gpr4, 16368);
    masm.movl(gpr3, gpr4);
    masm.sarl(gpr4, 31);
    masm.addl(gpr3, gpr4);
    masm.xorl(gpr3, gpr4);
    masm.addl(gpr3, 16);
    masm.bsrl(gpr3, gpr3);
    masm.movl(gpr5, 0);
    masm.jmp(bb1);
    masm.bind(bb19);
    masm.xorpd(dest, dest);
    masm.movl(gpr1, 16368);
    masm.pinsrw(dest, gpr1, 3);
    masm.jmp(bb56);
    masm.bind(bb22);
    masm.xorpd(dest, dest);
    masm.jmp(bb56);
    masm.bind(bb11);
    masm.addl(gpr1, 384);
    masm.cmpl(gpr1, 0);
    masm.jcc(ConditionFlag.Less, bb38);
    masm.mulsd(temp5, temp1);
    masm.addsd(dest, temp7);
    masm.shrl(gpr5, 31);
    masm.addpd(temp3, dest);
    masm.pshufd(dest, temp3, 0xEE);
    masm.addsd(temp3, dest);
    // 0xfefa39ef,
    masm.leaq(gpr7, externalAddress(logTwoPowPtr));
    // 0x3fe62e42,
    // 0xfefa39ef,
    // 0xbfe62e42
    masm.movq(temp4, new AMD64Address(gpr7, gpr5, Scale.Times8, 0));
    masm.mulsd(temp1, temp3);
    masm.xorpd(dest, dest);
    masm.movl(gpr1, 16368);
    masm.shll(gpr5, 15);
    masm.orl(gpr1, gpr5);
    masm.pinsrw(dest, gpr1, 3);
    masm.addsd(temp5, temp1);
    masm.mulsd(temp5, temp4);
    masm.addsd(dest, temp5);
    masm.jmp(bb56);
    masm.bind(bb38);
    masm.bind(bb37);
    masm.xorpd(dest, dest);
    masm.movl(gpr1, 16368);
    masm.pinsrw(dest, gpr1, 3);
    masm.jmp(bb56);
    masm.bind(bb39);
    masm.xorpd(dest, dest);
    masm.movl(gpr1, 16368);
    masm.pinsrw(dest, gpr1, 3);
    masm.jmp(bb56);
    masm.bind(bb9);
    masm.movdqu(temp2, temp8);
    masm.pextrw(gpr1, temp8, 3);
    masm.andl(gpr1, 32752);
    masm.cmpl(gpr1, 32752);
    masm.jcc(ConditionFlag.NotEqual, bb40);
    masm.movdl(gpr1, temp2);
    masm.psrlq(temp2, 20);
    masm.movdl(gpr4, temp2);
    masm.orl(gpr1, gpr4);
    masm.jcc(ConditionFlag.NotEqual, bb18);
    masm.bind(bb40);
    masm.movdl(gpr1, temp1);
    masm.psrlq(temp1, 32);
    masm.movdl(gpr4, temp1);
    masm.movl(gpr3, gpr4);
    masm.addl(gpr4, gpr4);
    masm.orl(gpr1, gpr4);
    masm.jcc(ConditionFlag.Equal, bb39);
    masm.shrl(gpr4, 21);
    masm.cmpl(gpr4, 1075);
    masm.jcc(ConditionFlag.Above, bb41);
    masm.jcc(ConditionFlag.Equal, bb42);
    masm.cmpl(gpr4, 1023);
    masm.jcc(ConditionFlag.Below, bb41);
    masm.movdqu(temp1, temp8);
    masm.movl(gpr1, 17208);
    masm.xorpd(temp3, temp3);
    masm.pinsrw(temp3, gpr1, 3);
    masm.movdqu(temp4, temp3);
    masm.addsd(temp3, temp1);
    masm.subsd(temp4, temp3);
    masm.addsd(temp1, temp4);
    masm.pextrw(gpr1, temp1, 3);
    masm.andl(gpr1, 32752);
    masm.jcc(ConditionFlag.NotEqual, bb41);
    masm.movdl(gpr1, temp3);
    masm.andl(gpr1, 1);
    masm.jcc(ConditionFlag.Equal, bb41);
    masm.bind(bb43);
    masm.movdqu(dest, temp10);
    masm.testl(gpr3, Integer.MIN_VALUE);
    masm.jcc(ConditionFlag.NotEqual, bb44);
    masm.jmp(bb56);
    masm.bind(bb42);
    masm.movdl(gpr1, temp8);
    masm.testl(gpr1, 1);
    masm.jcc(ConditionFlag.NotEqual, bb43);
    masm.bind(bb41);
    masm.testl(gpr3, Integer.MIN_VALUE);
    masm.jcc(ConditionFlag.Equal, bb22);
    masm.xorpd(dest, dest);
    masm.bind(bb44);
    masm.movl(gpr1, 16368);
    masm.xorpd(temp1, temp1);
    masm.pinsrw(temp1, gpr1, 3);
    masm.divsd(temp1, dest);
    masm.movdqu(dest, temp1);
    masm.jmp(bb56);
    masm.bind(bb12);
    masm.pextrw(gpr1, temp10, 3);
    masm.pextrw(gpr4, temp8, 3);
    masm.movl(gpr3, 32752);
    masm.andl(gpr3, gpr4);
    masm.cmpl(gpr3, 32752);
    masm.jcc(ConditionFlag.Equal, bb45);
    masm.andl(gpr1, 32752);
    masm.subl(gpr1, 16368);
    masm.xorl(gpr4, gpr1);
    masm.testl(gpr4, 32768);
    masm.jcc(ConditionFlag.NotEqual, bb46);
    masm.bind(bb47);
    masm.movl(gpr1, 32736);
    masm.pinsrw(dest, gpr1, 3);
    masm.shrl(gpr5, 16);
    masm.orl(gpr1, gpr5);
    masm.pinsrw(temp1, gpr1, 3);
    masm.mulsd(dest, temp1);
    masm.bind(bb14);
    masm.jmp(bb56);
    masm.bind(bb46);
    masm.movl(gpr1, 16);
    masm.pinsrw(dest, gpr1, 3);
    masm.mulsd(dest, dest);
    masm.testl(gpr3, Integer.MIN_VALUE);
    masm.jcc(ConditionFlag.Equal, bb48);
    masm.movq(gpr2, 0x8000000000000000L);
    masm.movdq(temp2, gpr2);
    masm.xorpd(dest, temp2);
    masm.bind(bb48);
    masm.jmp(bb56);
    masm.bind(bb13);
    masm.pextrw(gpr3, temp5, 3);
    masm.pextrw(gpr4, temp4, 3);
    masm.movl(gpr1, -1);
    masm.andl(gpr3, 32752);
    masm.subl(gpr3, 16368);
    masm.andl(gpr4, 32752);
    masm.addl(gpr4, gpr3);
    masm.movl(gpr3, -31);
    masm.sarl(gpr4, 4);
    masm.subl(gpr3, gpr4);
    masm.jcc(ConditionFlag.LessEqual, bb49);
    masm.cmpl(gpr3, 20);
    masm.jcc(ConditionFlag.Above, bb50);
    masm.shll(gpr1);
    masm.bind(bb49);
    masm.movdl(dest, gpr1);
    masm.psllq(dest, 32);
    masm.pand(dest, temp5);
    masm.subsd(temp5, dest);
    masm.addsd(temp5, temp1);
    masm.mulsd(dest, temp4);
    masm.mulsd(temp5, temp4);
    masm.addsd(dest, temp5);
    masm.bind(bb50);
    masm.jmp(bb48);
    masm.bind(bb2);
    masm.pextrw(gpr3, temp8, 3);
    masm.movl(gpr4, Integer.MIN_VALUE);
    masm.movdl(temp1, gpr4);
    masm.xorpd(temp7, temp7);
    masm.paddd(dest, temp4);
    masm.movdl(gpr4, dest);
    masm.psllq(dest, 29);
    masm.paddq(temp1, temp3);
    masm.pand(temp5, temp1);
    masm.andl(gpr3, 32752);
    masm.cmpl(gpr3, 16560);
    masm.jcc(ConditionFlag.Less, bb3);
    masm.leaq(gpr7, externalAddress(lTblPowPtr));
    masm.leaq(gpr8, externalAddress(coeffHPtr));
    // 0x00000000,
    masm.movdqu(temp4, new AMD64Address(gpr8, 0));
    // 0xbfd61a00,
    // 0x00000000,
    // 0xbf5dabe1
    masm.pand(dest, temp6);
    masm.subsd(temp3, temp5);
    masm.addl(gpr1, 16351);
    masm.shrl(gpr1, 4);
    masm.subl(gpr1, 1022);
    masm.cvtsi2sdl(temp7, gpr1);
    masm.mulpd(temp5, dest);
    masm.mulsd(temp3, dest);
    masm.subsd(temp5, temp9);
    masm.pshufd(temp1, temp4, 0xE);
    masm.pshufd(temp2, temp3, 0x44);
    masm.unpcklpd(temp5, temp3);
    masm.addsd(temp3, temp5);
    masm.andl(gpr4, 16760832);
    masm.shrl(gpr4, 10);
    masm.addpd(temp7, new AMD64Address(gpr7, gpr4, Scale.Times1, -3648));
    masm.movdqu(temp6, temp4);
    masm.mulsd(temp4, temp5);
    masm.movdqu(dest, temp1);
    masm.mulsd(dest, temp5);
    masm.mulsd(temp6, temp2);
    masm.mulsd(temp1, temp2);
    masm.movdqu(temp2, temp5);
    masm.mulsd(temp4, temp5);
    masm.addsd(temp5, dest);
    masm.movdqu(dest, temp7);
    masm.addsd(temp2, temp3);
    masm.addsd(temp7, temp5);
    masm.mulsd(temp6, temp2);
    masm.subsd(dest, temp7);
    masm.movdqu(temp2, temp7);
    masm.addsd(temp7, temp4);
    masm.addsd(dest, temp5);
    masm.subsd(temp2, temp7);
    masm.addsd(temp4, temp2);
    masm.pshufd(temp2, temp5, 0xEE);
    masm.movdqu(temp5, temp7);
    masm.addsd(temp7, temp2);
    masm.addsd(temp4, dest);
    masm.leaq(gpr8, externalAddress(coeffPowPtr));
    // 0x6dc96112,
    masm.movdqu(dest, new AMD64Address(gpr8, 0));
    // 0xbf836578,
    // 0xee241472,
    // 0xbf9b0301
    masm.subsd(temp5, temp7);
    masm.addsd(temp6, temp4);
    masm.movdqu(temp4, temp7);
    masm.addsd(temp5, temp2);
    masm.addsd(temp7, temp1);
    // 0x486ececc,
    masm.movdqu(temp2, new AMD64Address(gpr8, 64));
    // 0x3fc4635e,
    // 0x161bb241,
    // 0xbf5dabe1
    masm.subsd(temp4, temp7);
    masm.addsd(temp6, temp5);
    masm.addsd(temp4, temp1);
    masm.pshufd(temp5, temp7, 0xEE);
    masm.movapd(temp1, temp7);
    masm.addsd(temp7, temp5);
    masm.subsd(temp1, temp7);
    masm.addsd(temp1, temp5);
    // 0x9f95985a,
    masm.movdqu(temp5, new AMD64Address(gpr8, 80));
    // 0xbfb528db,
    // 0xf8b5787d,
    // 0x3ef2531e
    masm.pshufd(temp3, temp3, 0x44);
    masm.addsd(temp6, temp4);
    masm.addsd(temp6, temp1);
    // 0x9f95985a,
    masm.movdqu(temp1, new AMD64Address(gpr8, 32));
    // 0xbfb528db,
    // 0xb3841d2a,
    // 0xbfd619b6
    masm.mulpd(dest, temp3);
    masm.mulpd(temp2, temp3);
    masm.pshufd(temp4, temp3, 0x44);
    masm.mulpd(temp3, temp3);
    masm.addpd(dest, temp1);
    masm.addpd(temp5, temp2);
    masm.mulsd(temp4, temp3);
    // 0xf8000000,
    masm.movq(temp2, externalAddress(highmaskLogXPtr));
    // 0xffffffff
    masm.mulpd(temp3, temp3);
    masm.movdqu(temp1, temp8);
    masm.pextrw(gpr3, temp8, 3);
    masm.mulpd(dest, temp4);
    masm.pextrw(gpr1, temp7, 3);
    masm.mulpd(temp5, temp4);
    masm.mulpd(dest, temp3);
    masm.leaq(gpr8, externalAddress(highmaskYPtr));
    // 0x00000000,
    masm.movq(temp4, new AMD64Address(gpr8, 8));
    // 0xffffffff
    masm.pand(temp2, temp7);
    masm.addsd(temp5, temp6);
    masm.subsd(temp7, temp2);
    masm.addpd(temp5, dest);
    masm.andl(gpr1, 32752);
    masm.subl(gpr1, 16368);
    masm.andl(gpr3, 32752);
    masm.cmpl(gpr3, 32752);
    masm.jcc(ConditionFlag.Equal, bb45);
    masm.addl(gpr3, gpr1);
    masm.cmpl(gpr3, 16576);
    masm.jcc(ConditionFlag.AboveEqual, bb51);
    masm.pshufd(dest, temp5, 0xEE);
    masm.pand(temp4, temp1);
    masm.movdqu(temp3, temp1);
    masm.addsd(temp5, dest);
    masm.subsd(temp1, temp4);
    masm.xorpd(temp6, temp6);
    masm.movl(gpr4, 17080);
    masm.pinsrw(temp6, gpr4, 3);
    masm.addsd(temp7, temp5);
    masm.mulsd(temp4, temp2);
    masm.mulsd(temp1, temp2);
    masm.movdqu(temp5, temp6);
    masm.mulsd(temp3, temp7);
    masm.addsd(temp6, temp4);
    masm.addsd(temp1, temp3);
    masm.leaq(gpr8, externalAddress(eCoeffPtr));
    // 0xe78a6731,
    masm.movdqu(temp7, new AMD64Address(gpr8, 0));
    // 0x3f55d87f,
    // 0xd704a0c0,
    // 0x3fac6b08
    masm.movdl(gpr4, temp6);
    masm.subsd(temp6, temp5);
    masm.leaq(gpr7, externalAddress(tExpPtr));
    masm.movl(gpr3, gpr4);
    masm.andl(gpr4, 255);
    masm.addl(gpr4, gpr4);
    masm.movdqu(temp5, new AMD64Address(gpr7, gpr4, Scale.Times8, 0));
    // 0x6fba4e77,
    masm.movdqu(temp3, new AMD64Address(gpr8, 16));
    // 0x3f83b2ab,
    // 0xff82c58f,
    // 0x3fcebfbd
    // 0xfefa39ef,
    masm.movq(temp2, new AMD64Address(gpr8, 32));
    // 0x3fe62e42
    masm.subsd(temp4, temp6);
    masm.addsd(temp4, temp1);
    masm.pextrw(gpr4, temp6, 3);
    masm.shrl(gpr3, 8);
    masm.movl(gpr1, gpr3);
    masm.shrl(gpr3, 1);
    masm.subl(gpr1, gpr3);
    masm.shll(gpr3, 20);
    masm.movdl(temp6, gpr3);
    masm.pshufd(dest, temp4, 0x44);
    masm.pshufd(temp1, temp4, 0x44);
    masm.mulpd(dest, dest);
    masm.mulpd(temp7, temp1);
    masm.pshufd(temp6, temp6, 0x11);
    masm.mulsd(temp2, temp4);
    masm.andl(gpr4, 32767);
    masm.cmpl(gpr4, 16529);
    masm.jcc(ConditionFlag.Above, bb12);
    masm.mulsd(dest, dest);
    masm.paddd(temp5, temp6);
    masm.addpd(temp3, temp7);
    masm.mulsd(temp2, temp5);
    masm.pshufd(temp6, temp5, 0xEE);
    masm.mulpd(dest, temp3);
    masm.addsd(temp2, temp6);
    masm.pshufd(temp3, dest, 0xEE);
    masm.addl(gpr1, 1023);
    masm.shll(gpr1, 20);
    masm.orl(gpr1, gpr5);
    masm.movdl(temp4, gpr1);
    masm.mulsd(dest, temp5);
    masm.mulsd(temp3, temp5);
    masm.addsd(dest, temp2);
    masm.psllq(temp4, 32);
    masm.addsd(dest, temp3);
    masm.movdqu(temp1, dest);
    masm.addsd(dest, temp5);
    masm.mulsd(dest, temp4);
    masm.pextrw(gpr1, dest, 3);
    masm.andl(gpr1, 32752);
    masm.jcc(ConditionFlag.Equal, bb13);
    masm.cmpl(gpr1, 32752);
    masm.jcc(ConditionFlag.Equal, bb14);
    masm.jmp(bb56);
    masm.bind(bb45);
    masm.movdqu(dest, temp10);
    masm.xorpd(temp2, temp2);
    masm.movl(gpr1, 49136);
    masm.pinsrw(temp2, gpr1, 3);
    masm.addsd(temp2, dest);
    masm.pextrw(gpr1, temp2, 3);
    masm.cmpl(gpr1, 0);
    masm.jcc(ConditionFlag.NotEqual, bb53);
    masm.xorpd(dest, dest);
    masm.movl(gpr1, 32760);
    masm.pinsrw(dest, gpr1, 3);
    masm.jmp(bb56);
    masm.bind(bb53);
    masm.movdqu(temp1, temp8);
    masm.movdl(gpr4, temp1);
    masm.movdqu(temp3, temp1);
    masm.psrlq(temp3, 20);
    masm.movdl(gpr3, temp3);
    masm.orl(gpr3, gpr4);
    masm.jcc(ConditionFlag.Equal, bb54);
    masm.addsd(temp1, temp1);
    masm.movdqu(dest, temp1);
    masm.jmp(bb56);
    masm.bind(bb51);
    masm.pextrw(gpr1, temp1, 3);
    masm.pextrw(gpr3, temp2, 3);
    masm.xorl(gpr1, gpr3);
    masm.testl(gpr1, 32768);
    masm.jcc(ConditionFlag.Equal, bb47);
    masm.jmp(bb46);
    masm.bind(bb54);
    masm.pextrw(gpr1, dest, 3);
    masm.andl(gpr1, 32752);
    masm.pextrw(gpr4, temp1, 3);
    masm.xorpd(dest, dest);
    masm.subl(gpr1, 16368);
    masm.xorl(gpr1, gpr4);
    masm.testl(gpr1, 32768);
    masm.jcc(ConditionFlag.Equal, bb55);
    masm.jmp(bb56);
    masm.bind(bb55);
    masm.movl(gpr4, 32752);
    masm.pinsrw(dest, gpr4, 3);
    masm.jmp(bb56);
    masm.bind(bb56);
}
Also used: Register (jdk.vm.ci.code.Register), ValueUtil.asRegister (jdk.vm.ci.code.ValueUtil.asRegister), ArrayDataPointerConstant (org.graalvm.compiler.lir.asm.ArrayDataPointerConstant), Label (org.graalvm.compiler.asm.Label), AMD64Address (org.graalvm.compiler.asm.amd64.AMD64Address)

Aggregations

Register (jdk.vm.ci.code.Register) 7, ValueUtil.asRegister (jdk.vm.ci.code.ValueUtil.asRegister) 7, Label (org.graalvm.compiler.asm.Label) 7, AMD64Address (org.graalvm.compiler.asm.amd64.AMD64Address) 7, ArrayDataPointerConstant (org.graalvm.compiler.lir.asm.ArrayDataPointerConstant) 7