use of org.graalvm.compiler.lir.asm.ArrayDataPointerConstant in project graal by oracle.
the class AMD64MathIntrinsicUnaryOp method log10Intrinsic.
/*
 * Copyright (c) 2014, 2016, Intel Corporation. All rights reserved. Intel Math Library (LIBM)
 * Source Code
 *
 * ALGORITHM DESCRIPTION - LOG10()
 * -------------------------------
 *
 * Let x = 2^k * mx, mx in [1,2)
 *
 * Get B ~ 1/mx based on the output of the rcpss instruction (B0):
 *     B = int((B0*LH*2^7+0.5))/2^7
 * where LH is a short approximation for log10(e).
 *
 * Reduced argument: r = B*mx - LH (computed accurately in high and low parts)
 *
 * Result: k*log10(2) - log(B) + p(r)
 *     p(r) is a degree 7 polynomial
 *     -log(B) is read from a data table (high, low parts)
 *     The result is formed from high and low parts.
 *
 * Special cases:
 *     log10(0)    = -INF with divide-by-zero exception raised
 *     log10(1)    = +0
 *     log10(x)    = NaN with invalid exception raised if x < -0, including -INF
 *     log10(+INF) = +INF
 */
/**
 * Emits into {@code masm} the instruction sequence that computes log10 of the double held in
 * {@code value} and leaves the result in {@code dest}, following the algorithm described in the
 * comment above.
 *
 * @param dest XMM register that receives the result; it is also used as a working register
 * @param value XMM register holding the input x
 * @param crb compilation result builder, used to resolve the stack slot and handed to setCrb
 * @param masm assembler that receives the emitted instructions
 */
public void log10Intrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) {
// Data-section handles for the constant tables used below.
// NOTE(review): the second constructor argument (16) is presumably the required alignment in
// bytes - confirm against ArrayDataPointerConstant.
ArrayDataPointerConstant highmaskLogTenPtr = new ArrayDataPointerConstant(highmaskLogTen, 16);
ArrayDataPointerConstant logTenEPtr = new ArrayDataPointerConstant(logTenE, 16);
ArrayDataPointerConstant logTenTablePtr = new ArrayDataPointerConstant(logTenTable, 16);
ArrayDataPointerConstant logTwoLogTenDataPtr = new ArrayDataPointerConstant(logTwoLogTenData, 16);
ArrayDataPointerConstant coeffLogTenDataPtr = new ArrayDataPointerConstant(coeffLogTenData, 16);
// Branch targets:
// bb0 - entry of special-case handling (zero/denormal, negative, Inf, NaN)
// bb1 - main computation (also re-entered after a denormal has been rescaled)
// bb2 - input has its sign bit set
// bb3 - input has a zero exponent field (zero or denormal)
// bb4 - return x + x
// bb5 - sign bit set and exponent field all ones
// bb6 - return NaN (0 * Inf)
// bb7 - return -Inf (-1 / 0)
// bb8 - common exit
Label bb0 = new Label();
Label bb1 = new Label();
Label bb2 = new Label();
Label bb3 = new Label();
Label bb4 = new Label();
Label bb5 = new Label();
Label bb6 = new Label();
Label bb7 = new Label();
Label bb8 = new Label();
// Scratch general-purpose and XMM registers taken from the operation's temp operands.
Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD);
Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD);
Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD);
Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD);
Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE);
Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE);
Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE);
Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE);
Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE);
Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE);
Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE);
AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp);
setCrb(crb);
// Keep an untouched copy of x on the stack; the special-case path at bb0 reloads it.
masm.movdq(stackSlot, value);
// Work on x in dest; the copy is skipped when both operands are the same register.
if (dest.encoding != value.encoding) {
masm.movdqu(dest, value);
}
// highmaskLogTen data words: 0xf8000000, 0xffffffff, 0x00000000, 0xffffe000
masm.movdqu(temp5, externalAddress(highmaskLogTenPtr));
// temp2 = 0x3FF0 (16368) in the top 16 bits of the low quadword, i.e. the bit pattern of 1.0.
masm.xorpd(temp2, temp2);
masm.movl(gpr1, 16368);
masm.pinsrw(temp2, gpr1, 3);
// temp7 = 0x3EDE0000 as a single-precision value (about 0.4336) - presumably the short
// approximation LH of log10(e) from the algorithm description.
masm.movl(gpr2, 1054736384);
masm.movdl(temp7, gpr2);
// temp3 = 0x77F0 (30704) in the top 16 bits of the low quadword; OR-ed onto the isolated
// mantissa at bb1.
masm.xorpd(temp3, temp3);
masm.movl(gpr3, 30704);
masm.pinsrw(temp3, gpr3, 3);
// temp4 = 0x8000 (32768) in the low dword; added to the reciprocal estimate at bb1.
masm.movl(gpr3, 32768);
masm.movdl(temp4, gpr3);
masm.movdqu(temp1, value);
// gpr1 = top 16 bits of x: sign, 11 exponent bits and the 4 leading mantissa bits.
masm.pextrw(gpr1, dest, 3);
masm.por(dest, temp2);
// gpr2 = 0x3FE0 (16352): exponent offset subtracted from gpr1 in the main path.
masm.movl(gpr2, 16352);
masm.psrlq(dest, 27);
// logTenE data words: 0x00000000, 0x3fdbc000, 0xbf2e4108, 0x3f5a7a6c
masm.movdqu(temp2, externalAddress(logTenEPtr));
masm.psrld(dest, 2);
// Approximate reciprocal of the value built from the leading bits of x - presumably the B0 of
// the algorithm description.
masm.rcpps(dest, dest);
// Shifting left then right by 12 clears the sign and exponent bits, leaving only the mantissa
// of x in temp1.
masm.psllq(temp1, 12);
// temp6 = highmaskLogTen with its two 64-bit halves swapped.
masm.pshufd(temp6, temp5, 0x4E);
masm.psrlq(temp1, 12);
// Unsigned range check on the top word w of x: (w - 16) >= 0x7FE0 holds when w < 16 (zero
// exponent field), when w >= 0x7FF0 (Inf/NaN) or when the sign bit is set. All of these go to
// the special-case code at bb0.
masm.subl(gpr1, 16);
masm.cmpl(gpr1, 32736);
masm.jcc(ConditionFlag.AboveEqual, bb0);
// ---- Main path ----
masm.bind(bb1);
masm.mulss(dest, temp7);
// Give the isolated mantissa the fixed exponent field from temp3.
masm.por(temp1, temp3);
// temp5 = mantissa value with only the bits selected by the high mask; the remaining low
// part is obtained below by subtraction.
masm.andpd(temp5, temp1);
masm.paddd(dest, temp4);
// coeffLogTenData data words [0..3]: 0xc1a5f12e, 0x40358874, 0x64d4ef0d, 0xc0089309
masm.movdqu(temp3, externalAddress(coeffLogTenDataPtr));
masm.leaq(gpr4, externalAddress(coeffLogTenDataPtr));
// coeffLogTenData data words [4..7]: 0x385593b1, 0xc025c917, 0xdc963467, 0x3ffc6a02
masm.movdqu(temp4, new AMD64Address(gpr4, 16));
masm.subsd(temp1, temp5);
// Keep the low 32 bits of the scaled reciprocal in gpr3; they provide the table index below.
masm.movdl(gpr3, dest);
masm.psllq(dest, 29);
masm.andpd(dest, temp6);
// logTwoLogTenData data words [0..1]: 0x509f7800, 0x3f934413
masm.movdq(temp6, externalAddress(logTwoLogTenDataPtr));
// Isolate the exponent field (mask 0x7FF0), subtract the offset held in gpr2 and convert the
// result to double in temp7. The field is still shifted left by 4 bits here.
masm.andl(gpr1, 32752);
masm.subl(gpr1, gpr2);
masm.cvtsi2sdl(temp7, gpr1);
masm.mulpd(temp5, dest);
masm.mulsd(temp1, dest);
masm.subsd(temp5, temp2);
// coeffLogTenData data words [8..11]: 0x7f9d3aa1, 0x4016ab9f, 0xdc77b115, 0xbff27af2
masm.movdqu(temp2, new AMD64Address(gpr4, 32));
masm.leaq(gpr4, externalAddress(logTenTablePtr));
// Table lookup: bits 16..23 of gpr3 (mask 0xFF0000) shifted right by 12 give a byte offset in
// steps of 16, applied with a fixed displacement of -1504. Per the algorithm description the
// table holds -log(B) in high and low parts.
masm.andl(gpr3, 16711680);
masm.shrl(gpr3, 12);
masm.movdqu(dest, new AMD64Address(gpr4, gpr3, Scale.Times1, -1504));
masm.addsd(temp1, temp5);
masm.mulsd(temp6, temp7);
// Broadcast the low double of temp1 into both lanes of temp5.
masm.pshufd(temp5, temp1, 0x44);
masm.leaq(gpr4, externalAddress(logTwoLogTenDataPtr));
// logTwoLogTenData data words [2..3]: 0x1f12b358, 0x3cdfef31
masm.mulsd(temp7, new AMD64Address(gpr4, 8));
masm.mulsd(temp3, temp1);
masm.addsd(dest, temp6);
masm.mulpd(temp4, temp5);
masm.leaq(gpr4, externalAddress(logTenEPtr));
// logTenE data words [2..3]: 0xbf2e4108, 0x3f5a7a6c
masm.movdq(temp6, new AMD64Address(gpr4, 8));
// The remaining instructions evaluate the polynomial terms with packed multiplies/adds and
// sum the partial results into dest.
masm.mulpd(temp5, temp5);
masm.addpd(temp4, temp2);
masm.mulpd(temp3, temp5);
// Shuffle control 0xE4 keeps the element order, so this is a plain copy of dest into temp2.
masm.pshufd(temp2, dest, 0xE4);
masm.addsd(dest, temp1);
masm.mulsd(temp4, temp1);
masm.subsd(temp2, dest);
masm.mulsd(temp6, temp1);
masm.addsd(temp1, temp2);
// Shuffle control 0xEE copies the high 64-bit half of the source into both halves.
masm.pshufd(temp2, dest, 0xEE);
masm.mulsd(temp5, temp5);
masm.addsd(temp7, temp2);
masm.addsd(temp1, temp6);
masm.addpd(temp4, temp3);
masm.addsd(temp1, temp7);
masm.mulpd(temp4, temp5);
masm.addsd(temp1, temp4);
masm.pshufd(temp5, temp4, 0xEE);
masm.addsd(temp1, temp5);
masm.addsd(dest, temp1);
masm.jmp(bb8);
// ---- Special cases ----
masm.bind(bb0);
// Reload the original x and undo the -16 bias applied to gpr1 before the range check.
masm.movdq(dest, stackSlot);
masm.movdq(temp1, stackSlot);
masm.addl(gpr1, 16);
// Top word >= 0x8000: the sign bit is set.
masm.cmpl(gpr1, 32768);
masm.jcc(ConditionFlag.AboveEqual, bb2);
// Top word < 16: the exponent field is zero (x is zero or denormal).
masm.cmpl(gpr1, 16);
masm.jcc(ConditionFlag.Below, bb3);
// Otherwise x is +Inf or a positive NaN: return x + x.
masm.bind(bb4);
masm.addsd(dest, dest);
masm.jmp(bb8);
// Sign bit set and exponent field all ones. The first jcc still tests the flags of the
// compare against 0xFFE00000 done at bb2: strictly above means the upper mantissa bits are
// non-zero (NaN). The second test catches a NaN whose non-zero bits are only in the low
// 32 bits. What remains is -Inf, which yields NaN via bb6.
masm.bind(bb5);
masm.jcc(ConditionFlag.Above, bb4);
masm.cmpl(gpr3, 0);
masm.jcc(ConditionFlag.Above, bb4);
masm.jmp(bb6);
// Zero exponent field: distinguish zero from denormal.
masm.bind(bb3);
masm.xorpd(temp1, temp1);
masm.addsd(temp1, dest);
// OR the low and high 32-bit halves together; a zero result means x is zero.
masm.movdl(gpr3, temp1);
masm.psrlq(temp1, 32);
masm.movdl(gpr2, temp1);
masm.orl(gpr3, gpr2);
masm.cmpl(gpr3, 0);
masm.jcc(ConditionFlag.Equal, bb7);
// Denormal input: multiply x by the double whose top word is 0x47F0 (18416), i.e. 2^128, then
// repeat the set-up done before bb1 on the scaled value.
masm.xorpd(temp1, temp1);
masm.xorpd(temp2, temp2);
masm.movl(gpr1, 18416);
masm.pinsrw(temp1, gpr1, 3);
masm.mulsd(dest, temp1);
masm.movl(gpr1, 16368);
masm.pinsrw(temp2, gpr1, 3);
masm.movdqu(temp1, dest);
masm.pextrw(gpr1, dest, 3);
masm.por(dest, temp2);
// Exponent offset for the scaled value (0x47F0 instead of 0x3FE0 on the normal path).
masm.movl(gpr2, 18416);
masm.psrlq(dest, 27);
// logTenE data words: 0x00000000, 0x3fdbc000, 0xbf2e4108, 0x3f5a7a6c
masm.movdqu(temp2, externalAddress(logTenEPtr));
masm.psrld(dest, 2);
masm.rcpps(dest, dest);
masm.psllq(temp1, 12);
masm.pshufd(temp6, temp5, 0x4E);
masm.psrlq(temp1, 12);
masm.jmp(bb1);
// Sign bit set.
masm.bind(bb2);
// gpr3 = low 32 bits of x, gpr2 = high 32 bits doubled (which shifts the sign bit out).
masm.movdl(gpr3, temp1);
masm.psrlq(temp1, 32);
masm.movdl(gpr2, temp1);
masm.addl(gpr2, gpr2);
// Unsigned compare against 0xFFE00000 (-2097152): at or above means the exponent field is
// all ones (-Inf or NaN).
masm.cmpl(gpr2, -2097152);
masm.jcc(ConditionFlag.AboveEqual, bb5);
// All remaining bits zero means x is -0, treated like zero.
masm.orl(gpr3, gpr2);
masm.cmpl(gpr3, 0);
masm.jcc(ConditionFlag.Equal, bb7);
// Negative input: produce NaN by computing 0 * Inf (top word 0x7FF0 = 32752 is +Inf).
masm.bind(bb6);
masm.xorpd(temp1, temp1);
masm.xorpd(dest, dest);
masm.movl(gpr1, 32752);
masm.pinsrw(temp1, gpr1, 3);
masm.mulsd(dest, temp1);
masm.jmp(bb8);
// Zero input: produce -Inf by computing -1.0 / 0 (top word 0xBFF0 = 49136 is -1.0).
masm.bind(bb7);
masm.xorpd(temp1, temp1);
masm.xorpd(dest, dest);
masm.movl(gpr1, 49136);
masm.pinsrw(dest, gpr1, 3);
masm.divsd(dest, temp1);
// Common exit: the result is in dest.
masm.bind(bb8);
}
use of org.graalvm.compiler.lir.asm.ArrayDataPointerConstant in project graal by oracle.
the class AMD64MathIntrinsicBinaryOp method powIntrinsic.
public void powIntrinsic(Register dest, Register value1, Register value2, CompilationResultBuilder crb, AMD64MacroAssembler masm) {
ArrayDataPointerConstant highSigMaskPtr = new ArrayDataPointerConstant(highSigMask, 16);
ArrayDataPointerConstant logTwoEPtr = new ArrayDataPointerConstant(logTwoE, 16);
ArrayDataPointerConstant highmaskYPtr = new ArrayDataPointerConstant(highmaskY, 16);
ArrayDataPointerConstant tExpPtr = new ArrayDataPointerConstant(tExp, 16);
ArrayDataPointerConstant eCoeffPtr = new ArrayDataPointerConstant(eCoeff, 16);
ArrayDataPointerConstant coeffHPtr = new ArrayDataPointerConstant(coeffH, 16);
ArrayDataPointerConstant highmaskLogXPtr = new ArrayDataPointerConstant(highmaskLogX, 16);
ArrayDataPointerConstant halfmaskPtr = new ArrayDataPointerConstant(halfmask, 8);
ArrayDataPointerConstant coeffPowPtr = new ArrayDataPointerConstant(coeffPow, 16);
ArrayDataPointerConstant lTblPowPtr = new ArrayDataPointerConstant(lTblPow, 16);
ArrayDataPointerConstant logTwoPowPtr = new ArrayDataPointerConstant(logTwoPow, 8);
Label bb0 = new Label();
Label bb1 = new Label();
Label bb2 = new Label();
Label bb3 = new Label();
Label bb4 = new Label();
Label bb5 = new Label();
Label bb6 = new Label();
Label bb7 = new Label();
Label bb8 = new Label();
Label bb9 = new Label();
Label bb10 = new Label();
Label bb11 = new Label();
Label bb12 = new Label();
Label bb13 = new Label();
Label bb14 = new Label();
Label bb15 = new Label();
Label bb16 = new Label();
Label bb18 = new Label();
Label bb19 = new Label();
Label bb20 = new Label();
Label bb21 = new Label();
Label bb22 = new Label();
Label bb23 = new Label();
Label bb24 = new Label();
Label bb25 = new Label();
Label bb26 = new Label();
Label bb27 = new Label();
Label bb28 = new Label();
Label bb29 = new Label();
Label bb30 = new Label();
Label bb31 = new Label();
Label bb32 = new Label();
Label bb33 = new Label();
Label bb34 = new Label();
Label bb35 = new Label();
Label bb36 = new Label();
Label bb37 = new Label();
Label bb38 = new Label();
Label bb39 = new Label();
Label bb40 = new Label();
Label bb41 = new Label();
Label bb42 = new Label();
Label bb43 = new Label();
Label bb44 = new Label();
Label bb45 = new Label();
Label bb46 = new Label();
Label bb47 = new Label();
Label bb48 = new Label();
Label bb49 = new Label();
Label bb50 = new Label();
Label bb51 = new Label();
Label bb53 = new Label();
Label bb54 = new Label();
Label bb55 = new Label();
Label bb56 = new Label();
Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD);
Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD);
Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD);
Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD);
Register gpr5 = asRegister(gpr5Temp, AMD64Kind.QWORD);
Register gpr6 = asRegister(gpr6Temp, AMD64Kind.QWORD);
Register gpr7 = asRegister(gpr7Temp, AMD64Kind.QWORD);
Register gpr8 = asRegister(gpr8Temp, AMD64Kind.QWORD);
Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE);
Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE);
Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE);
Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE);
Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE);
Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE);
Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE);
Register temp8 = asRegister(xmm8Temp, AMD64Kind.DOUBLE);
Register temp9 = asRegister(xmm9Temp, AMD64Kind.DOUBLE);
Register temp10 = asRegister(xmm10Temp, AMD64Kind.DOUBLE);
setCrb(crb);
masm.movdqu(temp10, value1);
masm.movsd(temp8, value2);
if (dest.encoding != value1.encoding) {
masm.movdqu(dest, value1);
}
// 0x00000000,
masm.movq(temp9, externalAddress(logTwoEPtr));
// 0x3ff72000
masm.pextrw(gpr1, dest, 3);
masm.xorpd(temp2, temp2);
masm.movq(gpr2, 0x3ff0000000000000L);
masm.movdq(temp2, gpr2);
masm.movl(gpr5, 1069088768);
masm.movdq(temp7, gpr5);
masm.xorpd(temp1, temp1);
masm.movq(gpr6, 0x77f0000000000000L);
masm.movdq(temp1, gpr6);
masm.movdqu(temp3, dest);
masm.movl(gpr4, 32752);
masm.andl(gpr4, gpr1);
masm.subl(gpr4, 16368);
masm.movl(gpr3, gpr4);
masm.sarl(gpr4, 31);
masm.addl(gpr3, gpr4);
masm.xorl(gpr3, gpr4);
masm.por(dest, temp2);
// 0x00000000,
masm.movdqu(temp6, externalAddress(highSigMaskPtr));
// 0xfffff800,
// 0x00000000,
// 0xfffff800
masm.psrlq(dest, 27);
masm.psrld(dest, 2);
masm.addl(gpr3, 16);
masm.bsrl(gpr3, gpr3);
masm.rcpps(dest, dest);
masm.psllq(temp3, 12);
masm.movl(gpr7, 8192);
masm.movdq(temp4, gpr7);
masm.psrlq(temp3, 12);
masm.subl(gpr1, 16);
masm.cmpl(gpr1, 32736);
masm.jcc(ConditionFlag.AboveEqual, bb0);
masm.movl(gpr5, 0);
masm.bind(bb1);
masm.mulss(dest, temp7);
masm.movl(gpr4, -1);
masm.subl(gpr3, 4);
masm.shll(gpr4);
masm.shlq(gpr4, 32);
masm.movdq(temp5, gpr4);
masm.por(temp3, temp1);
masm.subl(gpr1, 16351);
masm.cmpl(gpr1, 1);
masm.jcc(ConditionFlag.BelowEqual, bb2);
masm.paddd(dest, temp4);
masm.pand(temp5, temp3);
masm.movdl(gpr4, dest);
masm.psllq(dest, 29);
masm.bind(bb3);
masm.subsd(temp3, temp5);
masm.pand(dest, temp6);
masm.subl(gpr1, 1);
masm.sarl(gpr1, 4);
masm.cvtsi2sdl(temp7, gpr1);
masm.mulpd(temp5, dest);
masm.bind(bb4);
masm.mulsd(temp3, dest);
masm.leaq(gpr8, externalAddress(coeffPowPtr));
// 0x6dc96112,
masm.movdqu(temp1, new AMD64Address(gpr8, 0));
// 0xbf836578,
// 0xee241472,
// 0xbf9b0301
// 0x9f95985a,
masm.movdqu(temp4, new AMD64Address(gpr8, 16));
// 0xbfb528db,
// 0xb3841d2a,
// 0xbfd619b6
// 0x518775e3,
masm.movdqu(temp6, new AMD64Address(gpr8, 32));
// 0x3f9004f2,
// 0xac8349bb,
// 0x3fa76c9b
// 0x486ececc,
masm.movdqu(dest, new AMD64Address(gpr8, 48));
// 0x3fc4635e,
// 0x161bb241,
// 0xbf5dabe1
masm.subsd(temp5, temp9);
masm.movl(gpr3, gpr1);
masm.sarl(gpr1, 31);
masm.addl(gpr3, gpr1);
masm.xorl(gpr1, gpr3);
masm.addl(gpr1, 1);
masm.bsrl(gpr1, gpr1);
masm.unpcklpd(temp5, temp3);
masm.addsd(temp3, temp5);
masm.leaq(gpr7, externalAddress(lTblPowPtr));
masm.andl(gpr4, 16760832);
masm.shrl(gpr4, 10);
masm.addpd(temp5, new AMD64Address(gpr7, gpr4, Scale.Times1, -3648));
masm.pshufd(temp2, temp3, 0x44);
masm.mulsd(temp3, temp3);
masm.mulpd(temp1, temp2);
masm.mulpd(temp4, temp2);
masm.addsd(temp5, temp7);
masm.mulsd(temp2, temp3);
masm.addpd(temp6, temp1);
masm.mulsd(temp3, temp3);
masm.addpd(dest, temp4);
masm.movdqu(temp1, temp8);
masm.pextrw(gpr3, temp8, 3);
masm.pshufd(temp7, temp5, 0xEE);
// 0x00000000,
masm.movq(temp4, externalAddress(highmaskYPtr));
// 0xfffffff8
masm.mulpd(temp6, temp2);
masm.pshufd(temp3, temp3, 0x44);
masm.mulpd(dest, temp2);
masm.shll(gpr1, 4);
masm.subl(gpr1, 15872);
masm.andl(gpr3, 32752);
masm.addl(gpr1, gpr3);
masm.mulpd(temp3, temp6);
masm.cmpl(gpr1, 624);
masm.jcc(ConditionFlag.AboveEqual, bb5);
masm.xorpd(temp6, temp6);
masm.movl(gpr4, 17080);
masm.pinsrw(temp6, gpr4, 3);
masm.movdqu(temp2, temp1);
masm.pand(temp4, temp1);
masm.subsd(temp1, temp4);
masm.mulsd(temp4, temp5);
masm.addsd(dest, temp7);
masm.mulsd(temp1, temp5);
masm.movdqu(temp7, temp6);
masm.addsd(temp6, temp4);
masm.leaq(gpr7, externalAddress(tExpPtr));
masm.addpd(temp3, dest);
masm.movdl(gpr4, temp6);
masm.movl(gpr3, gpr4);
masm.andl(gpr4, 255);
masm.addl(gpr4, gpr4);
masm.movdqu(temp5, new AMD64Address(gpr7, gpr4, Scale.Times8, 0));
masm.subsd(temp6, temp7);
masm.pshufd(dest, temp3, 0xEE);
masm.subsd(temp4, temp6);
masm.addsd(dest, temp3);
masm.addsd(temp4, temp1);
masm.mulsd(temp2, dest);
masm.leaq(gpr8, externalAddress(eCoeffPtr));
// 0xe78a6731,
masm.movdqu(temp7, new AMD64Address(gpr8, 0));
// 0x3f55d87f,
// 0xd704a0c0,
// 0x3fac6b08
// 0x6fba4e77,
masm.movdqu(temp3, new AMD64Address(gpr8, 16));
// 0x3f83b2ab,
// 0xff82c58f,
// 0x3fcebfbd
masm.shll(gpr3, 12);
masm.xorl(gpr3, gpr5);
masm.andl(gpr3, -1048576);
masm.movdq(temp6, gpr3);
masm.addsd(temp2, temp4);
masm.movq(gpr2, 0x3fe62e42fefa39efL);
masm.movdq(temp1, gpr2);
masm.pshufd(dest, temp2, 0x44);
masm.pshufd(temp4, temp2, 0x44);
masm.mulsd(temp1, temp2);
masm.pshufd(temp6, temp6, 0x11);
masm.mulpd(dest, dest);
masm.mulpd(temp7, temp4);
masm.paddd(temp5, temp6);
masm.mulsd(temp1, temp5);
masm.pshufd(temp6, temp5, 0xEE);
masm.mulsd(dest, dest);
masm.addpd(temp3, temp7);
masm.addsd(temp1, temp6);
masm.mulpd(dest, temp3);
masm.pshufd(temp3, dest, 0xEE);
masm.mulsd(dest, temp5);
masm.mulsd(temp3, temp5);
masm.addsd(dest, temp1);
masm.addsd(dest, temp3);
masm.addsd(dest, temp5);
masm.jmp(bb56);
masm.bind(bb0);
masm.addl(gpr1, 16);
masm.movl(gpr4, 32752);
masm.andl(gpr4, gpr1);
masm.cmpl(gpr4, 32752);
masm.jcc(ConditionFlag.Equal, bb6);
masm.testl(gpr1, 32768);
masm.jcc(ConditionFlag.NotEqual, bb7);
masm.bind(bb8);
masm.movdqu(dest, temp10);
masm.movdqu(temp3, temp10);
masm.movdl(gpr4, temp3);
masm.psrlq(temp3, 32);
masm.movdl(gpr3, temp3);
masm.orl(gpr4, gpr3);
masm.cmpl(gpr4, 0);
masm.jcc(ConditionFlag.Equal, bb9);
masm.xorpd(temp3, temp3);
masm.movl(gpr1, 18416);
masm.pinsrw(temp3, gpr1, 3);
masm.mulsd(dest, temp3);
masm.xorpd(temp2, temp2);
masm.movl(gpr1, 16368);
masm.pinsrw(temp2, gpr1, 3);
masm.movdqu(temp3, dest);
masm.pextrw(gpr1, dest, 3);
masm.por(dest, temp2);
masm.movl(gpr3, 18416);
masm.psrlq(dest, 27);
masm.psrld(dest, 2);
masm.rcpps(dest, dest);
masm.psllq(temp3, 12);
// 0x00000000,
masm.movdqu(temp6, externalAddress(highSigMaskPtr));
// 0xfffff800,
// 0x00000000,
// 0xfffff800
masm.psrlq(temp3, 12);
masm.mulss(dest, temp7);
masm.movl(gpr4, -1024);
masm.movdl(temp5, gpr4);
masm.por(temp3, temp1);
masm.paddd(dest, temp4);
masm.psllq(temp5, 32);
masm.movdl(gpr4, dest);
masm.psllq(dest, 29);
masm.pand(temp5, temp3);
masm.movl(gpr5, 0);
masm.pand(dest, temp6);
masm.subsd(temp3, temp5);
masm.andl(gpr1, 32752);
masm.subl(gpr1, 18416);
masm.sarl(gpr1, 4);
masm.cvtsi2sdl(temp7, gpr1);
masm.mulpd(temp5, dest);
masm.jmp(bb4);
masm.bind(bb10);
masm.movdqu(dest, temp10);
masm.movdqu(temp3, temp10);
masm.movdl(gpr4, temp3);
masm.psrlq(temp3, 32);
masm.movdl(gpr3, temp3);
masm.orl(gpr4, gpr3);
masm.cmpl(gpr4, 0);
masm.jcc(ConditionFlag.Equal, bb9);
masm.xorpd(temp3, temp3);
masm.movl(gpr1, 18416);
masm.pinsrw(temp3, gpr1, 3);
masm.mulsd(dest, temp3);
masm.xorpd(temp2, temp2);
masm.movl(gpr1, 16368);
masm.pinsrw(temp2, gpr1, 3);
masm.movdqu(temp3, dest);
masm.pextrw(gpr1, dest, 3);
masm.por(dest, temp2);
masm.movl(gpr3, 18416);
masm.psrlq(dest, 27);
masm.psrld(dest, 2);
masm.rcpps(dest, dest);
masm.psllq(temp3, 12);
// 0x00000000,
masm.movdqu(temp6, externalAddress(highSigMaskPtr));
// 0xfffff800,
// 0x00000000,
// 0xfffff800
masm.psrlq(temp3, 12);
masm.mulss(dest, temp7);
masm.movl(gpr4, -1024);
masm.movdl(temp5, gpr4);
masm.por(temp3, temp1);
masm.paddd(dest, temp4);
masm.psllq(temp5, 32);
masm.movdl(gpr4, dest);
masm.psllq(dest, 29);
masm.pand(temp5, temp3);
masm.movl(gpr5, Integer.MIN_VALUE);
masm.pand(dest, temp6);
masm.subsd(temp3, temp5);
masm.andl(gpr1, 32752);
masm.subl(gpr1, 18416);
masm.sarl(gpr1, 4);
masm.cvtsi2sdl(temp7, gpr1);
masm.mulpd(temp5, dest);
masm.jmp(bb4);
masm.bind(bb5);
masm.cmpl(gpr1, 0);
masm.jcc(ConditionFlag.Less, bb11);
masm.cmpl(gpr1, 752);
masm.jcc(ConditionFlag.AboveEqual, bb12);
masm.addsd(dest, temp7);
// 0xf8000000,
masm.movq(temp4, externalAddress(halfmaskPtr));
// 0xffffffff
masm.addpd(temp3, dest);
masm.xorpd(temp6, temp6);
masm.movl(gpr1, 17080);
masm.pinsrw(temp6, gpr1, 3);
masm.pshufd(dest, temp3, 0xEE);
masm.addsd(dest, temp3);
masm.movdqu(temp3, temp5);
masm.addsd(temp5, dest);
masm.subsd(temp3, temp5);
masm.movdqu(temp7, temp5);
masm.pand(temp5, temp4);
masm.movdqu(temp2, temp1);
masm.pand(temp4, temp1);
masm.subsd(temp7, temp5);
masm.addsd(dest, temp3);
masm.subsd(temp1, temp4);
masm.mulsd(temp4, temp5);
masm.addsd(dest, temp7);
masm.mulsd(temp2, dest);
masm.movdqu(temp7, temp6);
masm.mulsd(temp1, temp5);
masm.addsd(temp6, temp4);
masm.movdl(gpr1, temp6);
masm.subsd(temp6, temp7);
masm.leaq(gpr7, externalAddress(tExpPtr));
masm.movl(gpr3, gpr1);
masm.andl(gpr1, 255);
masm.addl(gpr1, gpr1);
masm.movdqu(temp5, new AMD64Address(gpr7, gpr1, Scale.Times8, 0));
masm.addsd(temp2, temp1);
masm.leaq(gpr8, externalAddress(eCoeffPtr));
// 0xe78a6731,
masm.movdqu(temp7, new AMD64Address(gpr8, 0));
// 0x3f55d87f,
// 0xd704a0c0,
// 0x3fac6b08
// 0x6fba4e77,
masm.movdqu(temp3, new AMD64Address(gpr8, 16));
// 0x3f83b2ab,
// 0xff82c58f,
// 0x3fcebfbd
masm.subsd(temp4, temp6);
masm.pextrw(gpr4, temp6, 3);
masm.addsd(temp2, temp4);
masm.sarl(gpr3, 8);
masm.movl(gpr1, gpr3);
masm.sarl(gpr3, 1);
masm.subl(gpr1, gpr3);
masm.shll(gpr3, 20);
masm.xorl(gpr3, gpr5);
masm.movdl(temp6, gpr3);
// 0xfefa39ef,
masm.movq(temp1, new AMD64Address(gpr8, 32));
// 0x3fe62e42
masm.andl(gpr4, 32767);
masm.cmpl(gpr4, 16529);
masm.jcc(ConditionFlag.Above, bb12);
masm.pshufd(dest, temp2, 0x44);
masm.pshufd(temp4, temp2, 0x44);
masm.mulpd(dest, dest);
masm.mulpd(temp7, temp4);
masm.pshufd(temp6, temp6, 0x11);
masm.mulsd(temp1, temp2);
masm.mulsd(dest, dest);
masm.paddd(temp5, temp6);
masm.addpd(temp3, temp7);
masm.mulsd(temp1, temp5);
masm.pshufd(temp6, temp5, 0xEE);
masm.mulpd(dest, temp3);
masm.addsd(temp1, temp6);
masm.pshufd(temp3, dest, 0xEE);
masm.mulsd(dest, temp5);
masm.mulsd(temp3, temp5);
masm.shll(gpr1, 4);
masm.xorpd(temp4, temp4);
masm.addl(gpr1, 16368);
masm.pinsrw(temp4, gpr1, 3);
masm.addsd(dest, temp1);
masm.addsd(dest, temp3);
masm.movdqu(temp1, dest);
masm.addsd(dest, temp5);
masm.mulsd(dest, temp4);
masm.pextrw(gpr1, dest, 3);
masm.andl(gpr1, 32752);
masm.jcc(ConditionFlag.Equal, bb13);
masm.cmpl(gpr1, 32752);
masm.jcc(ConditionFlag.Equal, bb14);
masm.jmp(bb56);
masm.bind(bb6);
masm.movdqu(temp1, temp8);
masm.movdqu(dest, temp10);
masm.movdqu(temp2, dest);
masm.movdl(gpr1, temp2);
masm.psrlq(temp2, 20);
masm.movdl(gpr4, temp2);
masm.orl(gpr1, gpr4);
masm.jcc(ConditionFlag.Equal, bb15);
masm.movdl(gpr1, temp1);
masm.psrlq(temp1, 32);
masm.movdl(gpr4, temp1);
masm.movl(gpr3, gpr4);
masm.addl(gpr4, gpr4);
masm.orl(gpr1, gpr4);
masm.jcc(ConditionFlag.Equal, bb16);
masm.addsd(dest, dest);
masm.jmp(bb56);
masm.bind(bb16);
masm.xorpd(dest, dest);
masm.movl(gpr1, 16368);
masm.pinsrw(dest, gpr1, 3);
masm.jmp(bb56);
masm.bind(bb18);
masm.addpd(dest, temp8);
masm.jmp(bb56);
masm.bind(bb15);
masm.movdl(gpr1, temp1);
masm.movdqu(temp2, temp1);
masm.psrlq(temp1, 32);
masm.movdl(gpr4, temp1);
masm.movl(gpr3, gpr4);
masm.addl(gpr4, gpr4);
masm.orl(gpr1, gpr4);
masm.jcc(ConditionFlag.Equal, bb19);
masm.pextrw(gpr1, temp2, 3);
masm.andl(gpr1, 32752);
masm.cmpl(gpr1, 32752);
masm.jcc(ConditionFlag.NotEqual, bb20);
masm.movdl(gpr1, temp2);
masm.psrlq(temp2, 20);
masm.movdl(gpr4, temp2);
masm.orl(gpr1, gpr4);
masm.jcc(ConditionFlag.NotEqual, bb18);
masm.bind(bb20);
masm.pextrw(gpr1, dest, 3);
masm.testl(gpr1, 32768);
masm.jcc(ConditionFlag.NotEqual, bb21);
masm.testl(gpr3, Integer.MIN_VALUE);
masm.jcc(ConditionFlag.NotZero, bb22);
masm.jmp(bb56);
masm.bind(bb23);
masm.movdl(gpr1, temp8);
masm.testl(gpr1, 1);
masm.jcc(ConditionFlag.NotEqual, bb24);
masm.testl(gpr1, 2);
masm.jcc(ConditionFlag.NotEqual, bb25);
masm.jmp(bb24);
masm.bind(bb21);
masm.shrl(gpr3, 20);
masm.andl(gpr3, 2047);
masm.cmpl(gpr3, 1075);
masm.jcc(ConditionFlag.Above, bb24);
masm.jcc(ConditionFlag.Equal, bb26);
masm.cmpl(gpr3, 1074);
masm.jcc(ConditionFlag.Above, bb23);
masm.cmpl(gpr3, 1023);
masm.jcc(ConditionFlag.Below, bb24);
masm.movdqu(temp1, temp8);
masm.movl(gpr1, 17208);
masm.xorpd(temp3, temp3);
masm.pinsrw(temp3, gpr1, 3);
masm.movdqu(temp4, temp3);
masm.addsd(temp3, temp1);
masm.subsd(temp4, temp3);
masm.addsd(temp1, temp4);
masm.pextrw(gpr1, temp1, 3);
masm.andl(gpr1, 32752);
masm.jcc(ConditionFlag.NotEqual, bb24);
masm.movdl(gpr1, temp3);
masm.andl(gpr1, 1);
masm.jcc(ConditionFlag.Equal, bb24);
masm.bind(bb25);
masm.pextrw(gpr1, temp8, 3);
masm.andl(gpr1, 32768);
masm.jcc(ConditionFlag.NotEqual, bb27);
masm.jmp(bb56);
masm.bind(bb27);
masm.xorpd(dest, dest);
masm.movl(gpr1, 32768);
masm.pinsrw(dest, gpr1, 3);
masm.jmp(bb56);
masm.bind(bb24);
masm.pextrw(gpr1, temp8, 3);
masm.andl(gpr1, 32768);
masm.jcc(ConditionFlag.NotEqual, bb22);
masm.xorpd(dest, dest);
masm.movl(gpr1, 32752);
masm.pinsrw(dest, gpr1, 3);
masm.jmp(bb56);
masm.bind(bb26);
masm.movdl(gpr1, temp8);
masm.andl(gpr1, 1);
masm.jcc(ConditionFlag.Equal, bb24);
masm.jmp(bb25);
masm.bind(bb28);
masm.movdl(gpr1, temp1);
masm.psrlq(temp1, 20);
masm.movdl(gpr4, temp1);
masm.orl(gpr1, gpr4);
masm.jcc(ConditionFlag.Equal, bb29);
masm.addsd(dest, temp8);
masm.jmp(bb56);
masm.bind(bb29);
masm.movdqu(dest, temp10);
masm.pextrw(gpr1, dest, 3);
masm.cmpl(gpr1, 49136);
masm.jcc(ConditionFlag.NotEqual, bb30);
masm.movdl(gpr3, dest);
masm.psrlq(dest, 20);
masm.movdl(gpr4, dest);
masm.orl(gpr3, gpr4);
masm.jcc(ConditionFlag.NotEqual, bb30);
masm.xorpd(dest, dest);
masm.movl(gpr1, 32760);
masm.pinsrw(dest, gpr1, 3);
masm.jmp(bb56);
masm.bind(bb30);
masm.andl(gpr1, 32752);
masm.subl(gpr1, 16368);
masm.pextrw(gpr4, temp8, 3);
masm.xorpd(dest, dest);
masm.xorl(gpr1, gpr4);
masm.andl(gpr1, 32768);
masm.jcc(ConditionFlag.Equal, bb31);
masm.jmp(bb56);
masm.bind(bb31);
masm.movl(gpr3, 32752);
masm.pinsrw(dest, gpr3, 3);
masm.jmp(bb56);
masm.bind(bb32);
masm.movdl(gpr1, temp1);
masm.cmpl(gpr4, 17184);
masm.jcc(ConditionFlag.Above, bb33);
masm.testl(gpr1, 1);
masm.jcc(ConditionFlag.NotEqual, bb34);
masm.testl(gpr1, 2);
masm.jcc(ConditionFlag.Equal, bb35);
masm.jmp(bb36);
masm.bind(bb33);
masm.testl(gpr1, 1);
masm.jcc(ConditionFlag.Equal, bb35);
masm.jmp(bb36);
masm.bind(bb7);
masm.movdqu(temp2, temp10);
masm.movdl(gpr1, temp2);
masm.psrlq(temp2, 31);
masm.movdl(gpr3, temp2);
masm.orl(gpr1, gpr3);
masm.jcc(ConditionFlag.Equal, bb9);
masm.pextrw(gpr4, temp8, 3);
masm.movdl(gpr1, temp8);
masm.movdqu(temp2, temp8);
masm.psrlq(temp2, 32);
masm.movdl(gpr3, temp2);
masm.addl(gpr3, gpr3);
masm.orl(gpr3, gpr1);
masm.jcc(ConditionFlag.Equal, bb37);
masm.andl(gpr4, 32752);
masm.cmpl(gpr4, 32752);
masm.jcc(ConditionFlag.Equal, bb28);
masm.cmpl(gpr4, 17200);
masm.jcc(ConditionFlag.Above, bb35);
masm.cmpl(gpr4, 17184);
masm.jcc(ConditionFlag.AboveEqual, bb32);
masm.cmpl(gpr4, 16368);
masm.jcc(ConditionFlag.Below, bb34);
masm.movl(gpr1, 17208);
masm.xorpd(temp2, temp2);
masm.pinsrw(temp2, gpr1, 3);
masm.movdqu(temp4, temp2);
masm.addsd(temp2, temp1);
masm.subsd(temp4, temp2);
masm.addsd(temp1, temp4);
masm.pextrw(gpr1, temp1, 3);
masm.andl(gpr1, 32767);
masm.jcc(ConditionFlag.NotEqual, bb34);
masm.movdl(gpr1, temp2);
masm.andl(gpr1, 1);
masm.jcc(ConditionFlag.Equal, bb35);
masm.bind(bb36);
masm.xorpd(temp1, temp1);
masm.movl(gpr4, 30704);
masm.pinsrw(temp1, gpr4, 3);
masm.pextrw(gpr1, temp10, 3);
masm.movl(gpr4, 8192);
masm.movdl(temp4, gpr4);
masm.andl(gpr1, 32767);
masm.subl(gpr1, 16);
masm.jcc(ConditionFlag.Less, bb10);
masm.movl(gpr4, gpr1);
masm.andl(gpr4, 32752);
masm.subl(gpr4, 16368);
masm.movl(gpr3, gpr4);
masm.sarl(gpr4, 31);
masm.addl(gpr3, gpr4);
masm.xorl(gpr3, gpr4);
masm.addl(gpr3, 16);
masm.bsrl(gpr3, gpr3);
masm.movl(gpr5, Integer.MIN_VALUE);
masm.jmp(bb1);
masm.bind(bb34);
masm.xorpd(temp1, temp1);
masm.movl(gpr1, 32752);
masm.pinsrw(temp1, gpr1, 3);
masm.xorpd(dest, dest);
masm.mulsd(dest, temp1);
masm.jmp(bb56);
masm.bind(bb35);
masm.xorpd(temp1, temp1);
masm.movl(gpr4, 30704);
masm.pinsrw(temp1, gpr4, 3);
masm.pextrw(gpr1, temp10, 3);
masm.movl(gpr4, 8192);
masm.movdl(temp4, gpr4);
masm.andl(gpr1, 32767);
masm.subl(gpr1, 16);
masm.jcc(ConditionFlag.Less, bb8);
masm.movl(gpr4, gpr1);
masm.andl(gpr4, 32752);
masm.subl(gpr4, 16368);
masm.movl(gpr3, gpr4);
masm.sarl(gpr4, 31);
masm.addl(gpr3, gpr4);
masm.xorl(gpr3, gpr4);
masm.addl(gpr3, 16);
masm.bsrl(gpr3, gpr3);
masm.movl(gpr5, 0);
masm.jmp(bb1);
masm.bind(bb19);
masm.xorpd(dest, dest);
masm.movl(gpr1, 16368);
masm.pinsrw(dest, gpr1, 3);
masm.jmp(bb56);
masm.bind(bb22);
masm.xorpd(dest, dest);
masm.jmp(bb56);
masm.bind(bb11);
masm.addl(gpr1, 384);
masm.cmpl(gpr1, 0);
masm.jcc(ConditionFlag.Less, bb38);
masm.mulsd(temp5, temp1);
masm.addsd(dest, temp7);
masm.shrl(gpr5, 31);
masm.addpd(temp3, dest);
masm.pshufd(dest, temp3, 0xEE);
masm.addsd(temp3, dest);
// 0xfefa39ef,
masm.leaq(gpr7, externalAddress(logTwoPowPtr));
// 0x3fe62e42,
// 0xfefa39ef,
// 0xbfe62e42
masm.movq(temp4, new AMD64Address(gpr7, gpr5, Scale.Times8, 0));
masm.mulsd(temp1, temp3);
masm.xorpd(dest, dest);
masm.movl(gpr1, 16368);
masm.shll(gpr5, 15);
masm.orl(gpr1, gpr5);
masm.pinsrw(dest, gpr1, 3);
masm.addsd(temp5, temp1);
masm.mulsd(temp5, temp4);
masm.addsd(dest, temp5);
masm.jmp(bb56);
masm.bind(bb38);
masm.bind(bb37);
masm.xorpd(dest, dest);
masm.movl(gpr1, 16368);
masm.pinsrw(dest, gpr1, 3);
masm.jmp(bb56);
masm.bind(bb39);
masm.xorpd(dest, dest);
masm.movl(gpr1, 16368);
masm.pinsrw(dest, gpr1, 3);
masm.jmp(bb56);
masm.bind(bb9);
masm.movdqu(temp2, temp8);
masm.pextrw(gpr1, temp8, 3);
masm.andl(gpr1, 32752);
masm.cmpl(gpr1, 32752);
masm.jcc(ConditionFlag.NotEqual, bb40);
masm.movdl(gpr1, temp2);
masm.psrlq(temp2, 20);
masm.movdl(gpr4, temp2);
masm.orl(gpr1, gpr4);
masm.jcc(ConditionFlag.NotEqual, bb18);
masm.bind(bb40);
masm.movdl(gpr1, temp1);
masm.psrlq(temp1, 32);
masm.movdl(gpr4, temp1);
masm.movl(gpr3, gpr4);
masm.addl(gpr4, gpr4);
masm.orl(gpr1, gpr4);
masm.jcc(ConditionFlag.Equal, bb39);
masm.shrl(gpr4, 21);
masm.cmpl(gpr4, 1075);
masm.jcc(ConditionFlag.Above, bb41);
masm.jcc(ConditionFlag.Equal, bb42);
masm.cmpl(gpr4, 1023);
masm.jcc(ConditionFlag.Below, bb41);
masm.movdqu(temp1, temp8);
masm.movl(gpr1, 17208);
masm.xorpd(temp3, temp3);
masm.pinsrw(temp3, gpr1, 3);
masm.movdqu(temp4, temp3);
masm.addsd(temp3, temp1);
masm.subsd(temp4, temp3);
masm.addsd(temp1, temp4);
masm.pextrw(gpr1, temp1, 3);
masm.andl(gpr1, 32752);
masm.jcc(ConditionFlag.NotEqual, bb41);
masm.movdl(gpr1, temp3);
masm.andl(gpr1, 1);
masm.jcc(ConditionFlag.Equal, bb41);
masm.bind(bb43);
masm.movdqu(dest, temp10);
masm.testl(gpr3, Integer.MIN_VALUE);
masm.jcc(ConditionFlag.NotEqual, bb44);
masm.jmp(bb56);
masm.bind(bb42);
masm.movdl(gpr1, temp8);
masm.testl(gpr1, 1);
masm.jcc(ConditionFlag.NotEqual, bb43);
masm.bind(bb41);
masm.testl(gpr3, Integer.MIN_VALUE);
masm.jcc(ConditionFlag.Equal, bb22);
masm.xorpd(dest, dest);
masm.bind(bb44);
masm.movl(gpr1, 16368);
masm.xorpd(temp1, temp1);
masm.pinsrw(temp1, gpr1, 3);
masm.divsd(temp1, dest);
masm.movdqu(dest, temp1);
masm.jmp(bb56);
masm.bind(bb12);
masm.pextrw(gpr1, temp10, 3);
masm.pextrw(gpr4, temp8, 3);
masm.movl(gpr3, 32752);
masm.andl(gpr3, gpr4);
masm.cmpl(gpr3, 32752);
masm.jcc(ConditionFlag.Equal, bb45);
masm.andl(gpr1, 32752);
masm.subl(gpr1, 16368);
masm.xorl(gpr4, gpr1);
masm.testl(gpr4, 32768);
masm.jcc(ConditionFlag.NotEqual, bb46);
masm.bind(bb47);
masm.movl(gpr1, 32736);
masm.pinsrw(dest, gpr1, 3);
masm.shrl(gpr5, 16);
masm.orl(gpr1, gpr5);
masm.pinsrw(temp1, gpr1, 3);
masm.mulsd(dest, temp1);
masm.bind(bb14);
masm.jmp(bb56);
masm.bind(bb46);
masm.movl(gpr1, 16);
masm.pinsrw(dest, gpr1, 3);
masm.mulsd(dest, dest);
masm.testl(gpr3, Integer.MIN_VALUE);
masm.jcc(ConditionFlag.Equal, bb48);
masm.movq(gpr2, 0x8000000000000000L);
masm.movdq(temp2, gpr2);
masm.xorpd(dest, temp2);
masm.bind(bb48);
masm.jmp(bb56);
masm.bind(bb13);
masm.pextrw(gpr3, temp5, 3);
masm.pextrw(gpr4, temp4, 3);
masm.movl(gpr1, -1);
masm.andl(gpr3, 32752);
masm.subl(gpr3, 16368);
masm.andl(gpr4, 32752);
masm.addl(gpr4, gpr3);
masm.movl(gpr3, -31);
masm.sarl(gpr4, 4);
masm.subl(gpr3, gpr4);
masm.jcc(ConditionFlag.LessEqual, bb49);
masm.cmpl(gpr3, 20);
masm.jcc(ConditionFlag.Above, bb50);
masm.shll(gpr1);
masm.bind(bb49);
masm.movdl(dest, gpr1);
masm.psllq(dest, 32);
masm.pand(dest, temp5);
masm.subsd(temp5, dest);
masm.addsd(temp5, temp1);
masm.mulsd(dest, temp4);
masm.mulsd(temp5, temp4);
masm.addsd(dest, temp5);
masm.bind(bb50);
masm.jmp(bb48);
masm.bind(bb2);
masm.pextrw(gpr3, temp8, 3);
masm.movl(gpr4, Integer.MIN_VALUE);
masm.movdl(temp1, gpr4);
masm.xorpd(temp7, temp7);
masm.paddd(dest, temp4);
masm.movdl(gpr4, dest);
masm.psllq(dest, 29);
masm.paddq(temp1, temp3);
masm.pand(temp5, temp1);
masm.andl(gpr3, 32752);
masm.cmpl(gpr3, 16560);
masm.jcc(ConditionFlag.Less, bb3);
masm.leaq(gpr7, externalAddress(lTblPowPtr));
masm.leaq(gpr8, externalAddress(coeffHPtr));
// 0x00000000,
masm.movdqu(temp4, new AMD64Address(gpr8, 0));
// 0xbfd61a00,
// 0x00000000,
// 0xbf5dabe1
masm.pand(dest, temp6);
masm.subsd(temp3, temp5);
masm.addl(gpr1, 16351);
masm.shrl(gpr1, 4);
masm.subl(gpr1, 1022);
masm.cvtsi2sdl(temp7, gpr1);
masm.mulpd(temp5, dest);
masm.mulsd(temp3, dest);
masm.subsd(temp5, temp9);
masm.pshufd(temp1, temp4, 0xE);
masm.pshufd(temp2, temp3, 0x44);
masm.unpcklpd(temp5, temp3);
masm.addsd(temp3, temp5);
masm.andl(gpr4, 16760832);
masm.shrl(gpr4, 10);
masm.addpd(temp7, new AMD64Address(gpr7, gpr4, Scale.Times1, -3648));
masm.movdqu(temp6, temp4);
masm.mulsd(temp4, temp5);
masm.movdqu(dest, temp1);
masm.mulsd(dest, temp5);
masm.mulsd(temp6, temp2);
masm.mulsd(temp1, temp2);
masm.movdqu(temp2, temp5);
masm.mulsd(temp4, temp5);
masm.addsd(temp5, dest);
masm.movdqu(dest, temp7);
masm.addsd(temp2, temp3);
masm.addsd(temp7, temp5);
masm.mulsd(temp6, temp2);
masm.subsd(dest, temp7);
masm.movdqu(temp2, temp7);
masm.addsd(temp7, temp4);
masm.addsd(dest, temp5);
masm.subsd(temp2, temp7);
masm.addsd(temp4, temp2);
masm.pshufd(temp2, temp5, 0xEE);
masm.movdqu(temp5, temp7);
masm.addsd(temp7, temp2);
masm.addsd(temp4, dest);
masm.leaq(gpr8, externalAddress(coeffPowPtr));
// 0x6dc96112,
masm.movdqu(dest, new AMD64Address(gpr8, 0));
// 0xbf836578,
// 0xee241472,
// 0xbf9b0301
masm.subsd(temp5, temp7);
masm.addsd(temp6, temp4);
masm.movdqu(temp4, temp7);
masm.addsd(temp5, temp2);
masm.addsd(temp7, temp1);
// 0x486ececc,
masm.movdqu(temp2, new AMD64Address(gpr8, 64));
// 0x3fc4635e,
// 0x161bb241,
// 0xbf5dabe1
masm.subsd(temp4, temp7);
masm.addsd(temp6, temp5);
masm.addsd(temp4, temp1);
masm.pshufd(temp5, temp7, 0xEE);
masm.movapd(temp1, temp7);
masm.addsd(temp7, temp5);
masm.subsd(temp1, temp7);
masm.addsd(temp1, temp5);
// 0x9f95985a,
masm.movdqu(temp5, new AMD64Address(gpr8, 80));
// 0xbfb528db,
// 0xf8b5787d,
// 0x3ef2531e
masm.pshufd(temp3, temp3, 0x44);
masm.addsd(temp6, temp4);
masm.addsd(temp6, temp1);
// 0x9f95985a,
masm.movdqu(temp1, new AMD64Address(gpr8, 32));
// 0xbfb528db,
// 0xb3841d2a,
// 0xbfd619b6
masm.mulpd(dest, temp3);
masm.mulpd(temp2, temp3);
masm.pshufd(temp4, temp3, 0x44);
masm.mulpd(temp3, temp3);
masm.addpd(dest, temp1);
masm.addpd(temp5, temp2);
masm.mulsd(temp4, temp3);
// 0xf8000000,
masm.movq(temp2, externalAddress(highmaskLogXPtr));
// 0xffffffff
masm.mulpd(temp3, temp3);
masm.movdqu(temp1, temp8);
masm.pextrw(gpr3, temp8, 3);
masm.mulpd(dest, temp4);
masm.pextrw(gpr1, temp7, 3);
masm.mulpd(temp5, temp4);
masm.mulpd(dest, temp3);
masm.leaq(gpr8, externalAddress(highmaskYPtr));
// 0x00000000,
masm.movq(temp4, new AMD64Address(gpr8, 8));
// 0xffffffff
masm.pand(temp2, temp7);
masm.addsd(temp5, temp6);
masm.subsd(temp7, temp2);
masm.addpd(temp5, dest);
masm.andl(gpr1, 32752);
masm.subl(gpr1, 16368);
masm.andl(gpr3, 32752);
masm.cmpl(gpr3, 32752);
masm.jcc(ConditionFlag.Equal, bb45);
masm.addl(gpr3, gpr1);
masm.cmpl(gpr3, 16576);
masm.jcc(ConditionFlag.AboveEqual, bb51);
masm.pshufd(dest, temp5, 0xEE);
masm.pand(temp4, temp1);
masm.movdqu(temp3, temp1);
masm.addsd(temp5, dest);
masm.subsd(temp1, temp4);
masm.xorpd(temp6, temp6);
masm.movl(gpr4, 17080);
masm.pinsrw(temp6, gpr4, 3);
masm.addsd(temp7, temp5);
masm.mulsd(temp4, temp2);
masm.mulsd(temp1, temp2);
masm.movdqu(temp5, temp6);
masm.mulsd(temp3, temp7);
masm.addsd(temp6, temp4);
masm.addsd(temp1, temp3);
masm.leaq(gpr8, externalAddress(eCoeffPtr));
// 0xe78a6731,
masm.movdqu(temp7, new AMD64Address(gpr8, 0));
// 0x3f55d87f,
// 0xd704a0c0,
// 0x3fac6b08
masm.movdl(gpr4, temp6);
masm.subsd(temp6, temp5);
masm.leaq(gpr7, externalAddress(tExpPtr));
masm.movl(gpr3, gpr4);
masm.andl(gpr4, 255);
masm.addl(gpr4, gpr4);
masm.movdqu(temp5, new AMD64Address(gpr7, gpr4, Scale.Times8, 0));
// 0x6fba4e77,
masm.movdqu(temp3, new AMD64Address(gpr8, 16));
// 0x3f83b2ab,
// 0xff82c58f,
// 0x3fcebfbd
// 0xfefa39ef,
masm.movq(temp2, new AMD64Address(gpr8, 32));
// 0x3fe62e42
masm.subsd(temp4, temp6);
masm.addsd(temp4, temp1);
masm.pextrw(gpr4, temp6, 3);
masm.shrl(gpr3, 8);
masm.movl(gpr1, gpr3);
masm.shrl(gpr3, 1);
masm.subl(gpr1, gpr3);
masm.shll(gpr3, 20);
masm.movdl(temp6, gpr3);
masm.pshufd(dest, temp4, 0x44);
masm.pshufd(temp1, temp4, 0x44);
masm.mulpd(dest, dest);
masm.mulpd(temp7, temp1);
masm.pshufd(temp6, temp6, 0x11);
masm.mulsd(temp2, temp4);
masm.andl(gpr4, 32767);
masm.cmpl(gpr4, 16529);
masm.jcc(ConditionFlag.Above, bb12);
masm.mulsd(dest, dest);
masm.paddd(temp5, temp6);
masm.addpd(temp3, temp7);
masm.mulsd(temp2, temp5);
masm.pshufd(temp6, temp5, 0xEE);
masm.mulpd(dest, temp3);
masm.addsd(temp2, temp6);
masm.pshufd(temp3, dest, 0xEE);
masm.addl(gpr1, 1023);
masm.shll(gpr1, 20);
masm.orl(gpr1, gpr5);
masm.movdl(temp4, gpr1);
masm.mulsd(dest, temp5);
masm.mulsd(temp3, temp5);
masm.addsd(dest, temp2);
masm.psllq(temp4, 32);
masm.addsd(dest, temp3);
masm.movdqu(temp1, dest);
masm.addsd(dest, temp5);
masm.mulsd(dest, temp4);
masm.pextrw(gpr1, dest, 3);
masm.andl(gpr1, 32752);
masm.jcc(ConditionFlag.Equal, bb13);
masm.cmpl(gpr1, 32752);
masm.jcc(ConditionFlag.Equal, bb14);
masm.jmp(bb56);
masm.bind(bb45);
masm.movdqu(dest, temp10);
masm.xorpd(temp2, temp2);
masm.movl(gpr1, 49136);
masm.pinsrw(temp2, gpr1, 3);
masm.addsd(temp2, dest);
masm.pextrw(gpr1, temp2, 3);
masm.cmpl(gpr1, 0);
masm.jcc(ConditionFlag.NotEqual, bb53);
masm.xorpd(dest, dest);
masm.movl(gpr1, 32760);
masm.pinsrw(dest, gpr1, 3);
masm.jmp(bb56);
masm.bind(bb53);
masm.movdqu(temp1, temp8);
masm.movdl(gpr4, temp1);
masm.movdqu(temp3, temp1);
masm.psrlq(temp3, 20);
masm.movdl(gpr3, temp3);
masm.orl(gpr3, gpr4);
masm.jcc(ConditionFlag.Equal, bb54);
masm.addsd(temp1, temp1);
masm.movdqu(dest, temp1);
masm.jmp(bb56);
masm.bind(bb51);
masm.pextrw(gpr1, temp1, 3);
masm.pextrw(gpr3, temp2, 3);
masm.xorl(gpr1, gpr3);
masm.testl(gpr1, 32768);
masm.jcc(ConditionFlag.Equal, bb47);
masm.jmp(bb46);
masm.bind(bb54);
masm.pextrw(gpr1, dest, 3);
masm.andl(gpr1, 32752);
masm.pextrw(gpr4, temp1, 3);
masm.xorpd(dest, dest);
masm.subl(gpr1, 16368);
masm.xorl(gpr1, gpr4);
masm.testl(gpr1, 32768);
masm.jcc(ConditionFlag.Equal, bb55);
masm.jmp(bb56);
masm.bind(bb55);
masm.movl(gpr4, 32752);
masm.pinsrw(dest, gpr4, 3);
masm.jmp(bb56);
masm.bind(bb56);
}
Aggregations