Search in sources :

Example 26 with AMD64MacroAssembler

use of org.graalvm.compiler.asm.amd64.AMD64MacroAssembler in project graal by oracle.

the class AMD64HotSpotSafepointOp method emitGlobalPoll.

/**
 * Emits a global safepoint poll. Every variant ends in a {@code testl} of {@code rax} against the
 * polling page, records the matching poll mark and, when {@code state} is non-null, records a
 * safepoint infopoint at the position of that {@code testl}.
 *
 * @param crb builder that receives marks, infopoints and data references
 * @param asm assembler that receives the instructions
 * @param config source of the polling page address and of the mark ids
 * @param atReturn selects the "return" flavour of the mark ids; {@code state} must be null then
 * @param state frame state for the infopoint, or {@code null} if none is to be recorded
 * @param scratch register used to hold the polling page address in the two "far" variants
 */
private static void emitGlobalPoll(CompilationResultBuilder crb, AMD64MacroAssembler asm, GraalHotSpotVMConfig config, boolean atReturn, LIRFrameState state, Register scratch) {
    assert !atReturn || state == null : "state is unneeded at return";

    if (ImmutableCode.getValue(crb.getOptions())) {
        final JavaKind wordKind = JavaKind.Long;
        final int wordBytes = wordKind.getBitCount() / Byte.SIZE;
        final JavaConstant pollPage = JavaConstant.forIntegerKind(wordKind, config.safepointPollingAddress);
        // The polling page address is loaded from memory rather than embedded as an immediate:
        // either through a placeholder address (PIC) or through a data reference recorded for
        // the constant.
        if (!GeneratePIC.getValue(crb.getOptions())) {
            final AMD64Address pollPageSlot = (AMD64Address) crb.recordDataReferenceInCode(pollPage, wordBytes);
            asm.movq(scratch, pollPageSlot);
        } else {
            asm.movq(scratch, asm.getPlaceholder(-1));
        }
        final int pollPosition = asm.position();
        crb.recordMark(!atReturn ? config.MARKID_POLL_FAR : config.MARKID_POLL_RETURN_FAR);
        if (state != null) {
            crb.recordInfopoint(pollPosition, state, InfopointReason.SAFEPOINT);
        }
        asm.testl(rax, new AMD64Address(scratch));
        return;
    }

    if (isPollingPageFar(config)) {
        // Far polling page: materialize its address in scratch and poll through the register.
        asm.movq(scratch, config.safepointPollingAddress);
        crb.recordMark(!atReturn ? config.MARKID_POLL_FAR : config.MARKID_POLL_RETURN_FAR);
        final int pollPosition = asm.position();
        if (state != null) {
            crb.recordInfopoint(pollPosition, state, InfopointReason.SAFEPOINT);
        }
        asm.testl(rax, new AMD64Address(scratch));
        return;
    }

    // Near polling page: poll with a RIP-relative operand.
    crb.recordMark(!atReturn ? config.MARKID_POLL_NEAR : config.MARKID_POLL_RETURN_NEAR);
    final int pollPosition = asm.position();
    if (state != null) {
        crb.recordInfopoint(pollPosition, state, InfopointReason.SAFEPOINT);
    }
    // The C++ code transforms the polling page offset into an RIP displacement
    // to the real address at that offset in the polling page.
    asm.testl(rax, new AMD64Address(rip, 0));
}
Also used : JavaConstant(jdk.vm.ci.meta.JavaConstant) AMD64Address(org.graalvm.compiler.asm.amd64.AMD64Address) JavaKind(jdk.vm.ci.meta.JavaKind)

Example 27 with AMD64MacroAssembler

use of org.graalvm.compiler.asm.amd64.AMD64MacroAssembler in project graal by oracle.

the class AMD64HotSpotUnwindOp method emitCode.

/**
 * Emits the unwind sequence: leaves the current frame, loads the return address into the second
 * argument register of the {@code UNWIND_EXCEPTION_TO_CALLER} foreign call and jumps to that
 * call's target.
 */
@Override
public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
    leaveFrameAndRestoreRbp(crb, masm);

    final ForeignCallLinkage unwindLinkage = crb.foreignCalls.lookupForeignCall(UNWIND_EXCEPTION_TO_CALLER);
    final CallingConvention outgoing = unwindLinkage.getOutgoingCallingConvention();
    assert outgoing.getArgumentCount() == 2;
    assert exception.equals(outgoing.getArgument(0));

    // With the frame gone, the return address is the top-of-stack word; pass it as argument 1.
    final Register returnAddressReg = asRegister(outgoing.getArgument(1));
    final AMD64Address topOfStack = new AMD64Address(rsp, 0);
    masm.movq(returnAddressReg, topOfStack);

    AMD64Call.directJmp(crb, masm, unwindLinkage);
}
Also used : CallingConvention(jdk.vm.ci.code.CallingConvention) Register(jdk.vm.ci.code.Register) ValueUtil.asRegister(jdk.vm.ci.code.ValueUtil.asRegister) ForeignCallLinkage(org.graalvm.compiler.core.common.spi.ForeignCallLinkage) AMD64Address(org.graalvm.compiler.asm.amd64.AMD64Address)

Example 28 with AMD64MacroAssembler

use of org.graalvm.compiler.asm.amd64.AMD64MacroAssembler in project graal by oracle.

the class AMD64MathIntrinsicUnaryOp method tanIntrinsic.

/**
 * Emits the AMD64 instruction sequence for the tangent intrinsic.
 * <p>
 * The sequence is straight-line assembler emission with several internal branch targets
 * ({@code bb0} .. {@code bb15}); every path ends by jumping to, or falling into, {@code bb15},
 * which is bound at the very end. The instruction order is significant (several conditional
 * jumps consume flags set many instructions earlier), so statements must not be reordered.
 *
 * @param dest XMM register that the computation works in and that holds the result at
 *            {@code bb15}
 * @param value XMM register holding the argument; copied into {@code dest} first when the two
 *            registers differ
 * @param crb compilation result builder, handed to {@code setCrb} before any code is emitted
 *            (presumably so that {@code externalAddress} can record data references - confirm)
 * @param masm assembler that receives the instructions
 */
public void tanIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) {
    // Handles to the constant tables referenced through externalAddress(...) below.
    // NOTE(review): the second constructor argument is presumably the required alignment in
    // bytes - confirm against ArrayDataPointerConstant.
    ArrayDataPointerConstant oneHalfTanPtr = new ArrayDataPointerConstant(oneHalfTan, 16);
    ArrayDataPointerConstant mulSixteenPtr = new ArrayDataPointerConstant(mulSixteen, 16);
    ArrayDataPointerConstant signMaskTanPtr = new ArrayDataPointerConstant(signMaskTan, 16);
    ArrayDataPointerConstant piThirtyTwoInvTanPtr = new ArrayDataPointerConstant(piThirtyTwoInvTan, 16);
    ArrayDataPointerConstant pOneTanPtr = new ArrayDataPointerConstant(pOneTan, 16);
    ArrayDataPointerConstant pTwoTanPtr = new ArrayDataPointerConstant(pTwoTan, 16);
    ArrayDataPointerConstant pThreeTanPtr = new ArrayDataPointerConstant(pThreeTan, 16);
    ArrayDataPointerConstant cTableTanPtr = new ArrayDataPointerConstant(cTableTan, 16);
    ArrayDataPointerConstant maskThirtyFiveTanPtr = new ArrayDataPointerConstant(maskThirtyFiveTan, 16);
    ArrayDataPointerConstant qElevenTanPtr = new ArrayDataPointerConstant(qElevenTan, 16);
    ArrayDataPointerConstant qNineTanPtr = new ArrayDataPointerConstant(qNineTan, 16);
    ArrayDataPointerConstant qSevenTanPtr = new ArrayDataPointerConstant(qSevenTan, 8);
    ArrayDataPointerConstant qFiveTanPtr = new ArrayDataPointerConstant(qFiveTan, 16);
    ArrayDataPointerConstant qThreeTanPtr = new ArrayDataPointerConstant(qThreeTan, 16);
    ArrayDataPointerConstant piInvTableTanPtr = new ArrayDataPointerConstant(piInvTableTan, 16);
    ArrayDataPointerConstant piFourTanPtr = new ArrayDataPointerConstant(piFourTan, 8);
    ArrayDataPointerConstant qqTwoTanPtr = new ArrayDataPointerConstant(qqTwoTan, 8);
    ArrayDataPointerConstant onePtr = new ArrayDataPointerConstant(one, 8);
    ArrayDataPointerConstant twoPowFiftyFiveTanPtr = new ArrayDataPointerConstant(twoPowFiftyFiveTan, 8);
    ArrayDataPointerConstant twoPowMFiftyFiveTanPtr = new ArrayDataPointerConstant(twoPowMFiftyFiveTan, 8);
    // Branch targets; each one is bound exactly once further down.
    Label bb0 = new Label();
    Label bb1 = new Label();
    Label bb2 = new Label();
    Label bb3 = new Label();
    Label bb5 = new Label();
    Label bb6 = new Label();
    Label bb8 = new Label();
    Label bb9 = new Label();
    Label bb10 = new Label();
    Label bb11 = new Label();
    Label bb12 = new Label();
    Label bb13 = new Label();
    Label bb14 = new Label();
    Label bb15 = new Label();
    // Resolve the op's temporaries to concrete registers. Note that gpr3 comes from rcxTemp,
    // unlike the other general purpose temporaries.
    // NOTE(review): presumably because the shift-by-register forms below (shll/shrq/... with a
    // single operand) take their count from CL - confirm.
    Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD);
    Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD);
    Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD);
    Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD);
    Register gpr5 = asRegister(gpr5Temp, AMD64Kind.QWORD);
    Register gpr6 = asRegister(gpr6Temp, AMD64Kind.QWORD);
    Register gpr7 = asRegister(gpr7Temp, AMD64Kind.QWORD);
    Register gpr8 = asRegister(gpr8Temp, AMD64Kind.QWORD);
    Register gpr9 = asRegister(gpr9Temp, AMD64Kind.QWORD);
    Register gpr10 = asRegister(gpr10Temp, AMD64Kind.QWORD);
    Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE);
    Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE);
    Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE);
    Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE);
    Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE);
    Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE);
    Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE);
    setCrb(crb);
    // All further work happens in dest, so bring the argument there first.
    if (dest.encoding != value.encoding) {
        masm.movdqu(dest, value);
    }
    // Range check on the top 16 bits of the argument with the sign bit masked off: anything
    // outside the window [16314, 16314 + 270] (unsigned compare) leaves the main path for bb0.
    masm.pextrw(gpr1, dest, 3);
    masm.andl(gpr1, 32767);
    masm.subl(gpr1, 16314);
    masm.cmpl(gpr1, 270);
    masm.jcc(ConditionFlag.Above, bb0);
    // Main path for arguments inside the window.
    // NOTE(review): the hex words in the comments below appear to be the contents of the
    // constant tables being loaded; they were scattered by the extraction - confirm against the
    // table definitions.
    // 0x00000000,
    masm.movdqu(temp5, externalAddress(oneHalfTanPtr));
    // 0x3fe00000,
    // 0x00000000,
    // 0x3fe00000
    // 0x00000000,
    masm.movdqu(temp6, externalAddress(mulSixteenPtr));
    // 0x40300000,
    // 0x00000000,
    // 0x3ff00000
    masm.unpcklpd(dest, dest);
    // 0x00000000,
    masm.movdqu(temp4, externalAddress(signMaskTanPtr));
    // 0x80000000,
    // 0x00000000,
    // 0x80000000
    masm.andpd(temp4, dest);
    // 0x6dc9c883,
    masm.movdqu(temp1, externalAddress(piThirtyTwoInvTanPtr));
    // 0x3fe45f30,
    // 0x6dc9c883,
    // 0x40245f30
    masm.mulpd(temp1, dest);
    masm.por(temp5, temp4);
    masm.addpd(temp1, temp5);
    masm.movdqu(temp7, temp1);
    masm.unpckhpd(temp7, temp7);
    masm.cvttsd2sil(gpr4, temp7);
    masm.cvttpd2dq(temp1, temp1);
    masm.cvtdq2pd(temp1, temp1);
    masm.mulpd(temp1, temp6);
    // 0x54444000,
    masm.movdqu(temp3, externalAddress(pOneTanPtr));
    // 0x3fb921fb,
    // 0x54440000,
    // 0x3fb921fb
    // 0x676733af,
    masm.movdq(temp5, externalAddress(qqTwoTanPtr));
    // 0x3d32e7b9
    masm.addq(gpr4, 469248);
    // 0x67674000,
    masm.movdqu(temp4, externalAddress(pTwoTanPtr));
    // 0xbd32e7b9,
    // 0x4c4c0000,
    // 0x3d468c23
    masm.mulpd(temp3, temp1);
    masm.andq(gpr4, 31);
    masm.mulsd(temp5, temp1);
    masm.movq(gpr3, gpr4);
    masm.mulpd(temp4, temp1);
    masm.shlq(gpr3, 1);
    masm.subpd(dest, temp3);
    // 0x3707344a,
    masm.mulpd(temp1, externalAddress(pThreeTanPtr));
    // 0x3aa8a2e0,
    // 0x03707345,
    // 0x3ae98a2e
    masm.addq(gpr4, gpr3);
    masm.shlq(gpr3, 2);
    masm.addq(gpr4, gpr3);
    masm.addsd(temp5, dest);
    masm.movdqu(temp2, dest);
    masm.subpd(dest, temp4);
    // 0x00000000,
    masm.movdq(temp6, externalAddress(onePtr));
    // 0x3ff00000
    // gpr4 now holds (index & 31) * 11 * 16; it becomes the byte offset into cTableTan, and
    // gpr1 the address of the selected table entry.
    masm.shlq(gpr4, 4);
    masm.leaq(gpr1, externalAddress(cTableTanPtr));
    // 0xfffc0000,
    masm.andpd(temp5, externalAddress(maskThirtyFiveTanPtr));
    // 0xffffffff,
    // 0x00000000,
    // 0x00000000
    masm.movdqu(temp3, dest);
    masm.addq(gpr1, gpr4);
    masm.subpd(temp2, dest);
    masm.unpckhpd(dest, dest);
    masm.divsd(temp6, temp5);
    masm.subpd(temp2, temp4);
    masm.movdqu(temp7, new AMD64Address(gpr1, 16));
    masm.subsd(temp3, temp5);
    masm.mulpd(temp7, dest);
    masm.subpd(temp2, temp1);
    masm.movdqu(temp1, new AMD64Address(gpr1, 48));
    masm.mulpd(temp1, dest);
    masm.movdqu(temp4, new AMD64Address(gpr1, 96));
    masm.mulpd(temp4, dest);
    masm.addsd(temp2, temp3);
    masm.movdqu(temp3, dest);
    masm.mulpd(dest, dest);
    masm.addpd(temp7, new AMD64Address(gpr1, 0));
    masm.addpd(temp1, new AMD64Address(gpr1, 32));
    masm.mulpd(temp1, dest);
    masm.addpd(temp4, new AMD64Address(gpr1, 80));
    masm.addpd(temp7, temp1);
    masm.movdqu(temp1, new AMD64Address(gpr1, 112));
    masm.mulpd(temp1, dest);
    masm.mulpd(dest, dest);
    masm.addpd(temp4, temp1);
    masm.movdqu(temp1, new AMD64Address(gpr1, 64));
    masm.mulpd(temp1, dest);
    masm.addpd(temp7, temp1);
    masm.movdqu(temp1, temp3);
    masm.mulpd(temp3, dest);
    masm.mulsd(dest, dest);
    masm.mulpd(temp1, new AMD64Address(gpr1, 144));
    masm.mulpd(temp4, temp3);
    masm.movdqu(temp3, temp1);
    masm.addpd(temp7, temp4);
    masm.movdqu(temp4, temp1);
    masm.mulsd(dest, temp7);
    masm.unpckhpd(temp7, temp7);
    masm.addsd(dest, temp7);
    masm.unpckhpd(temp1, temp1);
    masm.addsd(temp3, temp1);
    masm.subsd(temp4, temp3);
    masm.addsd(temp1, temp4);
    masm.movdqu(temp4, temp2);
    masm.movdq(temp7, new AMD64Address(gpr1, 144));
    masm.unpckhpd(temp2, temp2);
    masm.addsd(temp7, new AMD64Address(gpr1, 152));
    masm.mulsd(temp7, temp2);
    masm.addsd(temp7, new AMD64Address(gpr1, 136));
    masm.addsd(temp7, temp1);
    masm.addsd(dest, temp7);
    // 0x00000000,
    masm.movdq(temp7, externalAddress(onePtr));
    // 0x3ff00000
    masm.mulsd(temp4, temp6);
    masm.movdq(temp2, new AMD64Address(gpr1, 168));
    masm.andpd(temp2, temp6);
    masm.mulsd(temp5, temp2);
    masm.mulsd(temp6, new AMD64Address(gpr1, 160));
    masm.subsd(temp7, temp5);
    masm.subsd(temp2, new AMD64Address(gpr1, 128));
    masm.subsd(temp7, temp4);
    masm.mulsd(temp7, temp6);
    masm.movdqu(temp4, temp3);
    masm.subsd(temp3, temp2);
    masm.addsd(temp2, temp3);
    masm.subsd(temp4, temp2);
    masm.addsd(dest, temp4);
    masm.subsd(dest, temp7);
    masm.addsd(dest, temp3);
    // Main path done; result is in dest.
    masm.jmp(bb15);
    // bb0: only reached from the range check at the top, so the flags of that cmpl are still
    // live here. Signed "greater" selects the large-argument path (bb1); what remains are the
    // small arguments.
    masm.bind(bb0);
    masm.jcc(ConditionFlag.Greater, bb1);
    masm.pextrw(gpr1, dest, 3);
    masm.movl(gpr4, gpr1);
    // Exponent field all zero -> bb2.
    masm.andl(gpr1, 32752);
    masm.jcc(ConditionFlag.Equal, bb2);
    // Sign-masked top word below 15904 -> bb3.
    masm.andl(gpr4, 32767);
    masm.cmpl(gpr4, 15904);
    masm.jcc(ConditionFlag.Below, bb3);
    // Small arguments: dest += dest^3 * P(dest^2), with P evaluated in Horner form from the
    // qEleven .. qThree constants.
    masm.movdqu(temp2, dest);
    masm.movdqu(temp3, dest);
    // 0xb8fe4d77,
    masm.movdq(temp1, externalAddress(qElevenTanPtr));
    // 0x3f82609a
    masm.mulsd(temp2, dest);
    masm.mulsd(temp3, temp2);
    masm.mulsd(temp1, temp2);
    // 0xbf847a43,
    masm.addsd(temp1, externalAddress(qNineTanPtr));
    // 0x3f9664a0
    masm.mulsd(temp1, temp2);
    // 0x52c4c8ab,
    masm.addsd(temp1, externalAddress(qSevenTanPtr));
    // 0x3faba1ba
    masm.mulsd(temp1, temp2);
    // 0x11092746,
    masm.addsd(temp1, externalAddress(qFiveTanPtr));
    // 0x3fc11111
    masm.mulsd(temp1, temp2);
    // 0x55555612,
    masm.addsd(temp1, externalAddress(qThreeTanPtr));
    // 0x3fd55555
    masm.mulsd(temp1, temp3);
    masm.addsd(dest, temp1);
    masm.jmp(bb15);
    // bb3: very small arguments; dest = (dest + dest * c1) * c2 with the two scaling constants
    // loaded below.
    // NOTE(review): presumably c1 = 2^55 and c2 = 2^-55 as the table names suggest - confirm.
    masm.bind(bb3);
    // 0x00000000,
    masm.movdq(temp3, externalAddress(twoPowFiftyFiveTanPtr));
    // 0x43600000
    masm.mulsd(temp3, dest);
    masm.addsd(dest, temp3);
    // 0x00000000,
    masm.mulsd(dest, externalAddress(twoPowMFiftyFiveTanPtr));
    // 0x3c800000
    masm.jmp(bb15);
    // bb14: exponent field all ones (infinity or NaN argument); the result is 0.0 / 0.0.
    masm.bind(bb14);
    masm.xorpd(temp1, temp1);
    masm.xorpd(dest, dest);
    masm.divsd(dest, temp1);
    masm.jmp(bb15);
    // bb2: exponent field zero; dest is left untouched, i.e. the argument itself is the
    // result. The multiply only writes temp1.
    // NOTE(review): presumably executed just for its floating point status side effect -
    // confirm.
    masm.bind(bb2);
    masm.movdqu(temp1, dest);
    masm.mulsd(temp1, temp1);
    masm.jmp(bb15);
    // bb1: large arguments. Infinity/NaN is filtered out to bb14 first; the rest goes through
    // an integer multi-word multiplication of the argument's mantissa with 32-bit words read
    // from piInvTableTan.
    // NOTE(review): this looks like a Payne-Hanek style argument reduction - confirm against
    // the reference implementation before relying on that description.
    masm.bind(bb1);
    masm.pextrw(gpr3, dest, 3);
    masm.andl(gpr3, 32752);
    masm.cmpl(gpr3, 32752);
    masm.jcc(ConditionFlag.Equal, bb14);
    // Turn the exponent into a 4-byte-aligned offset into piInvTableTan; gpr3 becomes the
    // address of the first table word used.
    masm.subl(gpr3, 16224);
    masm.shrl(gpr3, 7);
    masm.andl(gpr3, 65532);
    masm.leaq(gpr10, externalAddress(piInvTableTanPtr));
    masm.addq(gpr3, gpr10);
    // gpr1:gpr4 receive the high and low parts of the argument's bit pattern; the orl/shrl
    // pair sets bit 20 of gpr1 and drops everything above it.
    masm.movdq(gpr1, dest);
    masm.movl(gpr9, new AMD64Address(gpr3, 20));
    masm.movl(gpr7, new AMD64Address(gpr3, 24));
    masm.movl(gpr4, gpr1);
    masm.shrq(gpr1, 21);
    masm.orl(gpr1, Integer.MIN_VALUE);
    masm.shrl(gpr1, 11);
    // Schoolbook multiplication: partial products are formed with imulq and carries are
    // propagated by splitting each 64-bit sum into its low word (movl) and high word (shrq 32).
    masm.movl(gpr8, gpr9);
    masm.imulq(gpr9, gpr4);
    masm.imulq(gpr8, gpr1);
    masm.imulq(gpr7, gpr1);
    masm.movl(gpr5, new AMD64Address(gpr3, 16));
    masm.movl(gpr6, new AMD64Address(gpr3, 12));
    masm.movl(gpr10, gpr9);
    masm.shrq(gpr9, 32);
    masm.addq(gpr8, gpr9);
    masm.addq(gpr10, gpr7);
    masm.movl(gpr7, gpr10);
    masm.shrq(gpr10, 32);
    masm.addq(gpr8, gpr10);
    masm.movl(gpr9, gpr5);
    masm.imulq(gpr5, gpr4);
    masm.imulq(gpr9, gpr1);
    masm.movl(gpr10, gpr6);
    masm.imulq(gpr6, gpr4);
    masm.movl(gpr2, gpr5);
    masm.shrq(gpr5, 32);
    masm.addq(gpr8, gpr2);
    masm.movl(gpr2, gpr8);
    masm.shrq(gpr8, 32);
    masm.addq(gpr9, gpr5);
    masm.addq(gpr9, gpr8);
    masm.shlq(gpr2, 32);
    masm.orq(gpr7, gpr2);
    masm.imulq(gpr10, gpr1);
    masm.movl(gpr8, new AMD64Address(gpr3, 8));
    masm.movl(gpr5, new AMD64Address(gpr3, 4));
    masm.movl(gpr2, gpr6);
    masm.shrq(gpr6, 32);
    masm.addq(gpr9, gpr2);
    masm.movl(gpr2, gpr9);
    masm.shrq(gpr9, 32);
    masm.addq(gpr10, gpr6);
    masm.addq(gpr10, gpr9);
    masm.movq(gpr6, gpr8);
    masm.imulq(gpr8, gpr4);
    masm.imulq(gpr6, gpr1);
    masm.movl(gpr9, gpr8);
    masm.shrq(gpr8, 32);
    masm.addq(gpr10, gpr9);
    masm.movl(gpr9, gpr10);
    masm.shrq(gpr10, 32);
    masm.addq(gpr6, gpr8);
    masm.addq(gpr6, gpr10);
    masm.movq(gpr8, gpr5);
    masm.imulq(gpr5, gpr4);
    masm.imulq(gpr8, gpr1);
    masm.shlq(gpr9, 32);
    masm.orq(gpr9, gpr2);
    masm.movl(gpr1, new AMD64Address(gpr3, 0));
    masm.movl(gpr10, gpr5);
    masm.shrq(gpr5, 32);
    masm.addq(gpr6, gpr10);
    masm.movl(gpr10, gpr6);
    masm.shrq(gpr6, 32);
    masm.addq(gpr8, gpr5);
    masm.addq(gpr8, gpr6);
    masm.imulq(gpr4, gpr1);
    // Recover the table byte offset (gpr3 - table base), scale it by 8, and combine it with the
    // argument's unbiased exponent to obtain the shift count in gpr3; gpr5 keeps the
    // argument's sign bit (bit 15 of the top word).
    masm.pextrw(gpr2, dest, 3);
    masm.leaq(gpr6, externalAddress(piInvTableTanPtr));
    masm.subq(gpr3, gpr6);
    masm.addl(gpr3, gpr3);
    masm.addl(gpr3, gpr3);
    masm.addl(gpr3, gpr3);
    masm.addl(gpr3, 19);
    masm.movl(gpr5, 32768);
    masm.andl(gpr5, gpr2);
    masm.shrl(gpr2, 4);
    masm.andl(gpr2, 2047);
    masm.subl(gpr2, 1023);
    masm.subl(gpr3, gpr2);
    masm.addq(gpr8, gpr4);
    masm.movl(gpr4, gpr3);
    masm.addl(gpr4, 32);
    // Negative shift count is handled separately in bb5.
    masm.cmpl(gpr3, 0);
    masm.jcc(ConditionFlag.Less, bb5);
    masm.negl(gpr3);
    masm.addl(gpr3, 29);
    masm.shll(gpr8);
    masm.movl(gpr6, gpr8);
    masm.andl(gpr8, 1073741823);
    // Bit 29 set -> bb6, which negates the multi-word value.
    masm.testl(gpr8, 536870912);
    masm.jcc(ConditionFlag.NotEqual, bb6);
    masm.shrl(gpr8);
    masm.movl(gpr2, 0);
    masm.shlq(gpr8, 32);
    masm.orq(gpr8, gpr10);
    // bb8: common continuation of the paths above and of bb5/bb6/bb13. A zero top word is
    // handled in bb9 by shifting the lower words up.
    masm.bind(bb8);
    masm.cmpq(gpr8, 0);
    masm.jcc(ConditionFlag.Equal, bb9);
    // bb10: normalize gpr8:gpr9:gpr7 so that the highest set bit of gpr8 sits at bit 29. The
    // flags of the subl below are consumed both here and by the first jcc in bb11.
    masm.bind(bb10);
    masm.bsrq(gpr10, gpr8);
    masm.movl(gpr3, 29);
    masm.subl(gpr3, gpr10);
    masm.jcc(ConditionFlag.LessEqual, bb11);
    masm.shlq(gpr8);
    masm.movq(gpr1, gpr9);
    masm.shlq(gpr9);
    masm.addl(gpr4, gpr3);
    masm.negl(gpr3);
    masm.addl(gpr3, 64);
    masm.shrq(gpr1);
    masm.shrq(gpr7);
    masm.orq(gpr8, gpr1);
    masm.orq(gpr9, gpr7);
    // bb12: convert the normalized integer words back to doubles and scale them with
    // exponents assembled in gpr4 and inserted via pinsrw.
    masm.bind(bb12);
    masm.cvtsi2sdq(dest, gpr8);
    masm.shrq(gpr9, 1);
    masm.cvtsi2sdq(temp3, gpr9);
    masm.xorpd(temp4, temp4);
    masm.shll(gpr4, 4);
    masm.negl(gpr4);
    masm.addl(gpr4, 16368);
    masm.orl(gpr4, gpr5);
    masm.xorl(gpr4, gpr2);
    masm.pinsrw(temp4, gpr4, 3);
    masm.leaq(gpr1, externalAddress(piFourTanPtr));
    // 0x00000000,
    masm.movdq(temp2, new AMD64Address(gpr1, 0));
    // 0x3fe921fb,
    // 0x4611a626,
    masm.movdq(temp7, new AMD64Address(gpr1, 8));
    // 0x3e85110b
    masm.xorpd(temp5, temp5);
    masm.subl(gpr4, 1008);
    masm.pinsrw(temp5, gpr4, 3);
    masm.mulsd(dest, temp4);
    masm.shll(gpr5, 16);
    masm.sarl(gpr5, 31);
    masm.mulsd(temp3, temp5);
    masm.movdqu(temp1, dest);
    masm.mulsd(dest, temp2);
    masm.shrl(gpr6, 30);
    masm.addsd(temp1, temp3);
    masm.mulsd(temp3, temp2);
    masm.addl(gpr6, gpr5);
    masm.xorl(gpr6, gpr5);
    masm.mulsd(temp7, temp1);
    masm.movl(gpr1, gpr6);
    masm.addsd(temp7, temp3);
    masm.movdqu(temp2, dest);
    masm.addsd(dest, temp7);
    masm.subsd(temp2, dest);
    masm.addsd(temp7, temp2);
    // From here on the sequence mirrors the main path above, operating on the reduced
    // argument in dest with its low-order correction in temp7.
    // 0x6dc9c883,
    masm.movdqu(temp1, externalAddress(piThirtyTwoInvTanPtr));
    // 0x40245f30
    // Duplicate the low double into both lanes; movlhps is the fallback without SSE3.
    if (masm.supports(CPUFeature.SSE3)) {
        masm.movddup(dest, dest);
    } else {
        masm.movlhps(dest, dest);
    }
    // 0x00000000,
    masm.movdqu(temp4, externalAddress(signMaskTanPtr));
    // 0x80000000,
    // 0x00000000,
    // 0x80000000
    masm.andpd(temp4, dest);
    masm.mulpd(temp1, dest);
    if (masm.supports(CPUFeature.SSE3)) {
        masm.movddup(temp7, temp7);
    } else {
        masm.movlhps(temp7, temp7);
    }
    // 0x00000000,
    masm.movdqu(temp5, externalAddress(oneHalfTanPtr));
    // 0x3fe00000,
    // 0x00000000,
    // 0x3fe00000
    // 0x00000000,
    masm.movdqu(temp6, externalAddress(mulSixteenPtr));
    // 0x40300000,
    // 0x00000000,
    // 0x3ff00000
    masm.por(temp5, temp4);
    masm.addpd(temp1, temp5);
    masm.movdqu(temp5, temp1);
    masm.unpckhpd(temp5, temp5);
    masm.cvttsd2sil(gpr4, temp5);
    masm.cvttpd2dq(temp1, temp1);
    masm.cvtdq2pd(temp1, temp1);
    masm.mulpd(temp1, temp6);
    // 0x54444000,
    masm.movdqu(temp3, externalAddress(pOneTanPtr));
    // 0x3fb921fb,
    // 0x54440000,
    // 0x3fb921fb
    // 0x676733af,
    masm.movdq(temp5, externalAddress(qqTwoTanPtr));
    // 0x3d32e7b9
    masm.shll(gpr1, 4);
    masm.addl(gpr4, 469248);
    // 0x67674000,
    masm.movdqu(temp4, externalAddress(pTwoTanPtr));
    // 0xbd32e7b9,
    // 0x4c4c0000,
    // 0x3d468c23
    masm.mulpd(temp3, temp1);
    masm.addl(gpr4, gpr1);
    masm.andl(gpr4, 31);
    masm.mulsd(temp5, temp1);
    masm.movl(gpr3, gpr4);
    masm.mulpd(temp4, temp1);
    masm.shll(gpr3, 1);
    masm.subpd(dest, temp3);
    // 0x3707344a,
    masm.mulpd(temp1, externalAddress(pThreeTanPtr));
    // 0x3aa8a2e0,
    // 0x03707345,
    // 0x3ae98a2e
    masm.addl(gpr4, gpr3);
    masm.shll(gpr3, 2);
    masm.addl(gpr4, gpr3);
    masm.addsd(temp5, dest);
    masm.movdqu(temp2, dest);
    masm.subpd(dest, temp4);
    // 0x00000000,
    masm.movdq(temp6, externalAddress(onePtr));
    // 0x3ff00000
    // As in the main path: gpr4 becomes the byte offset into cTableTan, gpr1 the entry address.
    masm.shll(gpr4, 4);
    masm.leaq(gpr1, externalAddress(cTableTanPtr));
    // 0xfffc0000,
    masm.andpd(temp5, externalAddress(maskThirtyFiveTanPtr));
    // 0xffffffff,
    // 0x00000000,
    // 0x00000000
    masm.movdqu(temp3, dest);
    masm.addq(gpr1, gpr4);
    masm.subpd(temp2, dest);
    masm.unpckhpd(dest, dest);
    masm.divsd(temp6, temp5);
    masm.subpd(temp2, temp4);
    masm.subsd(temp3, temp5);
    masm.subpd(temp2, temp1);
    masm.movdqu(temp1, new AMD64Address(gpr1, 48));
    // Fold in the low-order correction carried in temp7 (this step has no counterpart in the
    // main path).
    masm.addpd(temp2, temp7);
    masm.movdqu(temp7, new AMD64Address(gpr1, 16));
    masm.mulpd(temp7, dest);
    masm.movdqu(temp4, new AMD64Address(gpr1, 96));
    masm.mulpd(temp1, dest);
    masm.mulpd(temp4, dest);
    masm.addsd(temp2, temp3);
    masm.movdqu(temp3, dest);
    masm.mulpd(dest, dest);
    masm.addpd(temp7, new AMD64Address(gpr1, 0));
    masm.addpd(temp1, new AMD64Address(gpr1, 32));
    masm.mulpd(temp1, dest);
    masm.addpd(temp4, new AMD64Address(gpr1, 80));
    masm.addpd(temp7, temp1);
    masm.movdqu(temp1, new AMD64Address(gpr1, 112));
    masm.mulpd(temp1, dest);
    masm.mulpd(dest, dest);
    masm.addpd(temp4, temp1);
    masm.movdqu(temp1, new AMD64Address(gpr1, 64));
    masm.mulpd(temp1, dest);
    masm.addpd(temp7, temp1);
    masm.movdqu(temp1, temp3);
    masm.mulpd(temp3, dest);
    masm.mulsd(dest, dest);
    masm.mulpd(temp1, new AMD64Address(gpr1, 144));
    masm.mulpd(temp4, temp3);
    masm.movdqu(temp3, temp1);
    masm.addpd(temp7, temp4);
    masm.movdqu(temp4, temp1);
    masm.mulsd(dest, temp7);
    masm.unpckhpd(temp7, temp7);
    masm.addsd(dest, temp7);
    masm.unpckhpd(temp1, temp1);
    masm.addsd(temp3, temp1);
    masm.subsd(temp4, temp3);
    masm.addsd(temp1, temp4);
    masm.movdqu(temp4, temp2);
    masm.movdq(temp7, new AMD64Address(gpr1, 144));
    masm.unpckhpd(temp2, temp2);
    masm.addsd(temp7, new AMD64Address(gpr1, 152));
    masm.mulsd(temp7, temp2);
    masm.addsd(temp7, new AMD64Address(gpr1, 136));
    masm.addsd(temp7, temp1);
    masm.addsd(dest, temp7);
    // 0x00000000,
    masm.movdq(temp7, externalAddress(onePtr));
    // 0x3ff00000
    masm.mulsd(temp4, temp6);
    masm.movdq(temp2, new AMD64Address(gpr1, 168));
    masm.andpd(temp2, temp6);
    masm.mulsd(temp5, temp2);
    masm.mulsd(temp6, new AMD64Address(gpr1, 160));
    masm.subsd(temp7, temp5);
    masm.subsd(temp2, new AMD64Address(gpr1, 128));
    masm.subsd(temp7, temp4);
    masm.mulsd(temp7, temp6);
    masm.movdqu(temp4, temp3);
    masm.subsd(temp3, temp2);
    masm.addsd(temp2, temp3);
    masm.subsd(temp4, temp2);
    masm.addsd(dest, temp4);
    masm.subsd(dest, temp7);
    masm.addsd(dest, temp3);
    masm.jmp(bb15);
    // bb9: top word is zero; shift the three-word value up by one word (twice if necessary),
    // adding 64 to the exponent adjustment in gpr4 each time, then retry the normalization.
    masm.bind(bb9);
    masm.addl(gpr4, 64);
    masm.movq(gpr8, gpr9);
    masm.movq(gpr9, gpr7);
    masm.movl(gpr7, 0);
    masm.cmpq(gpr8, 0);
    masm.jcc(ConditionFlag.NotEqual, bb10);
    masm.addl(gpr4, 64);
    masm.movq(gpr8, gpr9);
    masm.movq(gpr9, gpr7);
    masm.cmpq(gpr8, 0);
    masm.jcc(ConditionFlag.NotEqual, bb10);
    masm.jmp(bb12);
    // bb11: reached from bb10 with the flags of "29 - bsr" still live. Equal means the value
    // is already normalized; otherwise shift right by the (negated) difference.
    masm.bind(bb11);
    masm.jcc(ConditionFlag.Equal, bb12);
    masm.negl(gpr3);
    masm.shrq(gpr9);
    masm.movq(gpr1, gpr8);
    masm.shrq(gpr8);
    masm.subl(gpr4, gpr3);
    masm.negl(gpr3);
    masm.addl(gpr3, 64);
    masm.shlq(gpr1);
    masm.orq(gpr9, gpr1);
    masm.jmp(bb12);
    // bb5: negative shift count path; rejoins at bb8, either directly or via bb13 when bit 31
    // of the shifted value is set.
    masm.bind(bb5);
    masm.notl(gpr3);
    masm.shlq(gpr8, 32);
    masm.orq(gpr8, gpr10);
    masm.shlq(gpr8);
    masm.movq(gpr6, gpr8);
    masm.testl(gpr8, Integer.MIN_VALUE);
    masm.jcc(ConditionFlag.NotEqual, bb13);
    masm.shrl(gpr8);
    masm.movl(gpr2, 0);
    masm.shrq(gpr6, 2);
    masm.jmp(bb8);
    // bb6: replace the three-word value gpr8:gpr9:gpr7 by (gpr2:0:0 - value) using a
    // subtract-with-borrow chain, and flag the result through gpr2 = 32768 (xor-ed into the
    // sign position in bb12).
    masm.bind(bb6);
    masm.shrl(gpr8);
    masm.movl(gpr2, 1073741824);
    masm.shrl(gpr2);
    masm.shlq(gpr8, 32);
    masm.orq(gpr8, gpr10);
    masm.shlq(gpr2, 32);
    masm.addl(gpr6, 1073741824);
    masm.movl(gpr3, 0);
    masm.movl(gpr10, 0);
    masm.subq(gpr3, gpr7);
    masm.sbbq(gpr10, gpr9);
    masm.sbbq(gpr2, gpr8);
    masm.movq(gpr7, gpr3);
    masm.movq(gpr9, gpr10);
    masm.movq(gpr8, gpr2);
    masm.movl(gpr2, 32768);
    masm.jmp(bb8);
    // bb13: same negation as bb6, for the negative shift count path coming from bb5.
    masm.bind(bb13);
    masm.shrl(gpr8);
    masm.movq(gpr2, 0x100000000L);
    masm.shrq(gpr2);
    masm.movl(gpr3, 0);
    masm.movl(gpr10, 0);
    masm.subq(gpr3, gpr7);
    masm.sbbq(gpr10, gpr9);
    masm.sbbq(gpr2, gpr8);
    masm.movq(gpr7, gpr3);
    masm.movq(gpr9, gpr10);
    masm.movq(gpr8, gpr2);
    masm.movl(gpr2, 32768);
    masm.shrq(gpr6, 2);
    masm.addl(gpr6, 1073741824);
    masm.jmp(bb8);
    // bb15: common exit of every path; the result is in dest.
    masm.bind(bb15);
}
Also used : Register(jdk.vm.ci.code.Register) ValueUtil.asRegister(jdk.vm.ci.code.ValueUtil.asRegister) ArrayDataPointerConstant(org.graalvm.compiler.lir.asm.ArrayDataPointerConstant) Label(org.graalvm.compiler.asm.Label) AMD64Address(org.graalvm.compiler.asm.amd64.AMD64Address)

Example 29 with AMD64MacroAssembler

use of org.graalvm.compiler.asm.amd64.AMD64MacroAssembler in project graal by oracle.

the class AMD64MathIntrinsicUnaryOp method sinIntrinsic.

/**
 * Emits the AMD64 instruction sequence for the sine intrinsic into {@code masm}.
 *
 * The input is read from {@code value} and the result is accumulated in {@code dest}; every
 * path ends at the label {@code bb15}, which is bound as the last emitted position.
 *
 * Visible structure of the emitted code:
 * <ul>
 * <li>Main path (falls through the first range check): table-driven evaluation using
 * {@code cTable} and the {@code sc*} / {@code p*} constants.</li>
 * <li>{@code bb0} / {@code bb2}: arguments rejected by the first range check that do not take
 * the {@code bb1} branch.</li>
 * <li>{@code bb1}: integer multiply/shift sequence driven by {@code piInvTable}; it rejoins the
 * floating-point evaluation at {@code bb12} (presumably argument reduction for large inputs -
 * TODO confirm against the upstream algorithm description).</li>
 * <li>{@code bb14}: produces {@code 0.0 / 0.0} when all exponent bits selected by the mask
 * 32752 are set.</li>
 * </ul>
 *
 * NOTE(review): the bare hexadecimal comments appear to list the 32-bit words of the constant
 * loaded by the neighbouring instruction (they look displaced by a formatter). The pair
 * surrounding the second {@code scFourPtr} load repeats the words shown for {@code pOnePtr}
 * and does not match the words listed at the first {@code scFourPtr} load - confirm.
 *
 * NOTE(review): {@code gpr3} is bound to {@code rcxTemp}; the one-operand shift forms
 * ({@code shll(reg)}, {@code shlq(reg)}, {@code shrq(reg)}, ...) presumably take their count
 * from that register - confirm against the assembler API.
 *
 * @param dest register that receives the result
 * @param value register holding the input
 * @param crb compilation result builder, used to resolve the stack temp and passed to
 *            {@code setCrb}
 * @param masm assembler that receives the emitted instructions
 */
public void sinIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) {
    ArrayDataPointerConstant oneHalfPtr = new ArrayDataPointerConstant(oneHalf, 16);
    ArrayDataPointerConstant pTwoPtr = new ArrayDataPointerConstant(pTwo, 16);
    ArrayDataPointerConstant scFourPtr = new ArrayDataPointerConstant(scFour, 16);
    ArrayDataPointerConstant cTablePtr = new ArrayDataPointerConstant(cTable, 16);
    ArrayDataPointerConstant scTwoPtr = new ArrayDataPointerConstant(scTwo, 16);
    ArrayDataPointerConstant scThreePtr = new ArrayDataPointerConstant(scThree, 16);
    ArrayDataPointerConstant scOnePtr = new ArrayDataPointerConstant(scOne, 16);
    ArrayDataPointerConstant piInvTablePtr = new ArrayDataPointerConstant(piInvTable, 16);
    ArrayDataPointerConstant piFourPtr = new ArrayDataPointerConstant(piFour, 16);
    ArrayDataPointerConstant piThirtyTwoInvPtr = new ArrayDataPointerConstant(piThirtyTwoInv, 8);
    ArrayDataPointerConstant shifterPtr = new ArrayDataPointerConstant(shifter, 8);
    ArrayDataPointerConstant signMaskPtr = new ArrayDataPointerConstant(signMask, 8);
    ArrayDataPointerConstant pThreePtr = new ArrayDataPointerConstant(pThree, 8);
    ArrayDataPointerConstant allOnesPtr = new ArrayDataPointerConstant(allOnes, 8);
    ArrayDataPointerConstant twoPowFiftyFivePtr = new ArrayDataPointerConstant(twoPowFiftyFive, 8);
    ArrayDataPointerConstant twoPowFiftyFiveMPtr = new ArrayDataPointerConstant(twoPowFiftyFiveM, 8);
    ArrayDataPointerConstant pOnePtr = new ArrayDataPointerConstant(pOne, 8);
    Label bb0 = new Label();
    Label bb1 = new Label();
    Label bb2 = new Label();
    Label bb4 = new Label();
    Label bb5 = new Label();
    Label bb6 = new Label();
    Label bb8 = new Label();
    Label bb9 = new Label();
    Label bb10 = new Label();
    Label bb11 = new Label();
    Label bb12 = new Label();
    Label bb13 = new Label();
    Label bb14 = new Label();
    Label bb15 = new Label();
    Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD);
    Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD);
    Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD);
    Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD);
    Register gpr5 = asRegister(gpr5Temp, AMD64Kind.QWORD);
    Register gpr6 = asRegister(gpr6Temp, AMD64Kind.QWORD);
    Register gpr7 = asRegister(gpr7Temp, AMD64Kind.QWORD);
    Register gpr8 = asRegister(gpr8Temp, AMD64Kind.QWORD);
    Register gpr9 = asRegister(gpr9Temp, AMD64Kind.QWORD);
    Register gpr10 = asRegister(gpr10Temp, AMD64Kind.QWORD);
    Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE);
    Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE);
    Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE);
    Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE);
    Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE);
    Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE);
    Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE);
    Register temp8 = asRegister(xmm8Temp, AMD64Kind.DOUBLE);
    Register temp9 = asRegister(xmm9Temp, AMD64Kind.DOUBLE);
    AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp);
    setCrb(crb);
    // Spill the input to the stack temp and make sure dest holds a copy of it.
    masm.movsd(stackSlot, value);
    if (dest.encoding != value.encoding) {
        masm.movdqu(dest, value);
    }
    // Load the 32-bit word at offset 4 of the spilled input (its upper half) into gpr1.
    masm.leaq(gpr1, stackSlot);
    masm.movl(gpr1, new AMD64Address(gpr1, 4));
    // 0x6dc9c883,
    masm.movdq(temp1, externalAddress(piThirtyTwoInvPtr));
    // 0x40245f30
    // 0x00000000,
    masm.movdq(temp2, externalAddress(shifterPtr));
    // 0x43380000
    // Range check on the masked upper word (mask 0x7FFF0000, bias 0x30300000, bound
    // 0x10C50000); an unsigned "above" result leaves the main path for bb0.
    masm.andl(gpr1, 2147418112);
    masm.subl(gpr1, 808452096);
    masm.cmpl(gpr1, 281346048);
    masm.jcc(ConditionFlag.Above, bb0);
    masm.mulsd(temp1, dest);
    // 0x00000000,
    masm.movdqu(temp5, externalAddress(oneHalfPtr));
    // 0x3fe00000,
    // 0x00000000,
    // 0x3fe00000
    // 0x00000000,
    masm.movdq(temp4, externalAddress(signMaskPtr));
    // 0x80000000
    masm.pand(temp4, dest);
    masm.por(temp5, temp4);
    masm.addpd(temp1, temp5);
    masm.cvttsd2sil(gpr4, temp1);
    masm.cvtsi2sdl(temp1, gpr4);
    // 0x1a600000,
    masm.movdqu(temp6, externalAddress(pTwoPtr));
    // 0x3d90b461,
    // 0x1a600000,
    // 0x3d90b461
    masm.movq(gpr7, 0x3fb921fb54400000L);
    masm.movdq(temp3, gpr7);
    // 0xa556c734,
    masm.movdqu(temp5, externalAddress(scFourPtr));
    // 0x3ec71de3,
    // 0x1a01a01a,
    // 0x3efa01a0
    masm.pshufd(temp4, dest, 0x44);
    masm.mulsd(temp3, temp1);
    // Duplicate the low double of temp1 into both lanes; movddup is used only when SSE3 is
    // reported as supported.
    if (masm.supports(CPUFeature.SSE3)) {
        masm.movddup(temp1, temp1);
    } else {
        masm.movlhps(temp1, temp1);
    }
    // Index into cTable: (gpr4 & 63) * 32 bytes from the table base.
    masm.andl(gpr4, 63);
    masm.shll(gpr4, 5);
    masm.leaq(gpr1, externalAddress(cTablePtr));
    masm.addq(gpr1, gpr4);
    masm.movdqu(temp8, new AMD64Address(gpr1, 0));
    masm.mulpd(temp6, temp1);
    // 0x2e037073,
    masm.mulsd(temp1, externalAddress(pThreePtr));
    // 0x3b63198a
    masm.subsd(temp4, temp3);
    masm.subsd(dest, temp3);
    if (masm.supports(CPUFeature.SSE3)) {
        masm.movddup(temp3, temp4);
    } else {
        masm.movdqu(temp3, temp4);
        masm.movlhps(temp3, temp3);
    }
    masm.subsd(temp4, temp6);
    masm.pshufd(dest, dest, 0x44);
    masm.pshufd(temp7, temp8, 0xE);
    masm.movdqu(temp2, temp8);
    masm.movdqu(temp9, temp7);
    masm.mulpd(temp5, dest);
    masm.subpd(dest, temp6);
    masm.mulsd(temp7, temp4);
    masm.subsd(temp3, temp4);
    masm.mulpd(temp5, dest);
    masm.mulpd(dest, dest);
    masm.subsd(temp3, temp6);
    // 0x11111111,
    masm.movdqu(temp6, externalAddress(scTwoPtr));
    // 0x3f811111,
    // 0x55555555,
    // 0x3fa55555
    masm.subsd(temp1, temp3);
    masm.movdq(temp3, new AMD64Address(gpr1, 24));
    masm.addsd(temp2, temp3);
    masm.subsd(temp7, temp2);
    masm.mulsd(temp2, temp4);
    masm.mulpd(temp6, dest);
    masm.mulsd(temp3, temp4);
    masm.mulpd(temp2, dest);
    masm.mulpd(dest, dest);
    // 0x1a01a01a,
    masm.addpd(temp5, externalAddress(scThreePtr));
    // 0xbf2a01a0,
    // 0x16c16c17,
    // 0xbf56c16c
    masm.mulsd(temp4, temp8);
    // 0x55555555,
    masm.addpd(temp6, externalAddress(scOnePtr));
    // 0xbfc55555,
    // 0x00000000,
    // 0xbfe00000
    masm.mulpd(temp5, dest);
    masm.movdqu(dest, temp3);
    masm.addsd(temp3, temp9);
    masm.mulpd(temp1, temp7);
    masm.movdqu(temp7, temp4);
    masm.addsd(temp4, temp3);
    masm.addpd(temp6, temp5);
    masm.subsd(temp9, temp3);
    masm.subsd(temp3, temp4);
    masm.addsd(temp1, new AMD64Address(gpr1, 16));
    masm.mulpd(temp6, temp2);
    masm.addsd(temp9, dest);
    masm.addsd(temp3, temp7);
    masm.addsd(temp1, temp9);
    masm.addsd(temp1, temp3);
    masm.addsd(temp1, temp6);
    masm.unpckhpd(temp6, temp6);
    masm.movdqu(dest, temp4);
    masm.addsd(temp1, temp6);
    masm.addsd(dest, temp1);
    masm.jmp(bb15);
    // bb14: dest = 0.0 / 0.0. Only reached from bb1 when the exponent bits are all set.
    masm.bind(bb14);
    masm.xorpd(temp1, temp1);
    masm.xorpd(dest, dest);
    masm.divsd(dest, temp1);
    masm.jmp(bb15);
    // bb0: entered from the range check above, so the flags are still those of its cmpl.
    // A signed "greater" result goes to bb1; otherwise the upper word is examined further.
    masm.bind(bb0);
    masm.jcc(ConditionFlag.Greater, bb1);
    masm.shrl(gpr1, 20);
    masm.cmpl(gpr1, 3325);
    masm.jcc(ConditionFlag.NotEqual, bb2);
    // 0xffffffff,
    masm.mulsd(dest, externalAddress(allOnesPtr));
    // 0x3fefffff
    masm.jmp(bb15);
    // bb2: works only on temp3; dest is left holding the input copy made at entry.
    masm.bind(bb2);
    // 0x00000000,
    masm.movdq(temp3, externalAddress(twoPowFiftyFivePtr));
    // 0x43600000
    masm.mulsd(temp3, dest);
    masm.subsd(temp3, dest);
    // 0x00000000,
    masm.mulsd(temp3, externalAddress(twoPowFiftyFiveMPtr));
    // 0x3c800000
    masm.jmp(bb15);
    // bb1: extract the top 16 bits of dest; if the bits under mask 32752 (0x7FF0) are all
    // set, branch to bb14. Otherwise derive a byte offset into piInvTable from them.
    masm.bind(bb1);
    masm.pextrw(gpr3, dest, 3);
    masm.andl(gpr3, 32752);
    masm.cmpl(gpr3, 32752);
    masm.jcc(ConditionFlag.Equal, bb14);
    masm.subl(gpr3, 16224);
    masm.shrl(gpr3, 7);
    masm.andl(gpr3, 65532);
    masm.leaq(gpr10, externalAddress(piInvTablePtr));
    masm.addq(gpr3, gpr10);
    masm.movdq(gpr1, dest);
    // Multi-word integer product of the input bits (split across gpr4 and gpr1) with the
    // 32-bit table words at offsets 24 down to 0; partial sums are carried by hand.
    masm.movl(gpr9, new AMD64Address(gpr3, 20));
    masm.movl(gpr7, new AMD64Address(gpr3, 24));
    masm.movl(gpr4, gpr1);
    masm.shrq(gpr1, 21);
    masm.orl(gpr1, Integer.MIN_VALUE);
    masm.shrl(gpr1, 11);
    masm.movl(gpr8, gpr9);
    masm.imulq(gpr9, gpr4);
    masm.imulq(gpr8, gpr1);
    masm.imulq(gpr7, gpr1);
    masm.movl(gpr5, new AMD64Address(gpr3, 16));
    masm.movl(gpr6, new AMD64Address(gpr3, 12));
    masm.movl(gpr10, gpr9);
    masm.shrq(gpr9, 32);
    masm.addq(gpr8, gpr9);
    masm.addq(gpr10, gpr7);
    masm.movl(gpr7, gpr10);
    masm.shrq(gpr10, 32);
    masm.addq(gpr8, gpr10);
    masm.movl(gpr9, gpr5);
    masm.imulq(gpr5, gpr4);
    masm.imulq(gpr9, gpr1);
    masm.movl(gpr10, gpr6);
    masm.imulq(gpr6, gpr4);
    masm.movl(gpr2, gpr5);
    masm.shrq(gpr5, 32);
    masm.addq(gpr8, gpr2);
    masm.movl(gpr2, gpr8);
    masm.shrq(gpr8, 32);
    masm.addq(gpr9, gpr5);
    masm.addq(gpr9, gpr8);
    masm.shlq(gpr2, 32);
    masm.orq(gpr7, gpr2);
    masm.imulq(gpr10, gpr1);
    masm.movl(gpr8, new AMD64Address(gpr3, 8));
    masm.movl(gpr5, new AMD64Address(gpr3, 4));
    masm.movl(gpr2, gpr6);
    masm.shrq(gpr6, 32);
    masm.addq(gpr9, gpr2);
    masm.movl(gpr2, gpr9);
    masm.shrq(gpr9, 32);
    masm.addq(gpr10, gpr6);
    masm.addq(gpr10, gpr9);
    masm.movq(gpr6, gpr8);
    masm.imulq(gpr8, gpr4);
    masm.imulq(gpr6, gpr1);
    masm.movl(gpr9, gpr8);
    masm.shrq(gpr8, 32);
    masm.addq(gpr10, gpr9);
    masm.movl(gpr9, gpr10);
    masm.shrq(gpr10, 32);
    masm.addq(gpr6, gpr8);
    masm.addq(gpr6, gpr10);
    masm.movq(gpr8, gpr5);
    masm.imulq(gpr5, gpr4);
    masm.imulq(gpr8, gpr1);
    masm.shlq(gpr9, 32);
    masm.orq(gpr9, gpr2);
    masm.movl(gpr1, new AMD64Address(gpr3, 0));
    masm.movl(gpr10, gpr5);
    masm.shrq(gpr5, 32);
    masm.addq(gpr6, gpr10);
    masm.movl(gpr10, gpr6);
    masm.shrq(gpr6, 32);
    masm.addq(gpr8, gpr5);
    masm.addq(gpr8, gpr6);
    masm.imulq(gpr4, gpr1);
    // Recover the table byte offset from gpr3, scale it by 8 and combine it with the
    // exponent field of dest; gpr5 keeps the bit selected by mask 32768 from the top word.
    masm.pextrw(gpr2, dest, 3);
    masm.leaq(gpr6, externalAddress(piInvTablePtr));
    masm.subq(gpr3, gpr6);
    masm.addl(gpr3, gpr3);
    masm.addl(gpr3, gpr3);
    masm.addl(gpr3, gpr3);
    masm.addl(gpr3, 19);
    masm.movl(gpr5, 32768);
    masm.andl(gpr5, gpr2);
    masm.shrl(gpr2, 4);
    masm.andl(gpr2, 2047);
    masm.subl(gpr2, 1023);
    masm.subl(gpr3, gpr2);
    masm.addq(gpr8, gpr4);
    masm.movl(gpr4, gpr3);
    masm.addl(gpr4, 32);
    masm.cmpl(gpr3, 1);
    masm.jcc(ConditionFlag.Less, bb4);
    masm.negl(gpr3);
    masm.addl(gpr3, 29);
    masm.shll(gpr8);
    masm.movl(gpr6, gpr8);
    masm.andl(gpr8, 536870911);
    masm.testl(gpr8, 268435456);
    masm.jcc(ConditionFlag.NotEqual, bb5);
    masm.shrl(gpr8);
    masm.movl(gpr2, 0);
    masm.shlq(gpr8, 32);
    masm.orq(gpr8, gpr10);
    // bb6: if the top word is zero, bb8 shifts the lower words up before continuing.
    masm.bind(bb6);
    masm.cmpq(gpr8, 0);
    masm.jcc(ConditionFlag.Equal, bb8);
    // bb9: position the leading set bit of gpr8 (found with bsrq) relative to bit 29.
    masm.bind(bb9);
    masm.bsrq(gpr10, gpr8);
    masm.movl(gpr3, 29);
    masm.subl(gpr3, gpr10);
    masm.jcc(ConditionFlag.LessEqual, bb10);
    masm.shlq(gpr8);
    masm.movq(gpr1, gpr9);
    masm.shlq(gpr9);
    masm.addl(gpr4, gpr3);
    masm.negl(gpr3);
    masm.addl(gpr3, 64);
    masm.shrq(gpr1);
    masm.shrq(gpr7);
    masm.orq(gpr8, gpr1);
    masm.orq(gpr9, gpr7);
    // bb11: convert the integer words in gpr8 / gpr9 to doubles and scale them with
    // factors whose top 16 bits are assembled in gpr4.
    masm.bind(bb11);
    masm.cvtsi2sdq(dest, gpr8);
    masm.shrq(gpr9, 1);
    masm.cvtsi2sdq(temp3, gpr9);
    masm.xorpd(temp4, temp4);
    masm.shll(gpr4, 4);
    masm.negl(gpr4);
    masm.addl(gpr4, 16368);
    masm.orl(gpr4, gpr5);
    masm.xorl(gpr4, gpr2);
    masm.pinsrw(temp4, gpr4, 3);
    masm.leaq(gpr1, externalAddress(piFourPtr));
    // 0x40000000,
    masm.movdqu(temp2, new AMD64Address(gpr1, 0));
    // 0x3fe921fb,
    // 0x18469899,
    // 0x3e64442d
    masm.xorpd(temp5, temp5);
    masm.subl(gpr4, 1008);
    masm.pinsrw(temp5, gpr4, 3);
    masm.mulsd(dest, temp4);
    masm.shll(gpr5, 16);
    masm.sarl(gpr5, 31);
    masm.mulsd(temp3, temp5);
    masm.movdqu(temp1, dest);
    masm.pshufd(temp6, temp2, 0xE);
    masm.mulsd(dest, temp2);
    masm.shrl(gpr6, 29);
    masm.addsd(temp1, temp3);
    masm.mulsd(temp3, temp2);
    masm.addl(gpr6, gpr5);
    masm.xorl(gpr6, gpr5);
    masm.mulsd(temp6, temp1);
    masm.movl(gpr1, gpr6);
    masm.addsd(temp6, temp3);
    masm.movdqu(temp2, dest);
    masm.addsd(dest, temp6);
    masm.subsd(temp2, dest);
    masm.addsd(temp6, temp2);
    // bb12: continuation of the bb1 path. The sequence below parallels the main-path
    // evaluation, additionally folding gpr1 into the table index and subtracting temp6.
    masm.bind(bb12);
    // 0x6dc9c883,
    masm.movdq(temp1, externalAddress(piThirtyTwoInvPtr));
    // 0x40245f30
    masm.mulsd(temp1, dest);
    // 0x00000000,
    masm.movdq(temp5, externalAddress(oneHalfPtr));
    // 0x3fe00000,
    // 0x00000000,
    // 0x3fe00000
    // 0x00000000,
    masm.movdq(temp4, externalAddress(signMaskPtr));
    // 0x80000000
    masm.pand(temp4, dest);
    masm.por(temp5, temp4);
    masm.addpd(temp1, temp5);
    masm.cvttsd2sil(gpr4, temp1);
    masm.cvtsi2sdl(temp1, gpr4);
    // 0x54400000,
    masm.movdq(temp3, externalAddress(pOnePtr));
    // 0x3fb921fb
    // 0x1a600000,
    masm.movdqu(temp2, externalAddress(pTwoPtr));
    // 0x3d90b461,
    // 0x1a600000,
    // 0x3d90b461
    masm.mulsd(temp3, temp1);
    masm.unpcklpd(temp1, temp1);
    masm.shll(gpr1, 3);
    masm.addl(gpr4, 1865216);
    masm.movdqu(temp4, dest);
    masm.addl(gpr4, gpr1);
    masm.andl(gpr4, 63);
    // 0x54400000,
    masm.movdqu(temp5, externalAddress(scFourPtr));
    // 0x3fb921fb
    masm.leaq(gpr1, externalAddress(cTablePtr));
    masm.shll(gpr4, 5);
    masm.addq(gpr1, gpr4);
    masm.movdqu(temp8, new AMD64Address(gpr1, 0));
    masm.mulpd(temp2, temp1);
    masm.subsd(dest, temp3);
    // 0x2e037073,
    masm.mulsd(temp1, externalAddress(pThreePtr));
    // 0x3b63198a
    masm.subsd(temp4, temp3);
    masm.unpcklpd(dest, dest);
    masm.movdqu(temp3, temp4);
    masm.subsd(temp4, temp2);
    masm.mulpd(temp5, dest);
    masm.subpd(dest, temp2);
    masm.pshufd(temp7, temp8, 0xE);
    masm.movdqu(temp9, temp7);
    masm.mulsd(temp7, temp4);
    masm.subsd(temp3, temp4);
    masm.mulpd(temp5, dest);
    masm.mulpd(dest, dest);
    masm.subsd(temp3, temp2);
    masm.movdqu(temp2, temp8);
    masm.subsd(temp1, temp3);
    masm.movdq(temp3, new AMD64Address(gpr1, 24));
    masm.addsd(temp2, temp3);
    masm.subsd(temp7, temp2);
    masm.subsd(temp1, temp6);
    // 0x11111111,
    masm.movdqu(temp6, externalAddress(scTwoPtr));
    // 0x3f811111,
    // 0x55555555,
    // 0x3fa55555
    masm.mulsd(temp2, temp4);
    masm.mulpd(temp6, dest);
    masm.mulsd(temp3, temp4);
    masm.mulpd(temp2, dest);
    masm.mulpd(dest, dest);
    // 0x1a01a01a,
    masm.addpd(temp5, externalAddress(scThreePtr));
    // 0xbf2a01a0,
    // 0x16c16c17,
    // 0xbf56c16c
    masm.mulsd(temp4, temp8);
    // 0x55555555,
    masm.addpd(temp6, externalAddress(scOnePtr));
    // 0xbfc55555,
    // 0x00000000,
    // 0xbfe00000
    masm.mulpd(temp5, dest);
    masm.movdqu(dest, temp3);
    masm.addsd(temp3, temp9);
    masm.mulpd(temp1, temp7);
    masm.movdqu(temp7, temp4);
    masm.addsd(temp4, temp3);
    masm.addpd(temp6, temp5);
    masm.subsd(temp9, temp3);
    masm.subsd(temp3, temp4);
    masm.addsd(temp1, new AMD64Address(gpr1, 16));
    masm.mulpd(temp6, temp2);
    masm.addsd(temp9, dest);
    masm.addsd(temp3, temp7);
    masm.addsd(temp1, temp9);
    masm.addsd(temp1, temp3);
    masm.addsd(temp1, temp6);
    masm.unpckhpd(temp6, temp6);
    masm.movdqu(dest, temp4);
    masm.addsd(temp1, temp6);
    masm.addsd(dest, temp1);
    masm.jmp(bb15);
    // bb8: the top word is zero. Shift the words up by one position (adding 64 to gpr4) up
    // to two times; if the top word is still zero, continue at bb12 with dest and temp6
    // cleared.
    masm.bind(bb8);
    masm.addl(gpr4, 64);
    masm.movq(gpr8, gpr9);
    masm.movq(gpr9, gpr7);
    masm.movl(gpr7, 0);
    masm.cmpq(gpr8, 0);
    masm.jcc(ConditionFlag.NotEqual, bb9);
    masm.addl(gpr4, 64);
    masm.movq(gpr8, gpr9);
    masm.movq(gpr9, gpr7);
    masm.cmpq(gpr8, 0);
    masm.jcc(ConditionFlag.NotEqual, bb9);
    masm.xorpd(dest, dest);
    masm.xorpd(temp6, temp6);
    masm.jmp(bb12);
    // bb10: entered with the flags of the subl in bb9. A zero difference needs no shift;
    // otherwise shift right instead of left.
    masm.bind(bb10);
    masm.jcc(ConditionFlag.Equal, bb11);
    masm.negl(gpr3);
    masm.shrq(gpr9);
    masm.movq(gpr1, gpr8);
    masm.shrq(gpr8);
    masm.subl(gpr4, gpr3);
    masm.negl(gpr3);
    masm.addl(gpr3, 64);
    masm.shlq(gpr1);
    masm.orq(gpr9, gpr1);
    masm.jmp(bb11);
    // bb4: taken when gpr3 < 1 in the comparison at the end of bb1.
    masm.bind(bb4);
    masm.negl(gpr3);
    masm.shlq(gpr8, 32);
    masm.orq(gpr8, gpr10);
    masm.shlq(gpr8);
    masm.movq(gpr6, gpr8);
    masm.testl(gpr8, Integer.MIN_VALUE);
    masm.jcc(ConditionFlag.NotEqual, bb13);
    masm.shrl(gpr8);
    masm.movl(gpr2, 0);
    masm.shrq(gpr6, 3);
    masm.jmp(bb6);
    // bb5: taken when bit 268435456 is set after masking; computes a three-word
    // subtract-with-borrow and records 32768 in gpr2 before rejoining bb6.
    masm.bind(bb5);
    masm.shrl(gpr8);
    masm.movl(gpr2, 536870912);
    masm.shrl(gpr2);
    masm.shlq(gpr8, 32);
    masm.orq(gpr8, gpr10);
    masm.shlq(gpr2, 32);
    masm.addl(gpr6, 536870912);
    masm.movl(gpr3, 0);
    masm.movl(gpr10, 0);
    masm.subq(gpr3, gpr7);
    masm.sbbq(gpr10, gpr9);
    masm.sbbq(gpr2, gpr8);
    masm.movq(gpr7, gpr3);
    masm.movq(gpr9, gpr10);
    masm.movq(gpr8, gpr2);
    masm.movl(gpr2, 32768);
    masm.jmp(bb6);
    // bb13: counterpart of bb5 for the bb4 path (bit 31 set in the shifted top word).
    masm.bind(bb13);
    masm.shrl(gpr8);
    masm.movq(gpr2, 0x100000000L);
    masm.shrq(gpr2);
    masm.movl(gpr3, 0);
    masm.movl(gpr10, 0);
    masm.subq(gpr3, gpr7);
    masm.sbbq(gpr10, gpr9);
    masm.sbbq(gpr2, gpr8);
    masm.movq(gpr7, gpr3);
    masm.movq(gpr9, gpr10);
    masm.movq(gpr8, gpr2);
    masm.movl(gpr2, 32768);
    masm.shrq(gpr6, 3);
    masm.addl(gpr6, 536870912);
    masm.jmp(bb6);
    // bb15: common exit for every path.
    masm.bind(bb15);
}
Also used : Register(jdk.vm.ci.code.Register) ValueUtil.asRegister(jdk.vm.ci.code.ValueUtil.asRegister) ArrayDataPointerConstant(org.graalvm.compiler.lir.asm.ArrayDataPointerConstant) Label(org.graalvm.compiler.asm.Label) AMD64Address(org.graalvm.compiler.asm.amd64.AMD64Address)

Example 30 with AMD64MacroAssembler

use of org.graalvm.compiler.asm.amd64.AMD64MacroAssembler in project graal by oracle.

the class AMD64MathIntrinsicUnaryOp method log10Intrinsic.

/*
 * Copyright (c) 2014, 2016, Intel Corporation. All rights reserved.
 * Intel Math Library (LIBM) Source Code
 *
 * ALGORITHM DESCRIPTION - LOG10()
 * ---------------------
 *
 * Let x = 2^k * mx, mx in [1,2)
 *
 * Get B ~ 1/mx based on the output of rcpss instruction (B0):
 *   B = int((B0*LH*2^7+0.5))/2^7
 *   LH is a short approximation for log10(e)
 *
 * Reduced argument: r = B*mx - LH (computed accurately in high and low parts)
 *
 * Result: k*log10(2) - log(B) + p(r)
 *   p(r) is a degree 7 polynomial
 *   -log(B) is read from a data table (high, low parts)
 *   Result is formed from high and low parts
 *
 * Special cases:
 *   log10(0) = -INF with divide-by-zero exception raised
 *   log10(1) = +0
 *   log10(x) = NaN with invalid exception raised if x < -0, including -INF
 *   log10(+INF) = +INF
 *
 * Emitted code layout: the input is spilled to the stack temp and copied to dest; the main
 * path runs from bb1 to the jump to bb8; bb0 and the labels after it handle inputs rejected
 * by the range check on the top 16 bits. All paths end at bb8.
 *
 * NOTE(review): the bare hexadecimal comments in the body appear to list the 32-bit words of
 * the constant loaded by the neighbouring instruction (they look displaced by a formatter) -
 * confirm against the constant tables.
 */
public void log10Intrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) {
    ArrayDataPointerConstant highmaskLogTenPtr = new ArrayDataPointerConstant(highmaskLogTen, 16);
    ArrayDataPointerConstant logTenEPtr = new ArrayDataPointerConstant(logTenE, 16);
    ArrayDataPointerConstant logTenTablePtr = new ArrayDataPointerConstant(logTenTable, 16);
    ArrayDataPointerConstant logTwoLogTenDataPtr = new ArrayDataPointerConstant(logTwoLogTenData, 16);
    ArrayDataPointerConstant coeffLogTenDataPtr = new ArrayDataPointerConstant(coeffLogTenData, 16);
    Label bb0 = new Label();
    Label bb1 = new Label();
    Label bb2 = new Label();
    Label bb3 = new Label();
    Label bb4 = new Label();
    Label bb5 = new Label();
    Label bb6 = new Label();
    Label bb7 = new Label();
    Label bb8 = new Label();
    Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD);
    Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD);
    Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD);
    Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD);
    Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE);
    Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE);
    Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE);
    Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE);
    Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE);
    Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE);
    Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE);
    AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp);
    setCrb(crb);
    // Spill the input so the special-case paths can reload it, and make sure dest holds a
    // copy of it.
    masm.movdq(stackSlot, value);
    if (dest.encoding != value.encoding) {
        masm.movdqu(dest, value);
    }
    // 0xf8000000,
    masm.movdqu(temp5, externalAddress(highmaskLogTenPtr));
    // 0xffffffff,
    // 0x00000000,
    // 0xffffe000
    // Build constants in place by inserting 16-bit patterns into word 3 of zeroed registers
    // (pinsrw) or by moving 32-bit immediates (movdl).
    masm.xorpd(temp2, temp2);
    masm.movl(gpr1, 16368);
    masm.pinsrw(temp2, gpr1, 3);
    masm.movl(gpr2, 1054736384);
    masm.movdl(temp7, gpr2);
    masm.xorpd(temp3, temp3);
    masm.movl(gpr3, 30704);
    masm.pinsrw(temp3, gpr3, 3);
    masm.movl(gpr3, 32768);
    masm.movdl(temp4, gpr3);
    masm.movdqu(temp1, value);
    // gpr1 = top 16 bits of the input; used for the range check below.
    masm.pextrw(gpr1, dest, 3);
    masm.por(dest, temp2);
    masm.movl(gpr2, 16352);
    masm.psrlq(dest, 27);
    // 0x00000000,
    masm.movdqu(temp2, externalAddress(logTenEPtr));
    // 0x3fdbc000,
    // 0xbf2e4108,
    // 0x3f5a7a6c
    masm.psrld(dest, 2);
    masm.rcpps(dest, dest);
    masm.psllq(temp1, 12);
    masm.pshufd(temp6, temp5, 0x4E);
    masm.psrlq(temp1, 12);
    // Unsigned check (top word - 16) >= 32736 sends the input to the special-case code at
    // bb0.
    masm.subl(gpr1, 16);
    masm.cmpl(gpr1, 32736);
    masm.jcc(ConditionFlag.AboveEqual, bb0);
    // bb1: main evaluation; also re-entered from bb3 after the input has been rescaled.
    masm.bind(bb1);
    masm.mulss(dest, temp7);
    masm.por(temp1, temp3);
    masm.andpd(temp5, temp1);
    masm.paddd(dest, temp4);
    // 0xc1a5f12e,
    masm.movdqu(temp3, externalAddress(coeffLogTenDataPtr));
    // 0x40358874,
    // 0x64d4ef0d,
    // 0xc0089309
    masm.leaq(gpr4, externalAddress(coeffLogTenDataPtr));
    // 0x385593b1,
    masm.movdqu(temp4, new AMD64Address(gpr4, 16));
    // 0xc025c917,
    // 0xdc963467,
    // 0x3ffc6a02
    masm.subsd(temp1, temp5);
    masm.movdl(gpr3, dest);
    masm.psllq(dest, 29);
    masm.andpd(dest, temp6);
    // 0x509f7800,
    masm.movdq(temp6, externalAddress(logTwoLogTenDataPtr));
    // 0x3f934413
    masm.andl(gpr1, 32752);
    masm.subl(gpr1, gpr2);
    masm.cvtsi2sdl(temp7, gpr1);
    masm.mulpd(temp5, dest);
    masm.mulsd(temp1, dest);
    masm.subsd(temp5, temp2);
    // 0x7f9d3aa1,
    masm.movdqu(temp2, new AMD64Address(gpr4, 32));
    // 0x4016ab9f,
    // 0xdc77b115,
    // 0xbff27af2
    // Table lookup: the index is taken from bits of gpr3 (mask 16711680, shifted right by
    // 12) and applied to logTenTable with a fixed displacement of -1504.
    masm.leaq(gpr4, externalAddress(logTenTablePtr));
    masm.andl(gpr3, 16711680);
    masm.shrl(gpr3, 12);
    masm.movdqu(dest, new AMD64Address(gpr4, gpr3, Scale.Times1, -1504));
    masm.addsd(temp1, temp5);
    masm.mulsd(temp6, temp7);
    masm.pshufd(temp5, temp1, 0x44);
    masm.leaq(gpr4, externalAddress(logTwoLogTenDataPtr));
    // 0x1f12b358,
    masm.mulsd(temp7, new AMD64Address(gpr4, 8));
    // 0x3cdfef31
    masm.mulsd(temp3, temp1);
    masm.addsd(dest, temp6);
    masm.mulpd(temp4, temp5);
    masm.leaq(gpr4, externalAddress(logTenEPtr));
    // 0xbf2e4108,
    masm.movdq(temp6, new AMD64Address(gpr4, 8));
    // 0x3f5a7a6c
    masm.mulpd(temp5, temp5);
    masm.addpd(temp4, temp2);
    masm.mulpd(temp3, temp5);
    masm.pshufd(temp2, dest, 0xE4);
    masm.addsd(dest, temp1);
    masm.mulsd(temp4, temp1);
    masm.subsd(temp2, dest);
    masm.mulsd(temp6, temp1);
    masm.addsd(temp1, temp2);
    masm.pshufd(temp2, dest, 0xEE);
    masm.mulsd(temp5, temp5);
    masm.addsd(temp7, temp2);
    masm.addsd(temp1, temp6);
    masm.addpd(temp4, temp3);
    masm.addsd(temp1, temp7);
    masm.mulpd(temp4, temp5);
    masm.addsd(temp1, temp4);
    masm.pshufd(temp5, temp4, 0xEE);
    masm.addsd(temp1, temp5);
    masm.addsd(dest, temp1);
    masm.jmp(bb8);
    // bb0: special cases. Reload the original input and classify it by its top 16 bits
    // (gpr1, with the earlier bias of 16 undone).
    masm.bind(bb0);
    masm.movdq(dest, stackSlot);
    masm.movdq(temp1, stackSlot);
    masm.addl(gpr1, 16);
    masm.cmpl(gpr1, 32768);
    masm.jcc(ConditionFlag.AboveEqual, bb2);
    masm.cmpl(gpr1, 16);
    masm.jcc(ConditionFlag.Below, bb3);
    // bb4: result is dest + dest.
    masm.bind(bb4);
    masm.addsd(dest, dest);
    masm.jmp(bb8);
    // bb5: entered from bb2 with the flags of its cmpl still live.
    masm.bind(bb5);
    masm.jcc(ConditionFlag.Above, bb4);
    masm.cmpl(gpr3, 0);
    masm.jcc(ConditionFlag.Above, bb4);
    masm.jmp(bb6);
    // bb3: top word below 16. If both 32-bit halves of (0 + dest) are zero go to bb7;
    // otherwise multiply dest by a constant whose top word is 18416, redo the setup done
    // before bb1 (with gpr2 = 18416 instead of 16352) and re-enter the main path.
    masm.bind(bb3);
    masm.xorpd(temp1, temp1);
    masm.addsd(temp1, dest);
    masm.movdl(gpr3, temp1);
    masm.psrlq(temp1, 32);
    masm.movdl(gpr2, temp1);
    masm.orl(gpr3, gpr2);
    masm.cmpl(gpr3, 0);
    masm.jcc(ConditionFlag.Equal, bb7);
    masm.xorpd(temp1, temp1);
    masm.xorpd(temp2, temp2);
    masm.movl(gpr1, 18416);
    masm.pinsrw(temp1, gpr1, 3);
    masm.mulsd(dest, temp1);
    masm.movl(gpr1, 16368);
    masm.pinsrw(temp2, gpr1, 3);
    masm.movdqu(temp1, dest);
    masm.pextrw(gpr1, dest, 3);
    masm.por(dest, temp2);
    masm.movl(gpr2, 18416);
    masm.psrlq(dest, 27);
    // 0x00000000,
    masm.movdqu(temp2, externalAddress(logTenEPtr));
    // 0x3fdbc000,
    // 0xbf2e4108,
    // 0x3f5a7a6c
    masm.psrld(dest, 2);
    masm.rcpps(dest, dest);
    masm.psllq(temp1, 12);
    masm.pshufd(temp6, temp5, 0x4E);
    masm.psrlq(temp1, 12);
    masm.jmp(bb1);
    // bb2: top word >= 32768. Split the input into its low (gpr3) and high (gpr2) 32-bit
    // halves; the high half is doubled, which shifts out its top bit, before the compare.
    masm.bind(bb2);
    masm.movdl(gpr3, temp1);
    masm.psrlq(temp1, 32);
    masm.movdl(gpr2, temp1);
    masm.addl(gpr2, gpr2);
    masm.cmpl(gpr2, -2097152);
    masm.jcc(ConditionFlag.AboveEqual, bb5);
    masm.orl(gpr3, gpr2);
    masm.cmpl(gpr3, 0);
    masm.jcc(ConditionFlag.Equal, bb7);
    // bb6: dest = 0.0 * (value whose top word is 32752, i.e. 0x7FF0).
    masm.bind(bb6);
    masm.xorpd(temp1, temp1);
    masm.xorpd(dest, dest);
    masm.movl(gpr1, 32752);
    masm.pinsrw(temp1, gpr1, 3);
    masm.mulsd(dest, temp1);
    masm.jmp(bb8);
    // bb7: dest = (value whose top word is 49136, i.e. 0xBFF0) / 0.0.
    masm.bind(bb7);
    masm.xorpd(temp1, temp1);
    masm.xorpd(dest, dest);
    masm.movl(gpr1, 49136);
    masm.pinsrw(dest, gpr1, 3);
    masm.divsd(dest, temp1);
    // bb8: common exit for every path.
    masm.bind(bb8);
}
Also used : Register(jdk.vm.ci.code.Register) ValueUtil.asRegister(jdk.vm.ci.code.ValueUtil.asRegister) ArrayDataPointerConstant(org.graalvm.compiler.lir.asm.ArrayDataPointerConstant) Label(org.graalvm.compiler.asm.Label) AMD64Address(org.graalvm.compiler.asm.amd64.AMD64Address)

Aggregations

AMD64Address (org.graalvm.compiler.asm.amd64.AMD64Address)32 Register (jdk.vm.ci.code.Register)23 ValueUtil.asRegister (jdk.vm.ci.code.ValueUtil.asRegister)19 Label (org.graalvm.compiler.asm.Label)16 ArrayDataPointerConstant (org.graalvm.compiler.lir.asm.ArrayDataPointerConstant)7 AMD64MacroAssembler (org.graalvm.compiler.asm.amd64.AMD64MacroAssembler)6 CallingConvention (jdk.vm.ci.code.CallingConvention)4 RegisterConfig (jdk.vm.ci.code.RegisterConfig)4 TargetDescription (jdk.vm.ci.code.TargetDescription)3 AMD64Kind (jdk.vm.ci.amd64.AMD64Kind)2 DataSectionReference (jdk.vm.ci.code.site.DataSectionReference)2 AssemblerTest (org.graalvm.compiler.asm.test.AssemblerTest)2 CompilationResult (org.graalvm.compiler.code.CompilationResult)2 Data (org.graalvm.compiler.code.DataSection.Data)2 RawData (org.graalvm.compiler.code.DataSection.RawData)2 SerializableData (org.graalvm.compiler.code.DataSection.SerializableData)2 FrameMap (org.graalvm.compiler.lir.framemap.FrameMap)2 Test (org.junit.Test)2 CGlobalDataReference (com.oracle.svm.core.graal.code.CGlobalDataReference)1 SubstrateRegisterConfig (com.oracle.svm.core.graal.meta.SubstrateRegisterConfig)1