Search in sources :

Example 26 with AMD64Address

use of org.graalvm.compiler.asm.amd64.AMD64Address in project graal by oracle.

the class AMD64HotSpotUnwindOp method emitCode.

/**
 * Emits the unwind sequence: tears down the current frame and then jumps to the
 * {@code UNWIND_EXCEPTION_TO_CALLER} foreign call, passing the exception as the first argument and
 * the return address read from the top of the stack as the second.
 */
@Override
public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
    leaveFrameAndRestoreRbp(crb, masm);

    ForeignCallLinkage unwindLinkage = crb.foreignCalls.lookupForeignCall(UNWIND_EXCEPTION_TO_CALLER);
    CallingConvention outgoing = unwindLinkage.getOutgoingCallingConvention();
    assert outgoing.getArgumentCount() == 2;
    assert exception.equals(outgoing.getArgument(0));

    // After the frame has been left, the return address sits at [rsp]; load it into the
    // register that the calling convention assigns to the second argument.
    Register returnAddressRegister = asRegister(outgoing.getArgument(1));
    AMD64Address topOfStack = new AMD64Address(rsp, 0);
    masm.movq(returnAddressRegister, topOfStack);

    AMD64Call.directJmp(crb, masm, unwindLinkage);
}
Also used : CallingConvention(jdk.vm.ci.code.CallingConvention) Register(jdk.vm.ci.code.Register) ValueUtil.asRegister(jdk.vm.ci.code.ValueUtil.asRegister) ForeignCallLinkage(org.graalvm.compiler.core.common.spi.ForeignCallLinkage) AMD64Address(org.graalvm.compiler.asm.amd64.AMD64Address)

Example 27 with AMD64Address

use of org.graalvm.compiler.asm.amd64.AMD64Address in project graal by oracle.

the class AMD64Assembler method emitOperandHelper.

/**
 * Encodes one memory operand: writes the ModR/M byte, the SIB byte when the addressing mode
 * requires one, and the displacement (if any). The R field of the ModR/M byte is filled with an
 * opcode extension.
 *
 * @param reg the 3-bit value placed in the R field of the ModR/M byte
 * @param addr the memory operand to encode
 * @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte
 * @param additionalInstructionSize the number of bytes that will be emitted after the operand,
 *            so that the start position of the next instruction can be computed even though
 *            this instruction has not been completely emitted yet.
 */
protected void emitOperandHelper(int reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize) {
    assert (reg & 0x07) == reg;
    // ModR/M layout is [mod(2) reg(3) r/m(3)]; SIB layout is [scale(2) index(3) base(3)].
    final int regField = reg << 3;
    final Register base = addr.getBase();
    final Register index = addr.getIndex();
    final AMD64Address.Scale scale = addr.getScale();
    final int disp = addr.getDisplacement();

    if (base.equals(AMD64.rip)) {
        // RIP relative: [00 reg 101] disp32
        assert index.equals(Register.None) : "cannot use RIP relative addressing with index register";
        emitByte(0x05 | regField);
        if (codePatchingAnnotationConsumer != null && addr.instructionStartPosition >= 0) {
            int operandPosition = position();
            int nextInstructionPosition = position() + 4 + additionalInstructionSize;
            codePatchingAnnotationConsumer.accept(new AddressDisplacementAnnotation(addr.instructionStartPosition, operandPosition, 4, nextInstructionPosition));
        }
        emitInt(disp);
    } else if (base.isValid()) {
        final int baseField = encode(base);

        // All base-relative forms pick the mod bits the same way: no displacement when it is
        // zero (not encodable for rbp/r13, whose mod=00 pattern means something else), a
        // single byte when it fits and is permitted, four bytes otherwise.
        final int mod;
        if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
            mod = 0x00;
        } else if (isByte(disp) && !force4Byte) {
            mod = 0x40;
        } else {
            mod = 0x80;
        }

        if (index.isValid()) {
            // [base + index*scale + disp]: [mod reg 100][ss index base]
            final int indexField = encode(index) << 3;
            assert !index.equals(rsp) : "illegal addressing mode";
            emitByte(mod | 0x04 | regField);
            emitByte(scale.log2 << 6 | indexField | baseField);
        } else if (base.equals(rsp) || base.equals(r12)) {
            // [rsp + disp]: a SIB byte is mandatory, [mod reg 100][00 100 100]
            emitByte(mod | 0x04 | regField);
            emitByte(0x24);
        } else {
            // [base + disp]: [mod reg base]
            assert !base.equals(rsp) && !base.equals(r12) : "illegal addressing mode";
            emitByte(mod | regField | baseField);
        }

        // Trailing displacement as announced by the mod bits.
        if (mod == 0x40) {
            emitByte(disp & 0xFF);
        } else if (mod == 0x80) {
            emitInt(disp);
        }
    } else if (index.isValid()) {
        // [index*scale + disp]: [00 reg 100][ss index 101] disp32
        final int indexField = encode(index) << 3;
        assert !index.equals(rsp) : "illegal addressing mode";
        emitByte(0x04 | regField);
        emitByte(scale.log2 << 6 | indexField | 0x05);
        emitInt(disp);
    } else {
        // [disp] ABSOLUTE: [00 reg 100][00 100 101] disp32
        emitByte(0x04 | regField);
        emitByte(0x25);
        emitInt(disp);
    }
    setCurAttributes(null);
}
Also used : Register(jdk.vm.ci.code.Register) Scale(org.graalvm.compiler.asm.amd64.AMD64Address.Scale)

Example 28 with AMD64Address

use of org.graalvm.compiler.asm.amd64.AMD64Address in project graal by oracle.

the class BitOpsTest method lzcntqMemTest.

/**
 * Checks the memory-operand form of the 64-bit LZCNT: for a {@code LongField} holding 1 the
 * generated stub is expected to return 63. Does nothing when LZCNT is not supported.
 */
@Test
public void lzcntqMemTest() {
    if (!lzcntSupported) {
        return;
    }
    CodeGenTest codeGen = new CodeGenTest() {

        @Override
        public byte[] generateCode(CompilationResult compResult, TargetDescription target, RegisterConfig registerConfig, CallingConvention cc) {
            AMD64Assembler assembler = new AMD64Assembler(target);
            Register result = registerConfig.getReturnRegister(JavaKind.Int);
            try {
                Field xField = LongField.class.getDeclaredField("x");
                // Address of the field "x" inside the object passed as first argument.
                Register receiver = asRegister(cc.getArgument(0));
                int xOffset = (int) UNSAFE.objectFieldOffset(xField);
                AMD64Address fieldAddress = new AMD64Address(receiver, xOffset);
                LZCNT.emit(assembler, QWORD, result, fieldAddress);
                assembler.ret(0);
                return assembler.close(true);
            } catch (Exception e) {
                throw new RuntimeException("exception while trying to generate field access:", e);
            }
        }
    };
    assertReturn("longFieldStub", codeGen, 63, new LongField(1));
}
Also used : CallingConvention(jdk.vm.ci.code.CallingConvention) RegisterConfig(jdk.vm.ci.code.RegisterConfig) Field(java.lang.reflect.Field) Register(jdk.vm.ci.code.Register) ValueUtil.asRegister(jdk.vm.ci.code.ValueUtil.asRegister) AMD64Assembler(org.graalvm.compiler.asm.amd64.AMD64Assembler) TargetDescription(jdk.vm.ci.code.TargetDescription) CompilationResult(org.graalvm.compiler.code.CompilationResult) AMD64Address(org.graalvm.compiler.asm.amd64.AMD64Address) Test(org.junit.Test) AssemblerTest(org.graalvm.compiler.asm.test.AssemblerTest)

Example 29 with AMD64Address

use of org.graalvm.compiler.asm.amd64.AMD64Address in project graal by oracle.

the class BitOpsTest method tzcntqMemTest.

/**
 * Checks the memory-operand form of the 64-bit TZCNT: for a {@code LongField} holding only the
 * top bit the generated stub is expected to return 63. Does nothing when TZCNT is not supported.
 */
@Test
public void tzcntqMemTest() {
    if (!tzcntSupported) {
        return;
    }
    CodeGenTest codeGen = new CodeGenTest() {

        @Override
        public byte[] generateCode(CompilationResult compResult, TargetDescription target, RegisterConfig registerConfig, CallingConvention cc) {
            AMD64Assembler assembler = new AMD64Assembler(target);
            Register result = registerConfig.getReturnRegister(JavaKind.Int);
            try {
                Field xField = LongField.class.getDeclaredField("x");
                // Address of the field "x" inside the object passed as first argument.
                Register receiver = asRegister(cc.getArgument(0));
                int xOffset = (int) UNSAFE.objectFieldOffset(xField);
                AMD64Address fieldAddress = new AMD64Address(receiver, xOffset);
                TZCNT.emit(assembler, QWORD, result, fieldAddress);
                assembler.ret(0);
                return assembler.close(true);
            } catch (Exception e) {
                throw new RuntimeException("exception while trying to generate field access:", e);
            }
        }
    };
    assertReturn("longFieldStub", codeGen, 63, new LongField(0x8000_0000_0000_0000L));
}
Also used : CallingConvention(jdk.vm.ci.code.CallingConvention) RegisterConfig(jdk.vm.ci.code.RegisterConfig) Field(java.lang.reflect.Field) Register(jdk.vm.ci.code.Register) ValueUtil.asRegister(jdk.vm.ci.code.ValueUtil.asRegister) AMD64Assembler(org.graalvm.compiler.asm.amd64.AMD64Assembler) TargetDescription(jdk.vm.ci.code.TargetDescription) CompilationResult(org.graalvm.compiler.code.CompilationResult) AMD64Address(org.graalvm.compiler.asm.amd64.AMD64Address) Test(org.junit.Test) AssemblerTest(org.graalvm.compiler.asm.test.AssemblerTest)

Example 30 with AMD64Address

use of org.graalvm.compiler.asm.amd64.AMD64Address in project graal by oracle.

the class AMD64MathIntrinsicUnaryOp method tanIntrinsic.

/**
 * Emits the instruction sequence of the tangent intrinsic into {@code masm}.
 *
 * The sequence copies {@code value} into {@code dest} (unless both are the same register), then
 * dispatches on the top 16-bit word of the input to one of several code paths. Every path ends
 * with a jump to {@code bb15}, which is bound as the very last item of the sequence.
 *
 * NOTE(review): presumably the emitted code computes tan(value) for a double and leaves the
 * result in {@code dest}; this is inferred from the method and constant names, confirm against
 * the algorithm description this was ported from.
 *
 * NOTE(review): the bare hexadecimal comments interleaved with the instructions look like the
 * values of the referenced constant tables, displaced by a formatter; confirm before relying on
 * them.
 *
 * @param dest register that is read and written throughout the sequence
 * @param value register holding the input; copied into {@code dest} when the encodings differ
 * @param crb compilation result builder, handed to {@code setCrb} before any code is emitted
 * @param masm assembler receiving the instructions
 */
public void tanIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) {
    // Data-section constants referenced by the emitted code; the second argument differs per
    // table (16 or 8). NOTE(review): presumably the required alignment in bytes, confirm.
    ArrayDataPointerConstant oneHalfTanPtr = new ArrayDataPointerConstant(oneHalfTan, 16);
    ArrayDataPointerConstant mulSixteenPtr = new ArrayDataPointerConstant(mulSixteen, 16);
    ArrayDataPointerConstant signMaskTanPtr = new ArrayDataPointerConstant(signMaskTan, 16);
    ArrayDataPointerConstant piThirtyTwoInvTanPtr = new ArrayDataPointerConstant(piThirtyTwoInvTan, 16);
    ArrayDataPointerConstant pOneTanPtr = new ArrayDataPointerConstant(pOneTan, 16);
    ArrayDataPointerConstant pTwoTanPtr = new ArrayDataPointerConstant(pTwoTan, 16);
    ArrayDataPointerConstant pThreeTanPtr = new ArrayDataPointerConstant(pThreeTan, 16);
    ArrayDataPointerConstant cTableTanPtr = new ArrayDataPointerConstant(cTableTan, 16);
    ArrayDataPointerConstant maskThirtyFiveTanPtr = new ArrayDataPointerConstant(maskThirtyFiveTan, 16);
    ArrayDataPointerConstant qElevenTanPtr = new ArrayDataPointerConstant(qElevenTan, 16);
    ArrayDataPointerConstant qNineTanPtr = new ArrayDataPointerConstant(qNineTan, 16);
    ArrayDataPointerConstant qSevenTanPtr = new ArrayDataPointerConstant(qSevenTan, 8);
    ArrayDataPointerConstant qFiveTanPtr = new ArrayDataPointerConstant(qFiveTan, 16);
    ArrayDataPointerConstant qThreeTanPtr = new ArrayDataPointerConstant(qThreeTan, 16);
    ArrayDataPointerConstant piInvTableTanPtr = new ArrayDataPointerConstant(piInvTableTan, 16);
    ArrayDataPointerConstant piFourTanPtr = new ArrayDataPointerConstant(piFourTan, 8);
    ArrayDataPointerConstant qqTwoTanPtr = new ArrayDataPointerConstant(qqTwoTan, 8);
    ArrayDataPointerConstant onePtr = new ArrayDataPointerConstant(one, 8);
    ArrayDataPointerConstant twoPowFiftyFiveTanPtr = new ArrayDataPointerConstant(twoPowFiftyFiveTan, 8);
    ArrayDataPointerConstant twoPowMFiftyFiveTanPtr = new ArrayDataPointerConstant(twoPowMFiftyFiveTan, 8);
    // Branch targets inside the emitted sequence; bb15 is the common exit.
    Label bb0 = new Label();
    Label bb1 = new Label();
    Label bb2 = new Label();
    Label bb3 = new Label();
    Label bb5 = new Label();
    Label bb6 = new Label();
    Label bb8 = new Label();
    Label bb9 = new Label();
    Label bb10 = new Label();
    Label bb11 = new Label();
    Label bb12 = new Label();
    Label bb13 = new Label();
    Label bb14 = new Label();
    Label bb15 = new Label();
    // General purpose scratch registers. Note that gpr3 is taken from rcxTemp rather than a
    // "gpr3Temp". NOTE(review): presumably because the single-operand shifts emitted below
    // take their count from rcx; confirm.
    Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD);
    Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD);
    Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD);
    Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD);
    Register gpr5 = asRegister(gpr5Temp, AMD64Kind.QWORD);
    Register gpr6 = asRegister(gpr6Temp, AMD64Kind.QWORD);
    Register gpr7 = asRegister(gpr7Temp, AMD64Kind.QWORD);
    Register gpr8 = asRegister(gpr8Temp, AMD64Kind.QWORD);
    Register gpr9 = asRegister(gpr9Temp, AMD64Kind.QWORD);
    Register gpr10 = asRegister(gpr10Temp, AMD64Kind.QWORD);
    // Floating point scratch registers.
    Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE);
    Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE);
    Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE);
    Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE);
    Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE);
    Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE);
    Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE);
    setCrb(crb);
    // All further work happens on dest; copy the input over unless it is already there.
    if (dest.encoding != value.encoding) {
        masm.movdqu(dest, value);
    }
    // Take 16-bit word 3 of dest, clear its top bit (mask 32767) and rebase it by 16314. The
    // unsigned "Above" test sends everything outside the window [0, 270] to bb0.
    masm.pextrw(gpr1, dest, 3);
    masm.andl(gpr1, 32767);
    masm.subl(gpr1, 16314);
    masm.cmpl(gpr1, 270);
    masm.jcc(ConditionFlag.Above, bb0);
    // Path taken when the rebased word lies inside the window.
    // 0x00000000,
    masm.movdqu(temp5, externalAddress(oneHalfTanPtr));
    // 0x3fe00000,
    // 0x00000000,
    // 0x3fe00000
    // 0x00000000,
    masm.movdqu(temp6, externalAddress(mulSixteenPtr));
    // 0x40300000,
    // 0x00000000,
    // 0x3ff00000
    masm.unpcklpd(dest, dest);
    // 0x00000000,
    masm.movdqu(temp4, externalAddress(signMaskTanPtr));
    // 0x80000000,
    // 0x00000000,
    // 0x80000000
    masm.andpd(temp4, dest);
    // 0x6dc9c883,
    masm.movdqu(temp1, externalAddress(piThirtyTwoInvTanPtr));
    // 0x3fe45f30,
    // 0x6dc9c883,
    // 0x40245f30
    masm.mulpd(temp1, dest);
    masm.por(temp5, temp4);
    masm.addpd(temp1, temp5);
    masm.movdqu(temp7, temp1);
    masm.unpckhpd(temp7, temp7);
    masm.cvttsd2sil(gpr4, temp7);
    masm.cvttpd2dq(temp1, temp1);
    masm.cvtdq2pd(temp1, temp1);
    masm.mulpd(temp1, temp6);
    // 0x54444000,
    masm.movdqu(temp3, externalAddress(pOneTanPtr));
    // 0x3fb921fb,
    // 0x54440000,
    // 0x3fb921fb
    // 0x676733af,
    masm.movdq(temp5, externalAddress(qqTwoTanPtr));
    // 0x3d32e7b9
    masm.addq(gpr4, 469248);
    // 0x67674000,
    masm.movdqu(temp4, externalAddress(pTwoTanPtr));
    // 0xbd32e7b9,
    // 0x4c4c0000,
    // 0x3d468c23
    masm.mulpd(temp3, temp1);
    masm.andq(gpr4, 31);
    masm.mulsd(temp5, temp1);
    masm.movq(gpr3, gpr4);
    masm.mulpd(temp4, temp1);
    masm.shlq(gpr3, 1);
    masm.subpd(dest, temp3);
    // 0x3707344a,
    masm.mulpd(temp1, externalAddress(pThreeTanPtr));
    // 0x3aa8a2e0,
    // 0x03707345,
    // 0x3ae98a2e
    masm.addq(gpr4, gpr3);
    masm.shlq(gpr3, 2);
    masm.addq(gpr4, gpr3);
    masm.addsd(temp5, dest);
    masm.movdqu(temp2, dest);
    masm.subpd(dest, temp4);
    // 0x00000000,
    masm.movdq(temp6, externalAddress(onePtr));
    // 0x3ff00000
    masm.shlq(gpr4, 4);
    // gpr1 = address of cTableTan plus the byte offset computed in gpr4; the loads below
    // address the selected table entry relative to gpr1.
    masm.leaq(gpr1, externalAddress(cTableTanPtr));
    // 0xfffc0000,
    masm.andpd(temp5, externalAddress(maskThirtyFiveTanPtr));
    // 0xffffffff,
    // 0x00000000,
    // 0x00000000
    masm.movdqu(temp3, dest);
    masm.addq(gpr1, gpr4);
    masm.subpd(temp2, dest);
    masm.unpckhpd(dest, dest);
    masm.divsd(temp6, temp5);
    masm.subpd(temp2, temp4);
    masm.movdqu(temp7, new AMD64Address(gpr1, 16));
    masm.subsd(temp3, temp5);
    masm.mulpd(temp7, dest);
    masm.subpd(temp2, temp1);
    masm.movdqu(temp1, new AMD64Address(gpr1, 48));
    masm.mulpd(temp1, dest);
    masm.movdqu(temp4, new AMD64Address(gpr1, 96));
    masm.mulpd(temp4, dest);
    masm.addsd(temp2, temp3);
    masm.movdqu(temp3, dest);
    masm.mulpd(dest, dest);
    masm.addpd(temp7, new AMD64Address(gpr1, 0));
    masm.addpd(temp1, new AMD64Address(gpr1, 32));
    masm.mulpd(temp1, dest);
    masm.addpd(temp4, new AMD64Address(gpr1, 80));
    masm.addpd(temp7, temp1);
    masm.movdqu(temp1, new AMD64Address(gpr1, 112));
    masm.mulpd(temp1, dest);
    masm.mulpd(dest, dest);
    masm.addpd(temp4, temp1);
    masm.movdqu(temp1, new AMD64Address(gpr1, 64));
    masm.mulpd(temp1, dest);
    masm.addpd(temp7, temp1);
    masm.movdqu(temp1, temp3);
    masm.mulpd(temp3, dest);
    masm.mulsd(dest, dest);
    masm.mulpd(temp1, new AMD64Address(gpr1, 144));
    masm.mulpd(temp4, temp3);
    masm.movdqu(temp3, temp1);
    masm.addpd(temp7, temp4);
    masm.movdqu(temp4, temp1);
    masm.mulsd(dest, temp7);
    masm.unpckhpd(temp7, temp7);
    masm.addsd(dest, temp7);
    masm.unpckhpd(temp1, temp1);
    masm.addsd(temp3, temp1);
    masm.subsd(temp4, temp3);
    masm.addsd(temp1, temp4);
    masm.movdqu(temp4, temp2);
    masm.movdq(temp7, new AMD64Address(gpr1, 144));
    masm.unpckhpd(temp2, temp2);
    masm.addsd(temp7, new AMD64Address(gpr1, 152));
    masm.mulsd(temp7, temp2);
    masm.addsd(temp7, new AMD64Address(gpr1, 136));
    masm.addsd(temp7, temp1);
    masm.addsd(dest, temp7);
    // 0x00000000,
    masm.movdq(temp7, externalAddress(onePtr));
    // 0x3ff00000
    masm.mulsd(temp4, temp6);
    masm.movdq(temp2, new AMD64Address(gpr1, 168));
    masm.andpd(temp2, temp6);
    masm.mulsd(temp5, temp2);
    masm.mulsd(temp6, new AMD64Address(gpr1, 160));
    masm.subsd(temp7, temp5);
    masm.subsd(temp2, new AMD64Address(gpr1, 128));
    masm.subsd(temp7, temp4);
    masm.mulsd(temp7, temp6);
    masm.movdqu(temp4, temp3);
    masm.subsd(temp3, temp2);
    masm.addsd(temp2, temp3);
    masm.subsd(temp4, temp2);
    masm.addsd(dest, temp4);
    masm.subsd(dest, temp7);
    masm.addsd(dest, temp3);
    masm.jmp(bb15);
    // bb0: the rebased word was outside the window. The first jcc still consumes the flags
    // of the cmpl(gpr1, 270) that led here: signed "Greater" goes on to bb1.
    masm.bind(bb0);
    masm.jcc(ConditionFlag.Greater, bb1);
    // Otherwise re-extract word 3; if the bits selected by mask 32752 are all zero go to
    // bb2, and if the word without its top bit is below 15904 go to bb3.
    masm.pextrw(gpr1, dest, 3);
    masm.movl(gpr4, gpr1);
    masm.andl(gpr1, 32752);
    masm.jcc(ConditionFlag.Equal, bb2);
    masm.andl(gpr4, 32767);
    masm.cmpl(gpr4, 15904);
    masm.jcc(ConditionFlag.Below, bb3);
    // Polynomial in temp2 = dest * dest using the qEleven..qThree constants, evaluated in
    // Horner form; the result is multiplied by dest^3 (temp3) and added to dest.
    masm.movdqu(temp2, dest);
    masm.movdqu(temp3, dest);
    // 0xb8fe4d77,
    masm.movdq(temp1, externalAddress(qElevenTanPtr));
    // 0x3f82609a
    masm.mulsd(temp2, dest);
    masm.mulsd(temp3, temp2);
    masm.mulsd(temp1, temp2);
    // 0xbf847a43,
    masm.addsd(temp1, externalAddress(qNineTanPtr));
    // 0x3f9664a0
    masm.mulsd(temp1, temp2);
    // 0x52c4c8ab,
    masm.addsd(temp1, externalAddress(qSevenTanPtr));
    // 0x3faba1ba
    masm.mulsd(temp1, temp2);
    // 0x11092746,
    masm.addsd(temp1, externalAddress(qFiveTanPtr));
    // 0x3fc11111
    masm.mulsd(temp1, temp2);
    // 0x55555612,
    masm.addsd(temp1, externalAddress(qThreeTanPtr));
    // 0x3fd55555
    masm.mulsd(temp1, temp3);
    masm.addsd(dest, temp1);
    masm.jmp(bb15);
    // bb3: dest = (dest + dest * twoPowFiftyFiveTan) * twoPowMFiftyFiveTan.
    masm.bind(bb3);
    // 0x00000000,
    masm.movdq(temp3, externalAddress(twoPowFiftyFiveTanPtr));
    // 0x43600000
    masm.mulsd(temp3, dest);
    masm.addsd(dest, temp3);
    // 0x00000000,
    masm.mulsd(dest, externalAddress(twoPowMFiftyFiveTanPtr));
    // 0x3c800000
    masm.jmp(bb15);
    // bb14: reached from bb1 when all bits selected by mask 32752 are set; zeroes temp1 and
    // dest and divides one by the other, i.e. dest = 0.0 / 0.0.
    masm.bind(bb14);
    masm.xorpd(temp1, temp1);
    masm.xorpd(dest, dest);
    masm.divsd(dest, temp1);
    masm.jmp(bb15);
    // bb2: dest is left untouched; only temp1 receives dest * dest.
    // NOTE(review): presumably executed just for its floating-point status side effect; confirm.
    masm.bind(bb2);
    masm.movdqu(temp1, dest);
    masm.mulsd(temp1, temp1);
    masm.jmp(bb15);
    // bb1: path for inputs whose rebased word compared signed-greater than 270.
    masm.bind(bb1);
    masm.pextrw(gpr3, dest, 3);
    masm.andl(gpr3, 32752);
    masm.cmpl(gpr3, 32752);
    masm.jcc(ConditionFlag.Equal, bb14);
    // Derive an offset into piInvTableTan from the masked word; gpr3 becomes a pointer into
    // the table, from which 32-bit words at offsets 0..24 are loaded and combined with the
    // raw bits of the input (gpr1 / gpr4) through a series of 64-bit multiplies and adds.
    masm.subl(gpr3, 16224);
    masm.shrl(gpr3, 7);
    masm.andl(gpr3, 65532);
    masm.leaq(gpr10, externalAddress(piInvTableTanPtr));
    masm.addq(gpr3, gpr10);
    masm.movdq(gpr1, dest);
    masm.movl(gpr9, new AMD64Address(gpr3, 20));
    masm.movl(gpr7, new AMD64Address(gpr3, 24));
    masm.movl(gpr4, gpr1);
    masm.shrq(gpr1, 21);
    masm.orl(gpr1, Integer.MIN_VALUE);
    masm.shrl(gpr1, 11);
    masm.movl(gpr8, gpr9);
    masm.imulq(gpr9, gpr4);
    masm.imulq(gpr8, gpr1);
    masm.imulq(gpr7, gpr1);
    masm.movl(gpr5, new AMD64Address(gpr3, 16));
    masm.movl(gpr6, new AMD64Address(gpr3, 12));
    masm.movl(gpr10, gpr9);
    masm.shrq(gpr9, 32);
    masm.addq(gpr8, gpr9);
    masm.addq(gpr10, gpr7);
    masm.movl(gpr7, gpr10);
    masm.shrq(gpr10, 32);
    masm.addq(gpr8, gpr10);
    masm.movl(gpr9, gpr5);
    masm.imulq(gpr5, gpr4);
    masm.imulq(gpr9, gpr1);
    masm.movl(gpr10, gpr6);
    masm.imulq(gpr6, gpr4);
    masm.movl(gpr2, gpr5);
    masm.shrq(gpr5, 32);
    masm.addq(gpr8, gpr2);
    masm.movl(gpr2, gpr8);
    masm.shrq(gpr8, 32);
    masm.addq(gpr9, gpr5);
    masm.addq(gpr9, gpr8);
    masm.shlq(gpr2, 32);
    masm.orq(gpr7, gpr2);
    masm.imulq(gpr10, gpr1);
    masm.movl(gpr8, new AMD64Address(gpr3, 8));
    masm.movl(gpr5, new AMD64Address(gpr3, 4));
    masm.movl(gpr2, gpr6);
    masm.shrq(gpr6, 32);
    masm.addq(gpr9, gpr2);
    masm.movl(gpr2, gpr9);
    masm.shrq(gpr9, 32);
    masm.addq(gpr10, gpr6);
    masm.addq(gpr10, gpr9);
    masm.movq(gpr6, gpr8);
    masm.imulq(gpr8, gpr4);
    masm.imulq(gpr6, gpr1);
    masm.movl(gpr9, gpr8);
    masm.shrq(gpr8, 32);
    masm.addq(gpr10, gpr9);
    masm.movl(gpr9, gpr10);
    masm.shrq(gpr10, 32);
    masm.addq(gpr6, gpr8);
    masm.addq(gpr6, gpr10);
    masm.movq(gpr8, gpr5);
    masm.imulq(gpr5, gpr4);
    masm.imulq(gpr8, gpr1);
    masm.shlq(gpr9, 32);
    masm.orq(gpr9, gpr2);
    masm.movl(gpr1, new AMD64Address(gpr3, 0));
    masm.movl(gpr10, gpr5);
    masm.shrq(gpr5, 32);
    masm.addq(gpr6, gpr10);
    masm.movl(gpr10, gpr6);
    masm.shrq(gpr6, 32);
    masm.addq(gpr8, gpr5);
    masm.addq(gpr8, gpr6);
    masm.imulq(gpr4, gpr1);
    masm.pextrw(gpr2, dest, 3);
    // Turn the table pointer in gpr3 back into an offset, scale it by 8 (three doublings)
    // and add 19.
    masm.leaq(gpr6, externalAddress(piInvTableTanPtr));
    masm.subq(gpr3, gpr6);
    masm.addl(gpr3, gpr3);
    masm.addl(gpr3, gpr3);
    masm.addl(gpr3, gpr3);
    masm.addl(gpr3, 19);
    // gpr5 keeps bit 15 of word 3 of the input; gpr2 becomes ((word >> 4) & 2047) - 1023.
    masm.movl(gpr5, 32768);
    masm.andl(gpr5, gpr2);
    masm.shrl(gpr2, 4);
    masm.andl(gpr2, 2047);
    masm.subl(gpr2, 1023);
    masm.subl(gpr3, gpr2);
    masm.addq(gpr8, gpr4);
    masm.movl(gpr4, gpr3);
    masm.addl(gpr4, 32);
    masm.cmpl(gpr3, 0);
    masm.jcc(ConditionFlag.Less, bb5);
    masm.negl(gpr3);
    masm.addl(gpr3, 29);
    masm.shll(gpr8);
    masm.movl(gpr6, gpr8);
    masm.andl(gpr8, 1073741823);
    masm.testl(gpr8, 536870912);
    masm.jcc(ConditionFlag.NotEqual, bb6);
    masm.shrl(gpr8);
    masm.movl(gpr2, 0);
    masm.shlq(gpr8, 32);
    masm.orq(gpr8, gpr10);
    // bb8: join point of the fall-through above and of bb5, bb6 and bb13.
    masm.bind(bb8);
    masm.cmpq(gpr8, 0);
    masm.jcc(ConditionFlag.Equal, bb9);
    // bb10: gpr8 is non-zero here; locate its highest set bit and compare against 29.
    masm.bind(bb10);
    masm.bsrq(gpr10, gpr8);
    masm.movl(gpr3, 29);
    masm.subl(gpr3, gpr10);
    masm.jcc(ConditionFlag.LessEqual, bb11);
    masm.shlq(gpr8);
    masm.movq(gpr1, gpr9);
    masm.shlq(gpr9);
    masm.addl(gpr4, gpr3);
    masm.negl(gpr3);
    masm.addl(gpr3, 64);
    masm.shrq(gpr1);
    masm.shrq(gpr7);
    masm.orq(gpr8, gpr1);
    masm.orq(gpr9, gpr7);
    // bb12: convert the integers in gpr8 / gpr9 to doubles and scale them with values whose
    // top word is assembled in gpr4 and inserted via pinsrw.
    masm.bind(bb12);
    masm.cvtsi2sdq(dest, gpr8);
    masm.shrq(gpr9, 1);
    masm.cvtsi2sdq(temp3, gpr9);
    masm.xorpd(temp4, temp4);
    masm.shll(gpr4, 4);
    masm.negl(gpr4);
    masm.addl(gpr4, 16368);
    masm.orl(gpr4, gpr5);
    masm.xorl(gpr4, gpr2);
    masm.pinsrw(temp4, gpr4, 3);
    masm.leaq(gpr1, externalAddress(piFourTanPtr));
    // 0x00000000,
    masm.movdq(temp2, new AMD64Address(gpr1, 0));
    // 0x3fe921fb,
    // 0x4611a626,
    masm.movdq(temp7, new AMD64Address(gpr1, 8));
    // 0x3e85110b
    masm.xorpd(temp5, temp5);
    masm.subl(gpr4, 1008);
    masm.pinsrw(temp5, gpr4, 3);
    masm.mulsd(dest, temp4);
    masm.shll(gpr5, 16);
    masm.sarl(gpr5, 31);
    masm.mulsd(temp3, temp5);
    masm.movdqu(temp1, dest);
    masm.mulsd(dest, temp2);
    masm.shrl(gpr6, 30);
    masm.addsd(temp1, temp3);
    masm.mulsd(temp3, temp2);
    masm.addl(gpr6, gpr5);
    masm.xorl(gpr6, gpr5);
    masm.mulsd(temp7, temp1);
    masm.movl(gpr1, gpr6);
    masm.addsd(temp7, temp3);
    masm.movdqu(temp2, dest);
    masm.addsd(dest, temp7);
    masm.subsd(temp2, dest);
    masm.addsd(temp7, temp2);
    // 0x6dc9c883,
    masm.movdqu(temp1, externalAddress(piThirtyTwoInvTanPtr));
    // 0x40245f30
    // Duplicate the low double of dest into both lanes; movddup is used only when the CPU
    // reports SSE3, otherwise movlhps.
    if (masm.supports(CPUFeature.SSE3)) {
        masm.movddup(dest, dest);
    } else {
        masm.movlhps(dest, dest);
    }
    // 0x00000000,
    masm.movdqu(temp4, externalAddress(signMaskTanPtr));
    // 0x80000000,
    // 0x00000000,
    // 0x80000000
    masm.andpd(temp4, dest);
    masm.mulpd(temp1, dest);
    // Same lane duplication for temp7.
    if (masm.supports(CPUFeature.SSE3)) {
        masm.movddup(temp7, temp7);
    } else {
        masm.movlhps(temp7, temp7);
    }
    // From here on the sequence largely mirrors the in-window path at the top of the method
    // (same constants and cTableTan offsets), using 32-bit integer arithmetic and folding in
    // gpr1 and temp7.
    // 0x00000000,
    masm.movdqu(temp5, externalAddress(oneHalfTanPtr));
    // 0x3fe00000,
    // 0x00000000,
    // 0x3fe00000
    // 0x00000000,
    masm.movdqu(temp6, externalAddress(mulSixteenPtr));
    // 0x40300000,
    // 0x00000000,
    // 0x3ff00000
    masm.por(temp5, temp4);
    masm.addpd(temp1, temp5);
    masm.movdqu(temp5, temp1);
    masm.unpckhpd(temp5, temp5);
    masm.cvttsd2sil(gpr4, temp5);
    masm.cvttpd2dq(temp1, temp1);
    masm.cvtdq2pd(temp1, temp1);
    masm.mulpd(temp1, temp6);
    // 0x54444000,
    masm.movdqu(temp3, externalAddress(pOneTanPtr));
    // 0x3fb921fb,
    // 0x54440000,
    // 0x3fb921fb
    // 0x676733af,
    masm.movdq(temp5, externalAddress(qqTwoTanPtr));
    // 0x3d32e7b9
    masm.shll(gpr1, 4);
    masm.addl(gpr4, 469248);
    // 0x67674000,
    masm.movdqu(temp4, externalAddress(pTwoTanPtr));
    // 0xbd32e7b9,
    // 0x4c4c0000,
    // 0x3d468c23
    masm.mulpd(temp3, temp1);
    masm.addl(gpr4, gpr1);
    masm.andl(gpr4, 31);
    masm.mulsd(temp5, temp1);
    masm.movl(gpr3, gpr4);
    masm.mulpd(temp4, temp1);
    masm.shll(gpr3, 1);
    masm.subpd(dest, temp3);
    // 0x3707344a,
    masm.mulpd(temp1, externalAddress(pThreeTanPtr));
    // 0x3aa8a2e0,
    // 0x03707345,
    // 0x3ae98a2e
    masm.addl(gpr4, gpr3);
    masm.shll(gpr3, 2);
    masm.addl(gpr4, gpr3);
    masm.addsd(temp5, dest);
    masm.movdqu(temp2, dest);
    masm.subpd(dest, temp4);
    // 0x00000000,
    masm.movdq(temp6, externalAddress(onePtr));
    // 0x3ff00000
    masm.shll(gpr4, 4);
    masm.leaq(gpr1, externalAddress(cTableTanPtr));
    // 0xfffc0000,
    masm.andpd(temp5, externalAddress(maskThirtyFiveTanPtr));
    // 0xffffffff,
    // 0x00000000,
    // 0x00000000
    masm.movdqu(temp3, dest);
    masm.addq(gpr1, gpr4);
    masm.subpd(temp2, dest);
    masm.unpckhpd(dest, dest);
    masm.divsd(temp6, temp5);
    masm.subpd(temp2, temp4);
    masm.subsd(temp3, temp5);
    masm.subpd(temp2, temp1);
    masm.movdqu(temp1, new AMD64Address(gpr1, 48));
    masm.addpd(temp2, temp7);
    masm.movdqu(temp7, new AMD64Address(gpr1, 16));
    masm.mulpd(temp7, dest);
    masm.movdqu(temp4, new AMD64Address(gpr1, 96));
    masm.mulpd(temp1, dest);
    masm.mulpd(temp4, dest);
    masm.addsd(temp2, temp3);
    masm.movdqu(temp3, dest);
    masm.mulpd(dest, dest);
    masm.addpd(temp7, new AMD64Address(gpr1, 0));
    masm.addpd(temp1, new AMD64Address(gpr1, 32));
    masm.mulpd(temp1, dest);
    masm.addpd(temp4, new AMD64Address(gpr1, 80));
    masm.addpd(temp7, temp1);
    masm.movdqu(temp1, new AMD64Address(gpr1, 112));
    masm.mulpd(temp1, dest);
    masm.mulpd(dest, dest);
    masm.addpd(temp4, temp1);
    masm.movdqu(temp1, new AMD64Address(gpr1, 64));
    masm.mulpd(temp1, dest);
    masm.addpd(temp7, temp1);
    masm.movdqu(temp1, temp3);
    masm.mulpd(temp3, dest);
    masm.mulsd(dest, dest);
    masm.mulpd(temp1, new AMD64Address(gpr1, 144));
    masm.mulpd(temp4, temp3);
    masm.movdqu(temp3, temp1);
    masm.addpd(temp7, temp4);
    masm.movdqu(temp4, temp1);
    masm.mulsd(dest, temp7);
    masm.unpckhpd(temp7, temp7);
    masm.addsd(dest, temp7);
    masm.unpckhpd(temp1, temp1);
    masm.addsd(temp3, temp1);
    masm.subsd(temp4, temp3);
    masm.addsd(temp1, temp4);
    masm.movdqu(temp4, temp2);
    masm.movdq(temp7, new AMD64Address(gpr1, 144));
    masm.unpckhpd(temp2, temp2);
    masm.addsd(temp7, new AMD64Address(gpr1, 152));
    masm.mulsd(temp7, temp2);
    masm.addsd(temp7, new AMD64Address(gpr1, 136));
    masm.addsd(temp7, temp1);
    masm.addsd(dest, temp7);
    // 0x00000000,
    masm.movdq(temp7, externalAddress(onePtr));
    // 0x3ff00000
    masm.mulsd(temp4, temp6);
    masm.movdq(temp2, new AMD64Address(gpr1, 168));
    masm.andpd(temp2, temp6);
    masm.mulsd(temp5, temp2);
    masm.mulsd(temp6, new AMD64Address(gpr1, 160));
    masm.subsd(temp7, temp5);
    masm.subsd(temp2, new AMD64Address(gpr1, 128));
    masm.subsd(temp7, temp4);
    masm.mulsd(temp7, temp6);
    masm.movdqu(temp4, temp3);
    masm.subsd(temp3, temp2);
    masm.addsd(temp2, temp3);
    masm.subsd(temp4, temp2);
    masm.addsd(dest, temp4);
    masm.subsd(dest, temp7);
    masm.addsd(dest, temp3);
    masm.jmp(bb15);
    // bb9: gpr8 was zero. Shift the three-register value gpr8:gpr9:gpr7 up by one register
    // (adding 64 to gpr4 each time), at most twice, resuming at bb10 as soon as gpr8 is
    // non-zero and at bb12 otherwise.
    masm.bind(bb9);
    masm.addl(gpr4, 64);
    masm.movq(gpr8, gpr9);
    masm.movq(gpr9, gpr7);
    masm.movl(gpr7, 0);
    masm.cmpq(gpr8, 0);
    masm.jcc(ConditionFlag.NotEqual, bb10);
    masm.addl(gpr4, 64);
    masm.movq(gpr8, gpr9);
    masm.movq(gpr9, gpr7);
    masm.cmpq(gpr8, 0);
    masm.jcc(ConditionFlag.NotEqual, bb10);
    masm.jmp(bb12);
    // bb11: reached from bb10 with the flags of subl(gpr3, gpr10) still live; "Equal" means
    // nothing has to be shifted.
    masm.bind(bb11);
    masm.jcc(ConditionFlag.Equal, bb12);
    masm.negl(gpr3);
    masm.shrq(gpr9);
    masm.movq(gpr1, gpr8);
    masm.shrq(gpr8);
    masm.subl(gpr4, gpr3);
    masm.negl(gpr3);
    masm.addl(gpr3, 64);
    masm.shlq(gpr1);
    masm.orq(gpr9, gpr1);
    masm.jmp(bb12);
    // bb5: reached from bb1 when gpr3 compared signed-less than 0.
    masm.bind(bb5);
    masm.notl(gpr3);
    masm.shlq(gpr8, 32);
    masm.orq(gpr8, gpr10);
    masm.shlq(gpr8);
    masm.movq(gpr6, gpr8);
    masm.testl(gpr8, Integer.MIN_VALUE);
    masm.jcc(ConditionFlag.NotEqual, bb13);
    masm.shrl(gpr8);
    masm.movl(gpr2, 0);
    masm.shrq(gpr6, 2);
    masm.jmp(bb8);
    // bb6: reached from bb1 when bit 29 (536870912) of gpr8 was set. Replaces
    // gpr8:gpr9:gpr7 by a three-register subtraction with borrow (subq/sbbq/sbbq) and sets
    // gpr2 to 32768 before rejoining at bb8.
    masm.bind(bb6);
    masm.shrl(gpr8);
    masm.movl(gpr2, 1073741824);
    masm.shrl(gpr2);
    masm.shlq(gpr8, 32);
    masm.orq(gpr8, gpr10);
    masm.shlq(gpr2, 32);
    masm.addl(gpr6, 1073741824);
    masm.movl(gpr3, 0);
    masm.movl(gpr10, 0);
    masm.subq(gpr3, gpr7);
    masm.sbbq(gpr10, gpr9);
    masm.sbbq(gpr2, gpr8);
    masm.movq(gpr7, gpr3);
    masm.movq(gpr9, gpr10);
    masm.movq(gpr8, gpr2);
    masm.movl(gpr2, 32768);
    masm.jmp(bb8);
    // bb13: reached from bb5 when bit 31 of gpr8 was set; performs the same kind of
    // three-register subtraction as bb6 before rejoining at bb8.
    masm.bind(bb13);
    masm.shrl(gpr8);
    masm.movq(gpr2, 0x100000000L);
    masm.shrq(gpr2);
    masm.movl(gpr3, 0);
    masm.movl(gpr10, 0);
    masm.subq(gpr3, gpr7);
    masm.sbbq(gpr10, gpr9);
    masm.sbbq(gpr2, gpr8);
    masm.movq(gpr7, gpr3);
    masm.movq(gpr9, gpr10);
    masm.movq(gpr8, gpr2);
    masm.movl(gpr2, 32768);
    masm.shrq(gpr6, 2);
    masm.addl(gpr6, 1073741824);
    masm.jmp(bb8);
    // bb15: common exit of all paths.
    masm.bind(bb15);
}
Also used : Register(jdk.vm.ci.code.Register) ValueUtil.asRegister(jdk.vm.ci.code.ValueUtil.asRegister) ArrayDataPointerConstant(org.graalvm.compiler.lir.asm.ArrayDataPointerConstant) Label(org.graalvm.compiler.asm.Label) AMD64Address(org.graalvm.compiler.asm.amd64.AMD64Address)

Aggregations

AMD64Address (org.graalvm.compiler.asm.amd64.AMD64Address)36 Register (jdk.vm.ci.code.Register)26 ValueUtil.asRegister (jdk.vm.ci.code.ValueUtil.asRegister)23 Label (org.graalvm.compiler.asm.Label)15 ArrayDataPointerConstant (org.graalvm.compiler.lir.asm.ArrayDataPointerConstant)7 CallingConvention (jdk.vm.ci.code.CallingConvention)6 RegisterConfig (jdk.vm.ci.code.RegisterConfig)5 TargetDescription (jdk.vm.ci.code.TargetDescription)5 Field (java.lang.reflect.Field)4 AMD64Assembler (org.graalvm.compiler.asm.amd64.AMD64Assembler)4 AssemblerTest (org.graalvm.compiler.asm.test.AssemblerTest)4 CompilationResult (org.graalvm.compiler.code.CompilationResult)4 Test (org.junit.Test)4 AMD64MacroAssembler (org.graalvm.compiler.asm.amd64.AMD64MacroAssembler)3 AMD64Kind (jdk.vm.ci.amd64.AMD64Kind)2 Scale (org.graalvm.compiler.asm.amd64.AMD64Address.Scale)2 CGlobalDataReference (com.oracle.svm.core.graal.code.CGlobalDataReference)1 SubstrateRegisterConfig (com.oracle.svm.core.graal.meta.SubstrateRegisterConfig)1 AMD64.rax (jdk.vm.ci.amd64.AMD64.rax)1 AMD64.rbx (jdk.vm.ci.amd64.AMD64.rbx)1