use of org.graalvm.compiler.asm.amd64.AMD64MacroAssembler in project graal by oracle.
the class AMD64HotSpotSafepointOp method emitGlobalPoll.
/**
 * Emits a safepoint poll that reads from the global polling page.
 *
 * Three code shapes are produced, chosen in this order:
 * <ol>
 * <li>{@code ImmutableCode} is enabled: the polling page address is loaded into {@code scratch}
 * from a placeholder (when {@code GeneratePIC} is enabled) or from a recorded data reference,
 * and the poll reads through {@code scratch}.</li>
 * <li>{@code isPollingPageFar(config)} holds: the address is moved into {@code scratch} as an
 * immediate and the poll reads through {@code scratch}.</li>
 * <li>otherwise: the poll is a RIP-relative read with displacement 0.</li>
 * </ol>
 * In every shape a mark is recorded (the "return" flavour when {@code atReturn} is set) and,
 * when {@code state} is non-null, a {@code SAFEPOINT} infopoint is recorded at the captured
 * code position.
 *
 * @param crb receives the mark, the optional infopoint and, in the non-PIC immutable case, the
 *            data reference
 * @param asm assembler the instructions are emitted into
 * @param config source of the polling page address and of the mark ids
 * @param atReturn selects the {@code MARKID_POLL_RETURN_*} marks; {@code state} must be null then
 * @param state frame state for the infopoint, or {@code null} to record none
 * @param scratch register that holds the polling page address in the two "far" shapes
 */
private static void emitGlobalPoll(CompilationResultBuilder crb, AMD64MacroAssembler asm, GraalHotSpotVMConfig config, boolean atReturn, LIRFrameState state, Register scratch) {
    assert !atReturn || state == null : "state is unneeded at return";

    if (ImmutableCode.getValue(crb.getOptions())) {
        final JavaKind wordKind = JavaKind.Long;
        final int wordBytes = wordKind.getBitCount() / Byte.SIZE;
        final JavaConstant pollPageConstant = JavaConstant.forIntegerKind(wordKind, config.safepointPollingAddress);

        // Load the polling page address into scratch: via a placeholder address for PIC,
        // otherwise via a word-aligned data reference recorded with the builder.
        if (!GeneratePIC.getValue(crb.getOptions())) {
            final AMD64Address pollPageRef = (AMD64Address) crb.recordDataReferenceInCode(pollPageConstant, wordBytes);
            asm.movq(scratch, pollPageRef);
        } else {
            asm.movq(scratch, asm.getPlaceholder(-1));
        }

        // Position of the poll instruction; captured before the mark is recorded.
        final int pollPosition = asm.position();
        if (atReturn) {
            crb.recordMark(config.MARKID_POLL_RETURN_FAR);
        } else {
            crb.recordMark(config.MARKID_POLL_FAR);
        }
        if (state != null) {
            crb.recordInfopoint(pollPosition, state, InfopointReason.SAFEPOINT);
        }
        asm.testl(rax, new AMD64Address(scratch));
        return;
    }

    if (isPollingPageFar(config)) {
        // Far polling page: materialize the address as an immediate.
        asm.movq(scratch, config.safepointPollingAddress);
        if (atReturn) {
            crb.recordMark(config.MARKID_POLL_RETURN_FAR);
        } else {
            crb.recordMark(config.MARKID_POLL_FAR);
        }
        final int pollPosition = asm.position();
        if (state != null) {
            crb.recordInfopoint(pollPosition, state, InfopointReason.SAFEPOINT);
        }
        asm.testl(rax, new AMD64Address(scratch));
        return;
    }

    // Near polling page.
    if (atReturn) {
        crb.recordMark(config.MARKID_POLL_RETURN_NEAR);
    } else {
        crb.recordMark(config.MARKID_POLL_NEAR);
    }
    final int pollPosition = asm.position();
    if (state != null) {
        crb.recordInfopoint(pollPosition, state, InfopointReason.SAFEPOINT);
    }
    // The C++ code transforms the polling page offset into an RIP displacement
    // to the real address at that offset in the polling page.
    asm.testl(rax, new AMD64Address(rip, 0));
}
use of org.graalvm.compiler.asm.amd64.AMD64MacroAssembler in project graal by oracle.
the class AMD64HotSpotUnwindOp method emitCode.
/**
 * Leaves the current frame and jumps to the {@code UNWIND_EXCEPTION_TO_CALLER} foreign call.
 *
 * The outgoing calling convention of that call is expected to have exactly two arguments, the
 * first being this op's {@code exception} value. The second argument's register is loaded with
 * the word at {@code [rsp]} before the jump.
 *
 * @param crb compilation result builder used for the foreign call lookup and the jump
 * @param masm assembler the instructions are emitted into
 */
@Override
public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
    leaveFrameAndRestoreRbp(crb, masm);

    final ForeignCallLinkage unwindLinkage = crb.foreignCalls.lookupForeignCall(UNWIND_EXCEPTION_TO_CALLER);
    final CallingConvention outgoing = unwindLinkage.getOutgoingCallingConvention();
    assert outgoing.getArgumentCount() == 2;
    assert exception.equals(outgoing.getArgument(0));

    // With the frame gone, the return address sits on top of the stack; pass it as the
    // second argument.
    final Register returnAddressReg = asRegister(outgoing.getArgument(1));
    final AMD64Address stackTop = new AMD64Address(rsp, 0);
    masm.movq(returnAddressReg, stackTop);

    AMD64Call.directJmp(crb, masm, unwindLinkage);
}
use of org.graalvm.compiler.asm.amd64.AMD64MacroAssembler in project graal by oracle.
the class AMD64MathIntrinsicUnaryOp method tanIntrinsic.
/**
 * Emits the AMD64 instruction sequence of the tan intrinsic into {@code masm}.
 *
 * The input is read from {@code value}; it is first copied into {@code dest} when the two
 * registers differ, and all further computation works on {@code dest} and the temporaries.
 * Every code path ends with a jump to label {@code bb15}, which is bound as the last emitted
 * position. Presumably the result is left in the low double of {@code dest} - confirm against
 * the caller.
 *
 * Paths visible in the emitted code:
 * <ul>
 * <li>fall-through after the first range check: table-driven path using {@code cTableTan}</li>
 * <li>{@code bb0}: range check failed; dispatches to {@code bb1}, {@code bb2}, {@code bb3} or a
 * polynomial in the q* constants</li>
 * <li>{@code bb1}: integer multi-word arithmetic against {@code piInvTableTan}, then a second
 * table-driven evaluation</li>
 * <li>{@code bb14}: stores 0.0 / 0.0 into {@code dest}</li>
 * </ul>
 *
 * NOTE(review): the bare hexadecimal comments around loads of constants appear to list the
 * 32-bit words of the constant being referenced; they were not verified against the table
 * definitions, which are outside this method.
 *
 * @param dest register the computation is carried out in
 * @param value register holding the input
 * @param crb compilation result builder, handed to {@code setCrb} before any instruction is
 *            emitted
 * @param masm assembler receiving the instructions
 */
public void tanIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) {
// Wrap the constant tables used below. The second constructor argument (8 or 16) is
// presumably the required alignment in bytes - TODO confirm.
ArrayDataPointerConstant oneHalfTanPtr = new ArrayDataPointerConstant(oneHalfTan, 16);
ArrayDataPointerConstant mulSixteenPtr = new ArrayDataPointerConstant(mulSixteen, 16);
ArrayDataPointerConstant signMaskTanPtr = new ArrayDataPointerConstant(signMaskTan, 16);
ArrayDataPointerConstant piThirtyTwoInvTanPtr = new ArrayDataPointerConstant(piThirtyTwoInvTan, 16);
ArrayDataPointerConstant pOneTanPtr = new ArrayDataPointerConstant(pOneTan, 16);
ArrayDataPointerConstant pTwoTanPtr = new ArrayDataPointerConstant(pTwoTan, 16);
ArrayDataPointerConstant pThreeTanPtr = new ArrayDataPointerConstant(pThreeTan, 16);
ArrayDataPointerConstant cTableTanPtr = new ArrayDataPointerConstant(cTableTan, 16);
ArrayDataPointerConstant maskThirtyFiveTanPtr = new ArrayDataPointerConstant(maskThirtyFiveTan, 16);
ArrayDataPointerConstant qElevenTanPtr = new ArrayDataPointerConstant(qElevenTan, 16);
ArrayDataPointerConstant qNineTanPtr = new ArrayDataPointerConstant(qNineTan, 16);
ArrayDataPointerConstant qSevenTanPtr = new ArrayDataPointerConstant(qSevenTan, 8);
ArrayDataPointerConstant qFiveTanPtr = new ArrayDataPointerConstant(qFiveTan, 16);
ArrayDataPointerConstant qThreeTanPtr = new ArrayDataPointerConstant(qThreeTan, 16);
ArrayDataPointerConstant piInvTableTanPtr = new ArrayDataPointerConstant(piInvTableTan, 16);
ArrayDataPointerConstant piFourTanPtr = new ArrayDataPointerConstant(piFourTan, 8);
ArrayDataPointerConstant qqTwoTanPtr = new ArrayDataPointerConstant(qqTwoTan, 8);
ArrayDataPointerConstant onePtr = new ArrayDataPointerConstant(one, 8);
ArrayDataPointerConstant twoPowFiftyFiveTanPtr = new ArrayDataPointerConstant(twoPowFiftyFiveTan, 8);
ArrayDataPointerConstant twoPowMFiftyFiveTanPtr = new ArrayDataPointerConstant(twoPowMFiftyFiveTan, 8);
// Branch targets of the emitted code; each is bound further down.
Label bb0 = new Label();
Label bb1 = new Label();
Label bb2 = new Label();
Label bb3 = new Label();
Label bb5 = new Label();
Label bb6 = new Label();
Label bb8 = new Label();
Label bb9 = new Label();
Label bb10 = new Label();
Label bb11 = new Label();
Label bb12 = new Label();
Label bb13 = new Label();
Label bb14 = new Label();
Label bb15 = new Label();
// Resolve this op's temporaries to registers. gpr3 is taken from rcxTemp; the one-operand
// shift forms used below (shlq/shrq/shll/shrl with a single register) presumably shift by
// CL, which is why the shift counts are always prepared in gpr3 - TODO confirm.
Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD);
Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD);
Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD);
Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD);
Register gpr5 = asRegister(gpr5Temp, AMD64Kind.QWORD);
Register gpr6 = asRegister(gpr6Temp, AMD64Kind.QWORD);
Register gpr7 = asRegister(gpr7Temp, AMD64Kind.QWORD);
Register gpr8 = asRegister(gpr8Temp, AMD64Kind.QWORD);
Register gpr9 = asRegister(gpr9Temp, AMD64Kind.QWORD);
Register gpr10 = asRegister(gpr10Temp, AMD64Kind.QWORD);
Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE);
Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE);
Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE);
Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE);
Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE);
Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE);
Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE);
// Hand the builder to this op before emitting code; presumably externalAddress(...) relies
// on it - TODO confirm.
setCrb(crb);
// Work in dest: copy the input over if it lives in a different register.
if (dest.encoding != value.encoding) {
masm.movdqu(dest, value);
}
// Range check on the top 16-bit word of dest (bits 48..63): clear the sign bit (0x7fff),
// subtract 16314 and compare with 270. Unsigned "above" leaves the fall-through path.
masm.pextrw(gpr1, dest, 3);
masm.andl(gpr1, 32767);
masm.subl(gpr1, 16314);
masm.cmpl(gpr1, 270);
masm.jcc(ConditionFlag.Above, bb0);
// ---- Fall-through path: range check passed ----
// 0x00000000,
masm.movdqu(temp5, externalAddress(oneHalfTanPtr));
// 0x3fe00000,
// 0x00000000,
// 0x3fe00000
// 0x00000000,
masm.movdqu(temp6, externalAddress(mulSixteenPtr));
// 0x40300000,
// 0x00000000,
// 0x3ff00000
// Duplicate the low double of dest into both lanes.
masm.unpcklpd(dest, dest);
// 0x00000000,
masm.movdqu(temp4, externalAddress(signMaskTanPtr));
// 0x80000000,
// 0x00000000,
// 0x80000000
masm.andpd(temp4, dest);
// 0x6dc9c883,
masm.movdqu(temp1, externalAddress(piThirtyTwoInvTanPtr));
// 0x3fe45f30,
// 0x6dc9c883,
// 0x40245f30
masm.mulpd(temp1, dest);
masm.por(temp5, temp4);
masm.addpd(temp1, temp5);
masm.movdqu(temp7, temp1);
masm.unpckhpd(temp7, temp7);
// gpr4 = truncated integer value of the high lane; temp1 = both lanes truncated and
// converted back to double.
masm.cvttsd2sil(gpr4, temp7);
masm.cvttpd2dq(temp1, temp1);
masm.cvtdq2pd(temp1, temp1);
masm.mulpd(temp1, temp6);
// 0x54444000,
masm.movdqu(temp3, externalAddress(pOneTanPtr));
// 0x3fb921fb,
// 0x54440000,
// 0x3fb921fb
// 0x676733af,
masm.movdq(temp5, externalAddress(qqTwoTanPtr));
// 0x3d32e7b9
masm.addq(gpr4, 469248);
// 0x67674000,
masm.movdqu(temp4, externalAddress(pTwoTanPtr));
// 0xbd32e7b9,
// 0x4c4c0000,
// 0x3d468c23
masm.mulpd(temp3, temp1);
// The GPR instructions interleaved from here on compute gpr4 = (gpr4 & 31) * 176:
// gpr4 + 2*gpr4 + 8*gpr4 = 11*gpr4, then shifted left by 4.
masm.andq(gpr4, 31);
masm.mulsd(temp5, temp1);
masm.movq(gpr3, gpr4);
masm.mulpd(temp4, temp1);
masm.shlq(gpr3, 1);
masm.subpd(dest, temp3);
// 0x3707344a,
masm.mulpd(temp1, externalAddress(pThreeTanPtr));
// 0x3aa8a2e0,
// 0x03707345,
// 0x3ae98a2e
masm.addq(gpr4, gpr3);
masm.shlq(gpr3, 2);
masm.addq(gpr4, gpr3);
masm.addsd(temp5, dest);
masm.movdqu(temp2, dest);
masm.subpd(dest, temp4);
// 0x00000000,
masm.movdq(temp6, externalAddress(onePtr));
// 0x3ff00000
masm.shlq(gpr4, 4);
// gpr1 = address of cTableTan; the byte offset in gpr4 is added a few instructions below,
// after which all [gpr1 + k] operands address one 176-byte slice of the table.
masm.leaq(gpr1, externalAddress(cTableTanPtr));
// 0xfffc0000,
masm.andpd(temp5, externalAddress(maskThirtyFiveTanPtr));
// 0xffffffff,
// 0x00000000,
// 0x00000000
masm.movdqu(temp3, dest);
masm.addq(gpr1, gpr4);
masm.subpd(temp2, dest);
masm.unpckhpd(dest, dest);
masm.divsd(temp6, temp5);
masm.subpd(temp2, temp4);
masm.movdqu(temp7, new AMD64Address(gpr1, 16));
masm.subsd(temp3, temp5);
masm.mulpd(temp7, dest);
masm.subpd(temp2, temp1);
masm.movdqu(temp1, new AMD64Address(gpr1, 48));
masm.mulpd(temp1, dest);
masm.movdqu(temp4, new AMD64Address(gpr1, 96));
masm.mulpd(temp4, dest);
masm.addsd(temp2, temp3);
masm.movdqu(temp3, dest);
masm.mulpd(dest, dest);
masm.addpd(temp7, new AMD64Address(gpr1, 0));
masm.addpd(temp1, new AMD64Address(gpr1, 32));
masm.mulpd(temp1, dest);
masm.addpd(temp4, new AMD64Address(gpr1, 80));
masm.addpd(temp7, temp1);
masm.movdqu(temp1, new AMD64Address(gpr1, 112));
masm.mulpd(temp1, dest);
masm.mulpd(dest, dest);
masm.addpd(temp4, temp1);
masm.movdqu(temp1, new AMD64Address(gpr1, 64));
masm.mulpd(temp1, dest);
masm.addpd(temp7, temp1);
masm.movdqu(temp1, temp3);
masm.mulpd(temp3, dest);
masm.mulsd(dest, dest);
masm.mulpd(temp1, new AMD64Address(gpr1, 144));
masm.mulpd(temp4, temp3);
masm.movdqu(temp3, temp1);
masm.addpd(temp7, temp4);
masm.movdqu(temp4, temp1);
masm.mulsd(dest, temp7);
masm.unpckhpd(temp7, temp7);
masm.addsd(dest, temp7);
masm.unpckhpd(temp1, temp1);
masm.addsd(temp3, temp1);
masm.subsd(temp4, temp3);
masm.addsd(temp1, temp4);
masm.movdqu(temp4, temp2);
masm.movdq(temp7, new AMD64Address(gpr1, 144));
masm.unpckhpd(temp2, temp2);
masm.addsd(temp7, new AMD64Address(gpr1, 152));
masm.mulsd(temp7, temp2);
masm.addsd(temp7, new AMD64Address(gpr1, 136));
masm.addsd(temp7, temp1);
masm.addsd(dest, temp7);
// 0x00000000,
masm.movdq(temp7, externalAddress(onePtr));
// 0x3ff00000
masm.mulsd(temp4, temp6);
masm.movdq(temp2, new AMD64Address(gpr1, 168));
masm.andpd(temp2, temp6);
masm.mulsd(temp5, temp2);
masm.mulsd(temp6, new AMD64Address(gpr1, 160));
masm.subsd(temp7, temp5);
masm.subsd(temp2, new AMD64Address(gpr1, 128));
masm.subsd(temp7, temp4);
masm.mulsd(temp7, temp6);
masm.movdqu(temp4, temp3);
masm.subsd(temp3, temp2);
masm.addsd(temp2, temp3);
masm.subsd(temp4, temp2);
masm.addsd(dest, temp4);
masm.subsd(dest, temp7);
masm.addsd(dest, temp3);
masm.jmp(bb15);
// ---- bb0: range check failed ----
// The flags are still those of the cmpl against 270 (jcc does not modify flags):
// signed "greater" continues at bb1.
masm.bind(bb0);
masm.jcc(ConditionFlag.Greater, bb1);
// Re-read the top word of dest; if all bits under mask 32752 (0x7ff0) are zero, go to bb2.
masm.pextrw(gpr1, dest, 3);
masm.movl(gpr4, gpr1);
masm.andl(gpr1, 32752);
masm.jcc(ConditionFlag.Equal, bb2);
// Otherwise compare the top word without its sign bit against 15904; unsigned "below"
// goes to bb3.
masm.andl(gpr4, 32767);
masm.cmpl(gpr4, 15904);
masm.jcc(ConditionFlag.Below, bb3);
// Polynomial on the low double x of dest: temp2 = x*x, temp3 = x*x*x, temp1 is built by
// Horner steps in temp2 over qEleven, qNine, qSeven, qFive, qThree; then
// dest = x + temp3 * temp1.
masm.movdqu(temp2, dest);
masm.movdqu(temp3, dest);
// 0xb8fe4d77,
masm.movdq(temp1, externalAddress(qElevenTanPtr));
// 0x3f82609a
masm.mulsd(temp2, dest);
masm.mulsd(temp3, temp2);
masm.mulsd(temp1, temp2);
// 0xbf847a43,
masm.addsd(temp1, externalAddress(qNineTanPtr));
// 0x3f9664a0
masm.mulsd(temp1, temp2);
// 0x52c4c8ab,
masm.addsd(temp1, externalAddress(qSevenTanPtr));
// 0x3faba1ba
masm.mulsd(temp1, temp2);
// 0x11092746,
masm.addsd(temp1, externalAddress(qFiveTanPtr));
// 0x3fc11111
masm.mulsd(temp1, temp2);
// 0x55555612,
masm.addsd(temp1, externalAddress(qThreeTanPtr));
// 0x3fd55555
masm.mulsd(temp1, temp3);
masm.addsd(dest, temp1);
masm.jmp(bb15);
// ---- bb3: dest = (dest + dest * twoPowFiftyFiveTan) * twoPowMFiftyFiveTan ----
masm.bind(bb3);
// 0x00000000,
masm.movdq(temp3, externalAddress(twoPowFiftyFiveTanPtr));
// 0x43600000
masm.mulsd(temp3, dest);
masm.addsd(dest, temp3);
// 0x00000000,
masm.mulsd(dest, externalAddress(twoPowMFiftyFiveTanPtr));
// 0x3c800000
masm.jmp(bb15);
// ---- bb14: reached from bb1 when the masked top word equals 32752 (0x7ff0) ----
// Zeroes temp1 and dest and stores 0.0 / 0.0 into dest.
masm.bind(bb14);
masm.xorpd(temp1, temp1);
masm.xorpd(dest, dest);
masm.divsd(dest, temp1);
masm.jmp(bb15);
// ---- bb2: top word has no bits under mask 0x7ff0 ----
// Squares a copy of dest in temp1; dest itself is not written on this path.
// NOTE(review): presumably executed only for its floating-point status side effect - confirm.
masm.bind(bb2);
masm.movdqu(temp1, dest);
masm.mulsd(temp1, temp1);
masm.jmp(bb15);
// ---- bb1: rebased top word greater than 270 ----
masm.bind(bb1);
// All bits under mask 0x7ff0 set -> bb14.
masm.pextrw(gpr3, dest, 3);
masm.andl(gpr3, 32752);
masm.cmpl(gpr3, 32752);
masm.jcc(ConditionFlag.Equal, bb14);
// gpr3 = address in piInvTableTan: ((masked word - 16224) >> 7) with the low two bits
// cleared (0xfffc) is used as byte offset from the table base.
masm.subl(gpr3, 16224);
masm.shrl(gpr3, 7);
masm.andl(gpr3, 65532);
masm.leaq(gpr10, externalAddress(piInvTableTanPtr));
masm.addq(gpr3, gpr10);
// Split the raw 64 bits of dest: gpr4 receives the low 32 bits; gpr1 keeps the bits from
// bit 21 upwards, gets bit 31 set and is then shifted right by 11.
masm.movdq(gpr1, dest);
masm.movl(gpr9, new AMD64Address(gpr3, 20));
masm.movl(gpr7, new AMD64Address(gpr3, 24));
masm.movl(gpr4, gpr1);
masm.shrq(gpr1, 21);
masm.orl(gpr1, Integer.MIN_VALUE);
masm.shrl(gpr1, 11);
// The following block appears to be a multi-word integer multiplication of gpr4/gpr1 with
// the 32-bit table words at offsets 24 down to 0: each product is split into low half
// (movl) and high half (shrq 32) and the halves are accumulated into neighbouring words.
masm.movl(gpr8, gpr9);
masm.imulq(gpr9, gpr4);
masm.imulq(gpr8, gpr1);
masm.imulq(gpr7, gpr1);
masm.movl(gpr5, new AMD64Address(gpr3, 16));
masm.movl(gpr6, new AMD64Address(gpr3, 12));
masm.movl(gpr10, gpr9);
masm.shrq(gpr9, 32);
masm.addq(gpr8, gpr9);
masm.addq(gpr10, gpr7);
masm.movl(gpr7, gpr10);
masm.shrq(gpr10, 32);
masm.addq(gpr8, gpr10);
masm.movl(gpr9, gpr5);
masm.imulq(gpr5, gpr4);
masm.imulq(gpr9, gpr1);
masm.movl(gpr10, gpr6);
masm.imulq(gpr6, gpr4);
masm.movl(gpr2, gpr5);
masm.shrq(gpr5, 32);
masm.addq(gpr8, gpr2);
masm.movl(gpr2, gpr8);
masm.shrq(gpr8, 32);
masm.addq(gpr9, gpr5);
masm.addq(gpr9, gpr8);
masm.shlq(gpr2, 32);
masm.orq(gpr7, gpr2);
masm.imulq(gpr10, gpr1);
masm.movl(gpr8, new AMD64Address(gpr3, 8));
masm.movl(gpr5, new AMD64Address(gpr3, 4));
masm.movl(gpr2, gpr6);
masm.shrq(gpr6, 32);
masm.addq(gpr9, gpr2);
masm.movl(gpr2, gpr9);
masm.shrq(gpr9, 32);
masm.addq(gpr10, gpr6);
masm.addq(gpr10, gpr9);
masm.movq(gpr6, gpr8);
masm.imulq(gpr8, gpr4);
masm.imulq(gpr6, gpr1);
masm.movl(gpr9, gpr8);
masm.shrq(gpr8, 32);
masm.addq(gpr10, gpr9);
masm.movl(gpr9, gpr10);
masm.shrq(gpr10, 32);
masm.addq(gpr6, gpr8);
masm.addq(gpr6, gpr10);
masm.movq(gpr8, gpr5);
masm.imulq(gpr5, gpr4);
masm.imulq(gpr8, gpr1);
masm.shlq(gpr9, 32);
masm.orq(gpr9, gpr2);
masm.movl(gpr1, new AMD64Address(gpr3, 0));
masm.movl(gpr10, gpr5);
masm.shrq(gpr5, 32);
masm.addq(gpr6, gpr10);
masm.movl(gpr10, gpr6);
masm.shrq(gpr6, 32);
masm.addq(gpr8, gpr5);
masm.addq(gpr8, gpr6);
masm.imulq(gpr4, gpr1);
// Shift count: gpr3 = 8 * (table byte offset) + 19 - (((top word >> 4) & 2047) - 1023).
// gpr5 keeps bit 15 (0x8000) of the top word of dest.
masm.pextrw(gpr2, dest, 3);
masm.leaq(gpr6, externalAddress(piInvTableTanPtr));
masm.subq(gpr3, gpr6);
masm.addl(gpr3, gpr3);
masm.addl(gpr3, gpr3);
masm.addl(gpr3, gpr3);
masm.addl(gpr3, 19);
masm.movl(gpr5, 32768);
masm.andl(gpr5, gpr2);
masm.shrl(gpr2, 4);
masm.andl(gpr2, 2047);
masm.subl(gpr2, 1023);
masm.subl(gpr3, gpr2);
masm.addq(gpr8, gpr4);
masm.movl(gpr4, gpr3);
masm.addl(gpr4, 32);
// Negative count -> bb5.
masm.cmpl(gpr3, 0);
masm.jcc(ConditionFlag.Less, bb5);
masm.negl(gpr3);
masm.addl(gpr3, 29);
masm.shll(gpr8);
masm.movl(gpr6, gpr8);
masm.andl(gpr8, 1073741823);
// Bit 29 (536870912) of gpr8 set -> bb6.
masm.testl(gpr8, 536870912);
masm.jcc(ConditionFlag.NotEqual, bb6);
masm.shrl(gpr8);
masm.movl(gpr2, 0);
masm.shlq(gpr8, 32);
masm.orq(gpr8, gpr10);
// ---- bb8: join point of the fall-through above and of bb5/bb6/bb13 ----
masm.bind(bb8);
masm.cmpq(gpr8, 0);
masm.jcc(ConditionFlag.Equal, bb9);
// ---- bb10: gpr8 is non-zero ----
// gpr3 = 29 - (index of the highest set bit of gpr8); "less or equal" goes to bb11.
// Otherwise the gpr8:gpr9 pair is shifted left, refilled from gpr9/gpr7.
masm.bind(bb10);
masm.bsrq(gpr10, gpr8);
masm.movl(gpr3, 29);
masm.subl(gpr3, gpr10);
masm.jcc(ConditionFlag.LessEqual, bb11);
masm.shlq(gpr8);
masm.movq(gpr1, gpr9);
masm.shlq(gpr9);
masm.addl(gpr4, gpr3);
masm.negl(gpr3);
masm.addl(gpr3, 64);
masm.shrq(gpr1);
masm.shrq(gpr7);
masm.orq(gpr8, gpr1);
masm.orq(gpr9, gpr7);
// ---- bb12: back to floating point ----
// dest = (double) gpr8 and temp3 = (double) (gpr9 >> 1). temp4 and temp5 are zeroed and get
// a 16-bit word derived from gpr4, gpr5 and gpr2 inserted at word index 3 - presumably
// sign/exponent words forming scale factors; TODO confirm.
masm.bind(bb12);
masm.cvtsi2sdq(dest, gpr8);
masm.shrq(gpr9, 1);
masm.cvtsi2sdq(temp3, gpr9);
masm.xorpd(temp4, temp4);
masm.shll(gpr4, 4);
masm.negl(gpr4);
masm.addl(gpr4, 16368);
masm.orl(gpr4, gpr5);
masm.xorl(gpr4, gpr2);
masm.pinsrw(temp4, gpr4, 3);
masm.leaq(gpr1, externalAddress(piFourTanPtr));
// 0x00000000,
masm.movdq(temp2, new AMD64Address(gpr1, 0));
// 0x3fe921fb,
// 0x4611a626,
masm.movdq(temp7, new AMD64Address(gpr1, 8));
// 0x3e85110b
masm.xorpd(temp5, temp5);
masm.subl(gpr4, 1008);
masm.pinsrw(temp5, gpr4, 3);
masm.mulsd(dest, temp4);
// gpr5 becomes 0 or -1 depending on the saved bit 15 (shift it to bit 31, then
// arithmetic shift right by 31).
masm.shll(gpr5, 16);
masm.sarl(gpr5, 31);
masm.mulsd(temp3, temp5);
masm.movdqu(temp1, dest);
masm.mulsd(dest, temp2);
masm.shrl(gpr6, 30);
masm.addsd(temp1, temp3);
masm.mulsd(temp3, temp2);
masm.addl(gpr6, gpr5);
masm.xorl(gpr6, gpr5);
masm.mulsd(temp7, temp1);
masm.movl(gpr1, gpr6);
masm.addsd(temp7, temp3);
masm.movdqu(temp2, dest);
masm.addsd(dest, temp7);
masm.subsd(temp2, dest);
masm.addsd(temp7, temp2);
// From here on the sequence largely mirrors the fall-through path at the top of the
// method, with 32-bit GPR arithmetic for the table index, gpr1 << 4 added to the index
// and temp7 additionally folded into temp2.
// 0x6dc9c883,
masm.movdqu(temp1, externalAddress(piThirtyTwoInvTanPtr));
// 0x40245f30
// Duplicate the low double of dest into both lanes, using movddup when SSE3 is available.
if (masm.supports(CPUFeature.SSE3)) {
masm.movddup(dest, dest);
} else {
masm.movlhps(dest, dest);
}
// 0x00000000,
masm.movdqu(temp4, externalAddress(signMaskTanPtr));
// 0x80000000,
// 0x00000000,
// 0x80000000
masm.andpd(temp4, dest);
masm.mulpd(temp1, dest);
// Same lane duplication for temp7.
if (masm.supports(CPUFeature.SSE3)) {
masm.movddup(temp7, temp7);
} else {
masm.movlhps(temp7, temp7);
}
// 0x00000000,
masm.movdqu(temp5, externalAddress(oneHalfTanPtr));
// 0x3fe00000,
// 0x00000000,
// 0x3fe00000
// 0x00000000,
masm.movdqu(temp6, externalAddress(mulSixteenPtr));
// 0x40300000,
// 0x00000000,
// 0x3ff00000
masm.por(temp5, temp4);
masm.addpd(temp1, temp5);
masm.movdqu(temp5, temp1);
masm.unpckhpd(temp5, temp5);
masm.cvttsd2sil(gpr4, temp5);
masm.cvttpd2dq(temp1, temp1);
masm.cvtdq2pd(temp1, temp1);
masm.mulpd(temp1, temp6);
// 0x54444000,
masm.movdqu(temp3, externalAddress(pOneTanPtr));
// 0x3fb921fb,
// 0x54440000,
// 0x3fb921fb
// 0x676733af,
masm.movdq(temp5, externalAddress(qqTwoTanPtr));
// 0x3d32e7b9
masm.shll(gpr1, 4);
masm.addl(gpr4, 469248);
// 0x67674000,
masm.movdqu(temp4, externalAddress(pTwoTanPtr));
// 0xbd32e7b9,
// 0x4c4c0000,
// 0x3d468c23
masm.mulpd(temp3, temp1);
masm.addl(gpr4, gpr1);
// As in the first path: the interleaved GPR instructions compute gpr4 = (gpr4 & 31) * 176.
masm.andl(gpr4, 31);
masm.mulsd(temp5, temp1);
masm.movl(gpr3, gpr4);
masm.mulpd(temp4, temp1);
masm.shll(gpr3, 1);
masm.subpd(dest, temp3);
// 0x3707344a,
masm.mulpd(temp1, externalAddress(pThreeTanPtr));
// 0x3aa8a2e0,
// 0x03707345,
// 0x3ae98a2e
masm.addl(gpr4, gpr3);
masm.shll(gpr3, 2);
masm.addl(gpr4, gpr3);
masm.addsd(temp5, dest);
masm.movdqu(temp2, dest);
masm.subpd(dest, temp4);
// 0x00000000,
masm.movdq(temp6, externalAddress(onePtr));
// 0x3ff00000
masm.shll(gpr4, 4);
// gpr1 = address of cTableTan; the offset in gpr4 is added below.
masm.leaq(gpr1, externalAddress(cTableTanPtr));
// 0xfffc0000,
masm.andpd(temp5, externalAddress(maskThirtyFiveTanPtr));
// 0xffffffff,
// 0x00000000,
// 0x00000000
masm.movdqu(temp3, dest);
masm.addq(gpr1, gpr4);
masm.subpd(temp2, dest);
masm.unpckhpd(dest, dest);
masm.divsd(temp6, temp5);
masm.subpd(temp2, temp4);
masm.subsd(temp3, temp5);
masm.subpd(temp2, temp1);
masm.movdqu(temp1, new AMD64Address(gpr1, 48));
masm.addpd(temp2, temp7);
masm.movdqu(temp7, new AMD64Address(gpr1, 16));
masm.mulpd(temp7, dest);
masm.movdqu(temp4, new AMD64Address(gpr1, 96));
masm.mulpd(temp1, dest);
masm.mulpd(temp4, dest);
masm.addsd(temp2, temp3);
masm.movdqu(temp3, dest);
masm.mulpd(dest, dest);
masm.addpd(temp7, new AMD64Address(gpr1, 0));
masm.addpd(temp1, new AMD64Address(gpr1, 32));
masm.mulpd(temp1, dest);
masm.addpd(temp4, new AMD64Address(gpr1, 80));
masm.addpd(temp7, temp1);
masm.movdqu(temp1, new AMD64Address(gpr1, 112));
masm.mulpd(temp1, dest);
masm.mulpd(dest, dest);
masm.addpd(temp4, temp1);
masm.movdqu(temp1, new AMD64Address(gpr1, 64));
masm.mulpd(temp1, dest);
masm.addpd(temp7, temp1);
masm.movdqu(temp1, temp3);
masm.mulpd(temp3, dest);
masm.mulsd(dest, dest);
masm.mulpd(temp1, new AMD64Address(gpr1, 144));
masm.mulpd(temp4, temp3);
masm.movdqu(temp3, temp1);
masm.addpd(temp7, temp4);
masm.movdqu(temp4, temp1);
masm.mulsd(dest, temp7);
masm.unpckhpd(temp7, temp7);
masm.addsd(dest, temp7);
masm.unpckhpd(temp1, temp1);
masm.addsd(temp3, temp1);
masm.subsd(temp4, temp3);
masm.addsd(temp1, temp4);
masm.movdqu(temp4, temp2);
masm.movdq(temp7, new AMD64Address(gpr1, 144));
masm.unpckhpd(temp2, temp2);
masm.addsd(temp7, new AMD64Address(gpr1, 152));
masm.mulsd(temp7, temp2);
masm.addsd(temp7, new AMD64Address(gpr1, 136));
masm.addsd(temp7, temp1);
masm.addsd(dest, temp7);
// 0x00000000,
masm.movdq(temp7, externalAddress(onePtr));
// 0x3ff00000
masm.mulsd(temp4, temp6);
masm.movdq(temp2, new AMD64Address(gpr1, 168));
masm.andpd(temp2, temp6);
masm.mulsd(temp5, temp2);
masm.mulsd(temp6, new AMD64Address(gpr1, 160));
masm.subsd(temp7, temp5);
masm.subsd(temp2, new AMD64Address(gpr1, 128));
masm.subsd(temp7, temp4);
masm.mulsd(temp7, temp6);
masm.movdqu(temp4, temp3);
masm.subsd(temp3, temp2);
masm.addsd(temp2, temp3);
masm.subsd(temp4, temp2);
masm.addsd(dest, temp4);
masm.subsd(dest, temp7);
masm.addsd(dest, temp3);
masm.jmp(bb15);
// ---- bb9: gpr8 was zero ----
// Add 64 to gpr4 and move the words up by one (gpr8 <- gpr9, gpr9 <- gpr7, gpr7 <- 0);
// continue at bb10 as soon as gpr8 is non-zero. After two such steps with gpr8 still
// zero, go straight to bb12.
masm.bind(bb9);
masm.addl(gpr4, 64);
masm.movq(gpr8, gpr9);
masm.movq(gpr9, gpr7);
masm.movl(gpr7, 0);
masm.cmpq(gpr8, 0);
masm.jcc(ConditionFlag.NotEqual, bb10);
masm.addl(gpr4, 64);
masm.movq(gpr8, gpr9);
masm.movq(gpr9, gpr7);
masm.cmpq(gpr8, 0);
masm.jcc(ConditionFlag.NotEqual, bb10);
masm.jmp(bb12);
// ---- bb11: 29 - (highest set bit) was <= 0 ----
// Flags are still those of the subl in bb10: zero needs no shifting (-> bb12); otherwise
// the count is negated and the gpr8:gpr9 pair is shifted right instead of left.
masm.bind(bb11);
masm.jcc(ConditionFlag.Equal, bb12);
masm.negl(gpr3);
masm.shrq(gpr9);
masm.movq(gpr1, gpr8);
masm.shrq(gpr8);
masm.subl(gpr4, gpr3);
masm.negl(gpr3);
masm.addl(gpr3, 64);
masm.shlq(gpr1);
masm.orq(gpr9, gpr1);
masm.jmp(bb12);
// ---- bb5: the shift count computed in bb1 was negative ----
masm.bind(bb5);
masm.notl(gpr3);
masm.shlq(gpr8, 32);
masm.orq(gpr8, gpr10);
masm.shlq(gpr8);
masm.movq(gpr6, gpr8);
// Bit 31 of gpr8 set -> bb13.
masm.testl(gpr8, Integer.MIN_VALUE);
masm.jcc(ConditionFlag.NotEqual, bb13);
masm.shrl(gpr8);
masm.movl(gpr2, 0);
masm.shrq(gpr6, 2);
masm.jmp(bb8);
// ---- bb6: bit 29 of gpr8 was set (non-negative shift count case) ----
// Replaces the three-word value gpr8:gpr9:gpr7 by gpr2:0:0 minus it (subq/sbbq chain with
// borrow), adds 1073741824 to gpr6 and sets gpr2 to 32768 before rejoining at bb8.
masm.bind(bb6);
masm.shrl(gpr8);
masm.movl(gpr2, 1073741824);
masm.shrl(gpr2);
masm.shlq(gpr8, 32);
masm.orq(gpr8, gpr10);
masm.shlq(gpr2, 32);
masm.addl(gpr6, 1073741824);
masm.movl(gpr3, 0);
masm.movl(gpr10, 0);
masm.subq(gpr3, gpr7);
masm.sbbq(gpr10, gpr9);
masm.sbbq(gpr2, gpr8);
masm.movq(gpr7, gpr3);
masm.movq(gpr9, gpr10);
masm.movq(gpr8, gpr2);
masm.movl(gpr2, 32768);
masm.jmp(bb8);
// ---- bb13: bit 31 of gpr8 was set (negative shift count case) ----
// Same three-word subtraction as in bb6, starting from gpr2 = 0x100000000 shifted right by
// the count; gpr6 is shifted right by 2 and has 1073741824 added before rejoining at bb8.
masm.bind(bb13);
masm.shrl(gpr8);
masm.movq(gpr2, 0x100000000L);
masm.shrq(gpr2);
masm.movl(gpr3, 0);
masm.movl(gpr10, 0);
masm.subq(gpr3, gpr7);
masm.sbbq(gpr10, gpr9);
masm.sbbq(gpr2, gpr8);
masm.movq(gpr7, gpr3);
masm.movq(gpr9, gpr10);
masm.movq(gpr8, gpr2);
masm.movl(gpr2, 32768);
masm.shrq(gpr6, 2);
masm.addl(gpr6, 1073741824);
masm.jmp(bb8);
// ---- bb15: common exit of all paths ----
masm.bind(bb15);
}
use of org.graalvm.compiler.asm.amd64.AMD64MacroAssembler in project graal by oracle.
the class AMD64MathIntrinsicUnaryOp method sinIntrinsic.
public void sinIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) {
ArrayDataPointerConstant oneHalfPtr = new ArrayDataPointerConstant(oneHalf, 16);
ArrayDataPointerConstant pTwoPtr = new ArrayDataPointerConstant(pTwo, 16);
ArrayDataPointerConstant scFourPtr = new ArrayDataPointerConstant(scFour, 16);
ArrayDataPointerConstant cTablePtr = new ArrayDataPointerConstant(cTable, 16);
ArrayDataPointerConstant scTwoPtr = new ArrayDataPointerConstant(scTwo, 16);
ArrayDataPointerConstant scThreePtr = new ArrayDataPointerConstant(scThree, 16);
ArrayDataPointerConstant scOnePtr = new ArrayDataPointerConstant(scOne, 16);
ArrayDataPointerConstant piInvTablePtr = new ArrayDataPointerConstant(piInvTable, 16);
ArrayDataPointerConstant piFourPtr = new ArrayDataPointerConstant(piFour, 16);
ArrayDataPointerConstant piThirtyTwoInvPtr = new ArrayDataPointerConstant(piThirtyTwoInv, 8);
ArrayDataPointerConstant shifterPtr = new ArrayDataPointerConstant(shifter, 8);
ArrayDataPointerConstant signMaskPtr = new ArrayDataPointerConstant(signMask, 8);
ArrayDataPointerConstant pThreePtr = new ArrayDataPointerConstant(pThree, 8);
ArrayDataPointerConstant allOnesPtr = new ArrayDataPointerConstant(allOnes, 8);
ArrayDataPointerConstant twoPowFiftyFivePtr = new ArrayDataPointerConstant(twoPowFiftyFive, 8);
ArrayDataPointerConstant twoPowFiftyFiveMPtr = new ArrayDataPointerConstant(twoPowFiftyFiveM, 8);
ArrayDataPointerConstant pOnePtr = new ArrayDataPointerConstant(pOne, 8);
Label bb0 = new Label();
Label bb1 = new Label();
Label bb2 = new Label();
Label bb4 = new Label();
Label bb5 = new Label();
Label bb6 = new Label();
Label bb8 = new Label();
Label bb9 = new Label();
Label bb10 = new Label();
Label bb11 = new Label();
Label bb12 = new Label();
Label bb13 = new Label();
Label bb14 = new Label();
Label bb15 = new Label();
Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD);
Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD);
Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD);
Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD);
Register gpr5 = asRegister(gpr5Temp, AMD64Kind.QWORD);
Register gpr6 = asRegister(gpr6Temp, AMD64Kind.QWORD);
Register gpr7 = asRegister(gpr7Temp, AMD64Kind.QWORD);
Register gpr8 = asRegister(gpr8Temp, AMD64Kind.QWORD);
Register gpr9 = asRegister(gpr9Temp, AMD64Kind.QWORD);
Register gpr10 = asRegister(gpr10Temp, AMD64Kind.QWORD);
Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE);
Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE);
Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE);
Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE);
Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE);
Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE);
Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE);
Register temp8 = asRegister(xmm8Temp, AMD64Kind.DOUBLE);
Register temp9 = asRegister(xmm9Temp, AMD64Kind.DOUBLE);
AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp);
setCrb(crb);
masm.movsd(stackSlot, value);
if (dest.encoding != value.encoding) {
masm.movdqu(dest, value);
}
masm.leaq(gpr1, stackSlot);
masm.movl(gpr1, new AMD64Address(gpr1, 4));
// 0x6dc9c883,
masm.movdq(temp1, externalAddress(piThirtyTwoInvPtr));
// 0x40245f30
// 0x00000000,
masm.movdq(temp2, externalAddress(shifterPtr));
// 0x43380000
masm.andl(gpr1, 2147418112);
masm.subl(gpr1, 808452096);
masm.cmpl(gpr1, 281346048);
masm.jcc(ConditionFlag.Above, bb0);
masm.mulsd(temp1, dest);
// 0x00000000,
masm.movdqu(temp5, externalAddress(oneHalfPtr));
// 0x3fe00000,
// 0x00000000,
// 0x3fe00000
// 0x00000000,
masm.movdq(temp4, externalAddress(signMaskPtr));
// 0x80000000
masm.pand(temp4, dest);
masm.por(temp5, temp4);
masm.addpd(temp1, temp5);
masm.cvttsd2sil(gpr4, temp1);
masm.cvtsi2sdl(temp1, gpr4);
// 0x1a600000,
masm.movdqu(temp6, externalAddress(pTwoPtr));
// 0x3d90b461,
// 0x1a600000,
// 0x3d90b461
masm.movq(gpr7, 0x3fb921fb54400000L);
masm.movdq(temp3, gpr7);
// 0xa556c734,
masm.movdqu(temp5, externalAddress(scFourPtr));
// 0x3ec71de3,
// 0x1a01a01a,
// 0x3efa01a0
masm.pshufd(temp4, dest, 0x44);
masm.mulsd(temp3, temp1);
if (masm.supports(CPUFeature.SSE3)) {
masm.movddup(temp1, temp1);
} else {
masm.movlhps(temp1, temp1);
}
masm.andl(gpr4, 63);
masm.shll(gpr4, 5);
masm.leaq(gpr1, externalAddress(cTablePtr));
masm.addq(gpr1, gpr4);
masm.movdqu(temp8, new AMD64Address(gpr1, 0));
masm.mulpd(temp6, temp1);
// 0x2e037073,
masm.mulsd(temp1, externalAddress(pThreePtr));
// 0x3b63198a
masm.subsd(temp4, temp3);
masm.subsd(dest, temp3);
if (masm.supports(CPUFeature.SSE3)) {
masm.movddup(temp3, temp4);
} else {
masm.movdqu(temp3, temp4);
masm.movlhps(temp3, temp3);
}
masm.subsd(temp4, temp6);
masm.pshufd(dest, dest, 0x44);
masm.pshufd(temp7, temp8, 0xE);
masm.movdqu(temp2, temp8);
masm.movdqu(temp9, temp7);
masm.mulpd(temp5, dest);
masm.subpd(dest, temp6);
masm.mulsd(temp7, temp4);
masm.subsd(temp3, temp4);
masm.mulpd(temp5, dest);
masm.mulpd(dest, dest);
masm.subsd(temp3, temp6);
// 0x11111111,
masm.movdqu(temp6, externalAddress(scTwoPtr));
// 0x3f811111,
// 0x55555555,
// 0x3fa55555
masm.subsd(temp1, temp3);
masm.movdq(temp3, new AMD64Address(gpr1, 24));
masm.addsd(temp2, temp3);
masm.subsd(temp7, temp2);
masm.mulsd(temp2, temp4);
masm.mulpd(temp6, dest);
masm.mulsd(temp3, temp4);
masm.mulpd(temp2, dest);
masm.mulpd(dest, dest);
// 0x1a01a01a,
masm.addpd(temp5, externalAddress(scThreePtr));
// 0xbf2a01a0,
// 0x16c16c17,
// 0xbf56c16c
masm.mulsd(temp4, temp8);
// 0x55555555,
masm.addpd(temp6, externalAddress(scOnePtr));
// 0xbfc55555,
// 0x00000000,
// 0xbfe00000
masm.mulpd(temp5, dest);
masm.movdqu(dest, temp3);
masm.addsd(temp3, temp9);
masm.mulpd(temp1, temp7);
masm.movdqu(temp7, temp4);
masm.addsd(temp4, temp3);
masm.addpd(temp6, temp5);
masm.subsd(temp9, temp3);
masm.subsd(temp3, temp4);
masm.addsd(temp1, new AMD64Address(gpr1, 16));
masm.mulpd(temp6, temp2);
masm.addsd(temp9, dest);
masm.addsd(temp3, temp7);
masm.addsd(temp1, temp9);
masm.addsd(temp1, temp3);
masm.addsd(temp1, temp6);
masm.unpckhpd(temp6, temp6);
masm.movdqu(dest, temp4);
masm.addsd(temp1, temp6);
masm.addsd(dest, temp1);
masm.jmp(bb15);
masm.bind(bb14);
masm.xorpd(temp1, temp1);
masm.xorpd(dest, dest);
masm.divsd(dest, temp1);
masm.jmp(bb15);
masm.bind(bb0);
masm.jcc(ConditionFlag.Greater, bb1);
masm.shrl(gpr1, 20);
masm.cmpl(gpr1, 3325);
masm.jcc(ConditionFlag.NotEqual, bb2);
// 0xffffffff,
masm.mulsd(dest, externalAddress(allOnesPtr));
// 0x3fefffff
masm.jmp(bb15);
masm.bind(bb2);
// 0x00000000,
masm.movdq(temp3, externalAddress(twoPowFiftyFivePtr));
// 0x43600000
masm.mulsd(temp3, dest);
masm.subsd(temp3, dest);
// 0x00000000,
masm.mulsd(temp3, externalAddress(twoPowFiftyFiveMPtr));
// 0x3c800000
masm.jmp(bb15);
masm.bind(bb1);
masm.pextrw(gpr3, dest, 3);
masm.andl(gpr3, 32752);
masm.cmpl(gpr3, 32752);
masm.jcc(ConditionFlag.Equal, bb14);
masm.subl(gpr3, 16224);
masm.shrl(gpr3, 7);
masm.andl(gpr3, 65532);
masm.leaq(gpr10, externalAddress(piInvTablePtr));
masm.addq(gpr3, gpr10);
masm.movdq(gpr1, dest);
masm.movl(gpr9, new AMD64Address(gpr3, 20));
masm.movl(gpr7, new AMD64Address(gpr3, 24));
masm.movl(gpr4, gpr1);
masm.shrq(gpr1, 21);
masm.orl(gpr1, Integer.MIN_VALUE);
masm.shrl(gpr1, 11);
masm.movl(gpr8, gpr9);
masm.imulq(gpr9, gpr4);
masm.imulq(gpr8, gpr1);
masm.imulq(gpr7, gpr1);
masm.movl(gpr5, new AMD64Address(gpr3, 16));
masm.movl(gpr6, new AMD64Address(gpr3, 12));
masm.movl(gpr10, gpr9);
masm.shrq(gpr9, 32);
masm.addq(gpr8, gpr9);
masm.addq(gpr10, gpr7);
masm.movl(gpr7, gpr10);
masm.shrq(gpr10, 32);
masm.addq(gpr8, gpr10);
masm.movl(gpr9, gpr5);
masm.imulq(gpr5, gpr4);
masm.imulq(gpr9, gpr1);
masm.movl(gpr10, gpr6);
masm.imulq(gpr6, gpr4);
masm.movl(gpr2, gpr5);
masm.shrq(gpr5, 32);
masm.addq(gpr8, gpr2);
masm.movl(gpr2, gpr8);
masm.shrq(gpr8, 32);
masm.addq(gpr9, gpr5);
masm.addq(gpr9, gpr8);
masm.shlq(gpr2, 32);
masm.orq(gpr7, gpr2);
masm.imulq(gpr10, gpr1);
masm.movl(gpr8, new AMD64Address(gpr3, 8));
masm.movl(gpr5, new AMD64Address(gpr3, 4));
masm.movl(gpr2, gpr6);
masm.shrq(gpr6, 32);
masm.addq(gpr9, gpr2);
masm.movl(gpr2, gpr9);
masm.shrq(gpr9, 32);
masm.addq(gpr10, gpr6);
masm.addq(gpr10, gpr9);
masm.movq(gpr6, gpr8);
masm.imulq(gpr8, gpr4);
masm.imulq(gpr6, gpr1);
masm.movl(gpr9, gpr8);
masm.shrq(gpr8, 32);
masm.addq(gpr10, gpr9);
masm.movl(gpr9, gpr10);
masm.shrq(gpr10, 32);
masm.addq(gpr6, gpr8);
masm.addq(gpr6, gpr10);
masm.movq(gpr8, gpr5);
masm.imulq(gpr5, gpr4);
masm.imulq(gpr8, gpr1);
masm.shlq(gpr9, 32);
masm.orq(gpr9, gpr2);
masm.movl(gpr1, new AMD64Address(gpr3, 0));
masm.movl(gpr10, gpr5);
masm.shrq(gpr5, 32);
masm.addq(gpr6, gpr10);
masm.movl(gpr10, gpr6);
masm.shrq(gpr6, 32);
masm.addq(gpr8, gpr5);
masm.addq(gpr8, gpr6);
masm.imulq(gpr4, gpr1);
masm.pextrw(gpr2, dest, 3);
masm.leaq(gpr6, externalAddress(piInvTablePtr));
masm.subq(gpr3, gpr6);
masm.addl(gpr3, gpr3);
masm.addl(gpr3, gpr3);
masm.addl(gpr3, gpr3);
masm.addl(gpr3, 19);
masm.movl(gpr5, 32768);
masm.andl(gpr5, gpr2);
masm.shrl(gpr2, 4);
masm.andl(gpr2, 2047);
masm.subl(gpr2, 1023);
masm.subl(gpr3, gpr2);
masm.addq(gpr8, gpr4);
masm.movl(gpr4, gpr3);
masm.addl(gpr4, 32);
masm.cmpl(gpr3, 1);
masm.jcc(ConditionFlag.Less, bb4);
masm.negl(gpr3);
masm.addl(gpr3, 29);
masm.shll(gpr8);
masm.movl(gpr6, gpr8);
masm.andl(gpr8, 536870911);
masm.testl(gpr8, 268435456);
masm.jcc(ConditionFlag.NotEqual, bb5);
masm.shrl(gpr8);
masm.movl(gpr2, 0);
masm.shlq(gpr8, 32);
masm.orq(gpr8, gpr10);
masm.bind(bb6);
masm.cmpq(gpr8, 0);
masm.jcc(ConditionFlag.Equal, bb8);
masm.bind(bb9);
masm.bsrq(gpr10, gpr8);
masm.movl(gpr3, 29);
masm.subl(gpr3, gpr10);
masm.jcc(ConditionFlag.LessEqual, bb10);
masm.shlq(gpr8);
masm.movq(gpr1, gpr9);
masm.shlq(gpr9);
masm.addl(gpr4, gpr3);
masm.negl(gpr3);
masm.addl(gpr3, 64);
masm.shrq(gpr1);
masm.shrq(gpr7);
masm.orq(gpr8, gpr1);
masm.orq(gpr9, gpr7);
masm.bind(bb11);
masm.cvtsi2sdq(dest, gpr8);
masm.shrq(gpr9, 1);
masm.cvtsi2sdq(temp3, gpr9);
masm.xorpd(temp4, temp4);
masm.shll(gpr4, 4);
masm.negl(gpr4);
masm.addl(gpr4, 16368);
masm.orl(gpr4, gpr5);
masm.xorl(gpr4, gpr2);
masm.pinsrw(temp4, gpr4, 3);
masm.leaq(gpr1, externalAddress(piFourPtr));
// 0x40000000,
masm.movdqu(temp2, new AMD64Address(gpr1, 0));
// 0x3fe921fb,
// 0x18469899,
// 0x3e64442d
masm.xorpd(temp5, temp5);
masm.subl(gpr4, 1008);
masm.pinsrw(temp5, gpr4, 3);
masm.mulsd(dest, temp4);
masm.shll(gpr5, 16);
masm.sarl(gpr5, 31);
masm.mulsd(temp3, temp5);
masm.movdqu(temp1, dest);
masm.pshufd(temp6, temp2, 0xE);
masm.mulsd(dest, temp2);
masm.shrl(gpr6, 29);
masm.addsd(temp1, temp3);
masm.mulsd(temp3, temp2);
masm.addl(gpr6, gpr5);
masm.xorl(gpr6, gpr5);
masm.mulsd(temp6, temp1);
masm.movl(gpr1, gpr6);
masm.addsd(temp6, temp3);
masm.movdqu(temp2, dest);
masm.addsd(dest, temp6);
masm.subsd(temp2, dest);
masm.addsd(temp6, temp2);
masm.bind(bb12);
// 0x6dc9c883,
masm.movdq(temp1, externalAddress(piThirtyTwoInvPtr));
// 0x40245f30
masm.mulsd(temp1, dest);
// 0x00000000,
masm.movdq(temp5, externalAddress(oneHalfPtr));
// 0x3fe00000,
// 0x00000000,
// 0x3fe00000
// 0x00000000,
masm.movdq(temp4, externalAddress(signMaskPtr));
// 0x80000000
masm.pand(temp4, dest);
masm.por(temp5, temp4);
masm.addpd(temp1, temp5);
masm.cvttsd2sil(gpr4, temp1);
masm.cvtsi2sdl(temp1, gpr4);
// 0x54400000,
masm.movdq(temp3, externalAddress(pOnePtr));
// 0x3fb921fb
// 0x1a600000,
masm.movdqu(temp2, externalAddress(pTwoPtr));
// 0x3d90b461,
// 0x1a600000,
// 0x3d90b461
masm.mulsd(temp3, temp1);
masm.unpcklpd(temp1, temp1);
masm.shll(gpr1, 3);
masm.addl(gpr4, 1865216);
masm.movdqu(temp4, dest);
masm.addl(gpr4, gpr1);
masm.andl(gpr4, 63);
// 0x54400000,
masm.movdqu(temp5, externalAddress(scFourPtr));
// 0x3fb921fb
masm.leaq(gpr1, externalAddress(cTablePtr));
masm.shll(gpr4, 5);
masm.addq(gpr1, gpr4);
masm.movdqu(temp8, new AMD64Address(gpr1, 0));
masm.mulpd(temp2, temp1);
masm.subsd(dest, temp3);
// 0x2e037073,
masm.mulsd(temp1, externalAddress(pThreePtr));
// 0x3b63198a
masm.subsd(temp4, temp3);
masm.unpcklpd(dest, dest);
masm.movdqu(temp3, temp4);
masm.subsd(temp4, temp2);
masm.mulpd(temp5, dest);
masm.subpd(dest, temp2);
masm.pshufd(temp7, temp8, 0xE);
masm.movdqu(temp9, temp7);
masm.mulsd(temp7, temp4);
masm.subsd(temp3, temp4);
masm.mulpd(temp5, dest);
masm.mulpd(dest, dest);
masm.subsd(temp3, temp2);
masm.movdqu(temp2, temp8);
masm.subsd(temp1, temp3);
masm.movdq(temp3, new AMD64Address(gpr1, 24));
masm.addsd(temp2, temp3);
masm.subsd(temp7, temp2);
masm.subsd(temp1, temp6);
// 0x11111111,
masm.movdqu(temp6, externalAddress(scTwoPtr));
// 0x3f811111,
// 0x55555555,
// 0x3fa55555
masm.mulsd(temp2, temp4);
masm.mulpd(temp6, dest);
masm.mulsd(temp3, temp4);
masm.mulpd(temp2, dest);
masm.mulpd(dest, dest);
// 0x1a01a01a,
masm.addpd(temp5, externalAddress(scThreePtr));
// 0xbf2a01a0,
// 0x16c16c17,
// 0xbf56c16c
masm.mulsd(temp4, temp8);
// 0x55555555,
masm.addpd(temp6, externalAddress(scOnePtr));
// 0xbfc55555,
// 0x00000000,
// 0xbfe00000
masm.mulpd(temp5, dest);
masm.movdqu(dest, temp3);
masm.addsd(temp3, temp9);
masm.mulpd(temp1, temp7);
masm.movdqu(temp7, temp4);
masm.addsd(temp4, temp3);
masm.addpd(temp6, temp5);
masm.subsd(temp9, temp3);
masm.subsd(temp3, temp4);
masm.addsd(temp1, new AMD64Address(gpr1, 16));
masm.mulpd(temp6, temp2);
masm.addsd(temp9, dest);
masm.addsd(temp3, temp7);
masm.addsd(temp1, temp9);
masm.addsd(temp1, temp3);
masm.addsd(temp1, temp6);
masm.unpckhpd(temp6, temp6);
masm.movdqu(dest, temp4);
masm.addsd(temp1, temp6);
masm.addsd(dest, temp1);
masm.jmp(bb15);
masm.bind(bb8);
masm.addl(gpr4, 64);
masm.movq(gpr8, gpr9);
masm.movq(gpr9, gpr7);
masm.movl(gpr7, 0);
masm.cmpq(gpr8, 0);
masm.jcc(ConditionFlag.NotEqual, bb9);
masm.addl(gpr4, 64);
masm.movq(gpr8, gpr9);
masm.movq(gpr9, gpr7);
masm.cmpq(gpr8, 0);
masm.jcc(ConditionFlag.NotEqual, bb9);
masm.xorpd(dest, dest);
masm.xorpd(temp6, temp6);
masm.jmp(bb12);
masm.bind(bb10);
masm.jcc(ConditionFlag.Equal, bb11);
masm.negl(gpr3);
masm.shrq(gpr9);
masm.movq(gpr1, gpr8);
masm.shrq(gpr8);
masm.subl(gpr4, gpr3);
masm.negl(gpr3);
masm.addl(gpr3, 64);
masm.shlq(gpr1);
masm.orq(gpr9, gpr1);
masm.jmp(bb11);
masm.bind(bb4);
masm.negl(gpr3);
masm.shlq(gpr8, 32);
masm.orq(gpr8, gpr10);
masm.shlq(gpr8);
masm.movq(gpr6, gpr8);
masm.testl(gpr8, Integer.MIN_VALUE);
masm.jcc(ConditionFlag.NotEqual, bb13);
masm.shrl(gpr8);
masm.movl(gpr2, 0);
masm.shrq(gpr6, 3);
masm.jmp(bb6);
masm.bind(bb5);
masm.shrl(gpr8);
masm.movl(gpr2, 536870912);
masm.shrl(gpr2);
masm.shlq(gpr8, 32);
masm.orq(gpr8, gpr10);
masm.shlq(gpr2, 32);
masm.addl(gpr6, 536870912);
masm.movl(gpr3, 0);
masm.movl(gpr10, 0);
masm.subq(gpr3, gpr7);
masm.sbbq(gpr10, gpr9);
masm.sbbq(gpr2, gpr8);
masm.movq(gpr7, gpr3);
masm.movq(gpr9, gpr10);
masm.movq(gpr8, gpr2);
masm.movl(gpr2, 32768);
masm.jmp(bb6);
masm.bind(bb13);
masm.shrl(gpr8);
masm.movq(gpr2, 0x100000000L);
masm.shrq(gpr2);
masm.movl(gpr3, 0);
masm.movl(gpr10, 0);
masm.subq(gpr3, gpr7);
masm.sbbq(gpr10, gpr9);
masm.sbbq(gpr2, gpr8);
masm.movq(gpr7, gpr3);
masm.movq(gpr9, gpr10);
masm.movq(gpr8, gpr2);
masm.movl(gpr2, 32768);
masm.shrq(gpr6, 3);
masm.addl(gpr6, 536870912);
masm.jmp(bb6);
masm.bind(bb15);
}
use of org.graalvm.compiler.asm.amd64.AMD64MacroAssembler in project graal by oracle.
the class AMD64MathIntrinsicUnaryOp method log10Intrinsic.
/*
* Copyright (c) 2014, 2016, Intel Corporation. All rights reserved. Intel Math Library (LIBM)
* Source Code
*
* ALGORITHM DESCRIPTION - LOG10()
* ---------------------
*
* Let x=2^k * mx, mx in [1,2)
*
* Get B~1/mx based on the output of rcpss instruction (B0)
* (note: the code emitted below issues rcpps)
*   B = int((B0*LH*2^7+0.5))/2^7
*   LH is a short approximation for log10(e)
*
* Reduced argument: r=B*mx-LH (computed accurately in high and low parts)
*
* Result: k*log10(2) - log(B) + p(r)
*   p(r) is a degree 7 polynomial
*   -log(B) read from data table (high, low parts)
*   Result is formed from high and low parts
*
* Special cases:
*   log10(0) = -INF with divide-by-zero exception raised
*   log10(1) = +0
*   log10(x) = NaN with invalid exception raised if x < -0, including -INF
*   log10(+INF) = +INF
*
*/
/**
 * Emits the AMD64 instruction sequence described in the algorithm comment above, leaving the
 * result in {@code dest}.
 *
 * <p>
 * The sequence uses the general purpose temporaries {@code gpr1Temp}, {@code gpr2Temp},
 * {@code rcxTemp} and {@code gpr4Temp}, the XMM temporaries {@code xmm1Temp} to {@code xmm7Temp},
 * and the stack slot {@code stackTemp}; all of them are overwritten.
 *
 * @param dest XMM register that receives the result; it is also used as a working register
 * @param value XMM register holding the input; it is spilled to the stack slot and, if it is a
 *            different register from {@code dest}, copied into {@code dest} first
 * @param crb compilation result builder, used to resolve {@code stackTemp} to an address and
 *            handed to {@code setCrb}
 * @param masm assembler that the instructions are emitted to
 */
public void log10Intrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) {
// Constant tables referenced by the emitted code. The second constructor argument is 16 for
// every table (presumably the alignment in bytes -- confirm against ArrayDataPointerConstant).
ArrayDataPointerConstant highmaskLogTenPtr = new ArrayDataPointerConstant(highmaskLogTen, 16);
ArrayDataPointerConstant logTenEPtr = new ArrayDataPointerConstant(logTenE, 16);
ArrayDataPointerConstant logTenTablePtr = new ArrayDataPointerConstant(logTenTable, 16);
ArrayDataPointerConstant logTwoLogTenDataPtr = new ArrayDataPointerConstant(logTwoLogTenData, 16);
ArrayDataPointerConstant coeffLogTenDataPtr = new ArrayDataPointerConstant(coeffLogTenData, 16);
// Branch targets:
// bb0 = special-input dispatch, bb1 = main computation, bb2 = sign bit set,
// bb3 = zero or subnormal, bb4 = return x + x, bb5 = sign bit set with all-ones exponent,
// bb6 = produce NaN, bb7 = produce -INF, bb8 = exit.
Label bb0 = new Label();
Label bb1 = new Label();
Label bb2 = new Label();
Label bb3 = new Label();
Label bb4 = new Label();
Label bb5 = new Label();
Label bb6 = new Label();
Label bb7 = new Label();
Label bb8 = new Label();
// Working registers. Note that gpr3 is taken from rcxTemp rather than a gpr3Temp.
Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD);
Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD);
Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD);
Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD);
Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE);
Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE);
Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE);
Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE);
Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE);
Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE);
Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE);
AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp);
setCrb(crb);
// Keep an untouched copy of the input; the special-case path at bb0 reloads it from here.
masm.movdq(stackSlot, value);
if (dest.encoding != value.encoding) {
masm.movdqu(dest, value);
}
// highmaskLogTen words: 0xf8000000, 0xffffffff, 0x00000000, 0xffffe000
masm.movdqu(temp5, externalAddress(highmaskLogTenPtr));
// temp2 = 0x3FF0000000000000 in the low qword (16368 == 0x3FF0 placed in bits 48..63), i.e. 1.0.
masm.xorpd(temp2, temp2);
masm.movl(gpr1, 16368);
masm.pinsrw(temp2, gpr1, 3);
// temp7 low dword = 0x3EDE0000, which as a float is 0.43359375 (presumably LH, the short
// approximation of log10(e) from the algorithm description).
masm.movl(gpr2, 1054736384);
masm.movdl(temp7, gpr2);
// temp3 = 0x77F0000000000000 in the low qword (30704 == 0x77F0); OR-ed into the mantissa at bb1.
masm.xorpd(temp3, temp3);
masm.movl(gpr3, 30704);
masm.pinsrw(temp3, gpr3, 3);
// temp4 low dword = 0x8000; added to dest with paddd at bb1 (presumably the rounding term of
// B = int(B0*LH*2^7+0.5)/2^7).
masm.movl(gpr3, 32768);
masm.movdl(temp4, gpr3);
masm.movdqu(temp1, value);
// gpr1 = bits 48..63 of the input: sign, 11 exponent bits and the top 4 mantissa bits.
masm.pextrw(gpr1, dest, 3);
masm.por(dest, temp2);
// 16352 == 0x3FE0; subtracted from the masked exponent word at bb1.
masm.movl(gpr2, 16352);
masm.psrlq(dest, 27);
// logTenE words: 0x00000000, 0x3fdbc000, 0xbf2e4108, 0x3f5a7a6c
masm.movdqu(temp2, externalAddress(logTenEPtr));
masm.psrld(dest, 2);
// Packed single-precision reciprocal approximation of the shifted input bits (B0 in the
// algorithm description).
masm.rcpps(dest, dest);
// Shift left then right by 12 to clear the sign and exponent bits of temp1, keeping the
// 52-bit mantissa field.
masm.psllq(temp1, 12);
// 0x4E swaps the two 64-bit halves: temp6 = highmask with its qwords exchanged.
masm.pshufd(temp6, temp5, 0x4E);
masm.psrlq(temp1, 12);
// Unsigned range check on the exponent word: after subtracting 16, values >= 0x7FE0 (32736)
// are exactly the inputs with a zero exponent field (the subtraction wraps), an all-ones
// exponent field, or the sign bit set. Those go to the special-case dispatch.
masm.subl(gpr1, 16);
masm.cmpl(gpr1, 32736);
masm.jcc(ConditionFlag.AboveEqual, bb0);
// ---- bb1: main computation (also entered from bb3 after a subnormal input was rescaled) ----
masm.bind(bb1);
masm.mulss(dest, temp7);
masm.por(temp1, temp3);
// temp5 = temp1 masked with highmask (high part); temp1 - temp5 below yields the low part.
masm.andpd(temp5, temp1);
masm.paddd(dest, temp4);
// coeffLogTenData words at offset 0: 0xc1a5f12e, 0x40358874, 0x64d4ef0d, 0xc0089309
masm.movdqu(temp3, externalAddress(coeffLogTenDataPtr));
masm.leaq(gpr4, externalAddress(coeffLogTenDataPtr));
// coeffLogTenData words at offset 16: 0x385593b1, 0xc025c917, 0xdc963467, 0x3ffc6a02
masm.movdqu(temp4, new AMD64Address(gpr4, 16));
masm.subsd(temp1, temp5);
// Keep the low 32 bits of dest in gpr3; they are turned into the table offset further down.
masm.movdl(gpr3, dest);
masm.psllq(dest, 29);
masm.andpd(dest, temp6);
// logTwoLogTenData words at offset 0: 0x509f7800, 0x3f934413
masm.movdq(temp6, externalAddress(logTwoLogTenDataPtr));
// Isolate the exponent field of the saved word (32752 == 0x7FF0), remove the bias held in gpr2
// and convert to double: temp7 is the unbiased exponent k scaled by 16, since the field sits
// at bits 4..14 of the word.
masm.andl(gpr1, 32752);
masm.subl(gpr1, gpr2);
masm.cvtsi2sdl(temp7, gpr1);
masm.mulpd(temp5, dest);
masm.mulsd(temp1, dest);
masm.subsd(temp5, temp2);
// coeffLogTenData words at offset 32: 0x7f9d3aa1, 0x4016ab9f, 0xdc77b115, 0xbff27af2
masm.movdqu(temp2, new AMD64Address(gpr4, 32));
masm.leaq(gpr4, externalAddress(logTenTablePtr));
// Byte offset into logTenTable: bits 16..23 of gpr3 (16711680 == 0xFF0000) shifted right by 12,
// then rebased by -1504 in the address below.
masm.andl(gpr3, 16711680);
masm.shrl(gpr3, 12);
masm.movdqu(dest, new AMD64Address(gpr4, gpr3, Scale.Times1, -1504));
masm.addsd(temp1, temp5);
masm.mulsd(temp6, temp7);
// 0x44 copies the low qword into both halves of temp5.
masm.pshufd(temp5, temp1, 0x44);
masm.leaq(gpr4, externalAddress(logTwoLogTenDataPtr));
// logTwoLogTenData words at offset 8: 0x1f12b358, 0x3cdfef31
masm.mulsd(temp7, new AMD64Address(gpr4, 8));
masm.mulsd(temp3, temp1);
masm.addsd(dest, temp6);
masm.mulpd(temp4, temp5);
masm.leaq(gpr4, externalAddress(logTenEPtr));
// logTenE words at offset 8: 0xbf2e4108, 0x3f5a7a6c
masm.movdq(temp6, new AMD64Address(gpr4, 8));
masm.mulpd(temp5, temp5);
masm.addpd(temp4, temp2);
masm.mulpd(temp3, temp5);
// 0xE4 is the identity shuffle: temp2 = copy of dest.
masm.pshufd(temp2, dest, 0xE4);
masm.addsd(dest, temp1);
masm.mulsd(temp4, temp1);
masm.subsd(temp2, dest);
masm.mulsd(temp6, temp1);
masm.addsd(temp1, temp2);
// 0xEE copies the high qword into both halves: temp2 low = high qword of dest.
masm.pshufd(temp2, dest, 0xEE);
masm.mulsd(temp5, temp5);
masm.addsd(temp7, temp2);
masm.addsd(temp1, temp6);
masm.addpd(temp4, temp3);
masm.addsd(temp1, temp7);
masm.mulpd(temp4, temp5);
masm.addsd(temp1, temp4);
// Fold the high-qword lane of temp4 into the scalar sum.
masm.pshufd(temp5, temp4, 0xEE);
masm.addsd(temp1, temp5);
// Final result = high part (dest) + accumulated low part (temp1).
masm.addsd(dest, temp1);
masm.jmp(bb8);
// ---- bb0: special inputs (zero/subnormal, INF/NaN, or sign bit set) ----
masm.bind(bb0);
// Reload the original input and undo the earlier "subl 16" on the exponent word.
masm.movdq(dest, stackSlot);
masm.movdq(temp1, stackSlot);
masm.addl(gpr1, 16);
// Word >= 0x8000 (32768): sign bit set.
masm.cmpl(gpr1, 32768);
masm.jcc(ConditionFlag.AboveEqual, bb2);
// Word < 16: exponent field is zero, i.e. +0 or a positive subnormal.
masm.cmpl(gpr1, 16);
masm.jcc(ConditionFlag.Below, bb3);
// ---- bb4: all-ones exponent (INF or NaN): return x + x ----
masm.bind(bb4);
masm.addsd(dest, dest);
masm.jmp(bb8);
// ---- bb5: sign bit set and exponent field all ones ----
// Only reached from the jcc in bb2, so the first jcc here still sees the flags of that
// "cmpl(gpr2, -2097152)": Above means high mantissa bits are set (NaN).
masm.bind(bb5);
masm.jcc(ConditionFlag.Above, bb4);
// High mantissa bits are zero; a non-zero low dword still means NaN.
masm.cmpl(gpr3, 0);
masm.jcc(ConditionFlag.Above, bb4);
// Otherwise the input is -INF: produce NaN.
masm.jmp(bb6);
// ---- bb3: exponent field is zero ----
masm.bind(bb3);
masm.xorpd(temp1, temp1);
masm.addsd(temp1, dest);
// OR the low and high dwords of (0 + x) to test whether the value is zero.
masm.movdl(gpr3, temp1);
masm.psrlq(temp1, 32);
masm.movdl(gpr2, temp1);
masm.orl(gpr3, gpr2);
masm.cmpl(gpr3, 0);
masm.jcc(ConditionFlag.Equal, bb7);
// Subnormal input: multiply by 0x47F0000000000000 (18416 == 0x47F0, i.e. 2^128) and then
// repeat the setup sequence from the method entry on the scaled value.
masm.xorpd(temp1, temp1);
masm.xorpd(temp2, temp2);
masm.movl(gpr1, 18416);
masm.pinsrw(temp1, gpr1, 3);
masm.mulsd(dest, temp1);
masm.movl(gpr1, 16368);
masm.pinsrw(temp2, gpr1, 3);
masm.movdqu(temp1, dest);
masm.pextrw(gpr1, dest, 3);
masm.por(dest, temp2);
// Bias for bb1 is 0x47F0 instead of 0x3FE0: it accounts for the 2^128 scaling and for the
// fact that gpr1 has not had 16 subtracted on this path.
masm.movl(gpr2, 18416);
masm.psrlq(dest, 27);
// logTenE words: 0x00000000, 0x3fdbc000, 0xbf2e4108, 0x3f5a7a6c
masm.movdqu(temp2, externalAddress(logTenEPtr));
masm.psrld(dest, 2);
masm.rcpps(dest, dest);
masm.psllq(temp1, 12);
masm.pshufd(temp6, temp5, 0x4E);
masm.psrlq(temp1, 12);
masm.jmp(bb1);
// ---- bb2: sign bit set ----
masm.bind(bb2);
// gpr3 = low dword of x, gpr2 = high dword of x doubled (which shifts the sign bit out).
masm.movdl(gpr3, temp1);
masm.psrlq(temp1, 32);
masm.movdl(gpr2, temp1);
masm.addl(gpr2, gpr2);
// -2097152 == 0xFFE00000 == 0x7FF00000 << 1: unsigned >= means the exponent field is all ones.
masm.cmpl(gpr2, -2097152);
masm.jcc(ConditionFlag.AboveEqual, bb5);
// All remaining bits zero means the input is -0: produce -INF.
masm.orl(gpr3, gpr2);
masm.cmpl(gpr3, 0);
masm.jcc(ConditionFlag.Equal, bb7);
// ---- bb6: negative input: compute 0 * INF, which yields NaN ----
masm.bind(bb6);
masm.xorpd(temp1, temp1);
masm.xorpd(dest, dest);
// 32752 == 0x7FF0: temp1 = +INF.
masm.movl(gpr1, 32752);
masm.pinsrw(temp1, gpr1, 3);
masm.mulsd(dest, temp1);
masm.jmp(bb8);
// ---- bb7: zero input: compute -1.0 / 0, which yields -INF ----
masm.bind(bb7);
masm.xorpd(temp1, temp1);
masm.xorpd(dest, dest);
// 49136 == 0xBFF0: dest = -1.0.
masm.movl(gpr1, 49136);
masm.pinsrw(dest, gpr1, 3);
masm.divsd(dest, temp1);
// ---- bb8: common exit; the result is in dest ----
masm.bind(bb8);
}
Aggregations