use of org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND in project graal by oracle.
the class AMD64Assembler method emitOperandHelper.
/**
 * Emits the ModR/M byte, the SIB byte when the addressing form needs one, and the displacement
 * for one memory operand, with an opcode extension in the R field.
 *
 * <p>
 * Addressing forms handled:
 * <ul>
 * <li>RIP-relative: always encoded with a 32-bit displacement; an index register is rejected by
 * an assertion. If a code patching consumer is installed and the address carries an instruction
 * start position, an {@code AddressDisplacementAnnotation} describing the displacement is
 * reported before the displacement is emitted.</li>
 * <li>Valid base register, with or without an index: the displacement is omitted when it is
 * zero and the base is neither rbp nor r13, emitted as one byte when it fits in a byte and
 * {@code force4Byte} is not set, and emitted as four bytes otherwise.</li>
 * <li>No base register: index*scale + disp32, or an absolute disp32 when there is no index
 * either.</li>
 * </ul>
 *
 * @param reg value placed in the R field of the ModR/M byte; must fit in three bits
 * @param addr the memory operand to encode
 * @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte;
 *            a zero displacement that can be omitted is still omitted
 * @param additionalInstructionSize the number of bytes that will be emitted after the operand,
 *            so that the start position of the next instruction can be computed even though
 *            this instruction has not been completely emitted yet.
 */
protected void emitOperandHelper(int reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize) {
    assert (reg & 0x07) == reg;
    int regenc = reg << 3;

    Register base = addr.getBase();
    Register index = addr.getIndex();
    AMD64Address.Scale scale = addr.getScale();
    int disp = addr.getDisplacement();

    if (base.equals(AMD64.rip)) {
        // [00 000 101] disp32
        assert index.equals(Register.None) : "cannot use RIP relative addressing with index register";
        emitByte(0x05 | regenc);
        if (codePatchingAnnotationConsumer != null && addr.instructionStartPosition >= 0) {
            // The displacement starts at the current position and is 4 bytes long; the next
            // instruction starts after it plus whatever the caller still has to emit.
            codePatchingAnnotationConsumer.accept(new AddressDisplacementAnnotation(addr.instructionStartPosition, position(), 4, position() + 4 + additionalInstructionSize));
        }
        emitInt(disp);
    } else if (base.isValid()) {
        int baseenc = encode(base);

        // Pick the mod field once; it is the same for all three ModR/M/SIB shapes below.
        // mod=00 with base rbp/r13 would select a different addressing form, so those bases
        // always carry an explicit displacement, even when it is zero.
        boolean omitDisp = disp == 0 && !base.equals(rbp) && !base.equals(r13);
        boolean disp8 = !omitDisp && isByte(disp) && !force4Byte;
        int mod = omitDisp ? 0x00 : (disp8 ? 0x40 : 0x80);

        if (index.isValid()) {
            // [base + index*scale + disp]
            // [mod reg 100][ss index base]
            int indexenc = encode(index) << 3;
            assert !index.equals(rsp) : "illegal addressing mode";
            emitByte(mod | 0x04 | regenc);
            emitByte(scale.log2 << 6 | indexenc | baseenc);
        } else if (base.equals(rsp) || base.equals(r12)) {
            // [rsp + disp]
            // [mod reg 100][00 100 100] - these bases can only be expressed through a SIB byte
            emitByte(mod | 0x04 | regenc);
            emitByte(0x24);
        } else {
            // [base + disp]
            // [mod reg base]
            emitByte(mod | regenc | baseenc);
        }

        if (disp8) {
            emitByte(disp & 0xFF);
        } else if (!omitDisp) {
            emitInt(disp);
        }
    } else {
        if (index.isValid()) {
            // [index*scale + disp]
            // [00 reg 100][ss index 101] disp32
            int indexenc = encode(index) << 3;
            assert !index.equals(rsp) : "illegal addressing mode";
            emitByte(0x04 | regenc);
            emitByte(scale.log2 << 6 | indexenc | 0x05);
        } else {
            // [disp] ABSOLUTE
            // [00 reg 100][00 100 101] disp32
            emitByte(0x04 | regenc);
            emitByte(0x25);
        }
        emitInt(disp);
    }
    setCurAttributes(null);
}
use of org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND in project graal by oracle.
the class AMD64MathIntrinsicUnaryOp method log10Intrinsic.
/*
 * Copyright (c) 2014, 2016, Intel Corporation. All rights reserved. Intel Math Library (LIBM)
 * Source Code
 *
 * ALGORITHM DESCRIPTION - LOG10()
 * ---------------------
 *
 * Let x = 2^k * mx, mx in [1,2)
 *
 * Get B ~ 1/mx based on the output of rcpss instruction (B0):
 *   B = int((B0*LH*2^7+0.5))/2^7
 *   LH is a short approximation for log10(e)
 *
 * Reduced argument: r = B*mx - LH (computed accurately in high and low parts)
 *
 * Result: k*log10(2) - log(B) + p(r)
 *   p(r) is a degree 7 polynomial
 *   -log(B) read from data table (high, low parts)
 *   Result is formed from high and low parts
 *
 * Special cases:
 *   log10(0) = -INF with divide-by-zero exception raised
 *   log10(1) = +0
 *   log10(x) = NaN with invalid exception raised if x < -0, including -INF
 *   log10(+INF) = +INF
 *
 */
/**
 * Emits the instruction sequence that computes log10 of {@code value} into {@code dest},
 * following the algorithm described in the comment above.
 *
 * <p>
 * The sequence writes the general purpose and XMM temporaries obtained from {@code gpr1Temp},
 * {@code gpr2Temp}, {@code rcxTemp}, {@code gpr4Temp} and {@code xmm1Temp}..{@code xmm7Temp},
 * and the stack slot {@code stackTemp}; all of these are declared outside this method. The
 * instruction order is significant (several conditional jumps consume flags set many
 * instructions earlier), so the sequence must not be reordered.
 *
 * @param dest register that receives the result
 * @param value register holding the input; it is saved to the stack slot and copied to
 *            {@code dest} before the computation starts
 * @param crb used to resolve the address of the stack temporary and passed to {@code setCrb}
 * @param masm assembler that receives all emitted instructions
 */
public void log10Intrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) {
// 16 is presumably the required alignment of each data table - TODO confirm against
// ArrayDataPointerConstant.
ArrayDataPointerConstant highmaskLogTenPtr = new ArrayDataPointerConstant(highmaskLogTen, 16);
ArrayDataPointerConstant logTenEPtr = new ArrayDataPointerConstant(logTenE, 16);
ArrayDataPointerConstant logTenTablePtr = new ArrayDataPointerConstant(logTenTable, 16);
ArrayDataPointerConstant logTwoLogTenDataPtr = new ArrayDataPointerConstant(logTwoLogTenData, 16);
ArrayDataPointerConstant coeffLogTenDataPtr = new ArrayDataPointerConstant(coeffLogTenData, 16);
// bb1: main computation; bb0: entry to special-case handling; bb8: common exit.
// bb2..bb7 are the individual special-case paths, described where they are bound.
Label bb0 = new Label();
Label bb1 = new Label();
Label bb2 = new Label();
Label bb3 = new Label();
Label bb4 = new Label();
Label bb5 = new Label();
Label bb6 = new Label();
Label bb7 = new Label();
Label bb8 = new Label();
Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD);
Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD);
Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD);
Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD);
Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE);
Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE);
Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE);
Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE);
Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE);
Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE);
Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE);
AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp);
setCrb(crb);
// Save the unmodified input; the special-case path at bb0 reloads it from here.
masm.movdq(stackSlot, value);
if (dest.encoding != value.encoding) {
masm.movdqu(dest, value);
}
// Words loaded, per the original annotation: 0xf8000000, 0xffffffff, 0x00000000, 0xffffe000
masm.movdqu(temp5, externalAddress(highmaskLogTenPtr));
// temp2 = 16368 (0x3FF0) in word 3, i.e. the bit pattern of the double 1.0.
masm.xorpd(temp2, temp2);
masm.movl(gpr1, 16368);
masm.pinsrw(temp2, gpr1, 3);
// temp7 low dword = 1054736384 (0x3EDE0000).
masm.movl(gpr2, 1054736384);
masm.movdl(temp7, gpr2);
// temp3 = 30704 (0x77F0) in word 3.
masm.xorpd(temp3, temp3);
masm.movl(gpr3, 30704);
masm.pinsrw(temp3, gpr3, 3);
// temp4 low dword = 32768 (0x8000).
masm.movl(gpr3, 32768);
masm.movdl(temp4, gpr3);
masm.movdqu(temp1, value);
// gpr1 = word 3 of the input, i.e. its top 16 bits (sign, exponent, top mantissa bits).
masm.pextrw(gpr1, dest, 3);
masm.por(dest, temp2);
// gpr2 = 16352 (0x3FE0): subtracted from the masked exponent word in the main path.
masm.movl(gpr2, 16352);
masm.psrlq(dest, 27);
// Words loaded, per the original annotation: 0x00000000, 0x3fdbc000, 0xbf2e4108, 0x3f5a7a6c
masm.movdqu(temp2, externalAddress(logTenEPtr));
masm.psrld(dest, 2);
// Reciprocal approximation (the B0 of the algorithm description, presumably).
masm.rcpps(dest, dest);
masm.psllq(temp1, 12);
masm.pshufd(temp6, temp5, 0x4E);
masm.psrlq(temp1, 12);
// Unsigned test (top word - 16) >= 32736: true for top words below 0x0010 and for top words
// at or above 0x7FF0, i.e. zero/denormal, Inf/NaN and every input with the sign bit set.
masm.subl(gpr1, 16);
masm.cmpl(gpr1, 32736);
masm.jcc(ConditionFlag.AboveEqual, bb0);
// bb1: main path; also re-entered from bb3 after a denormal input has been rescaled.
masm.bind(bb1);
masm.mulss(dest, temp7);
masm.por(temp1, temp3);
masm.andpd(temp5, temp1);
masm.paddd(dest, temp4);
// Words loaded, per the original annotation: 0xc1a5f12e, 0x40358874, 0x64d4ef0d, 0xc0089309
masm.movdqu(temp3, externalAddress(coeffLogTenDataPtr));
masm.leaq(gpr4, externalAddress(coeffLogTenDataPtr));
// Words loaded, per the original annotation: 0x385593b1, 0xc025c917, 0xdc963467, 0x3ffc6a02
masm.movdqu(temp4, new AMD64Address(gpr4, 16));
masm.subsd(temp1, temp5);
masm.movdl(gpr3, dest);
masm.psllq(dest, 29);
masm.andpd(dest, temp6);
// Words loaded, per the original annotation: 0x509f7800, 0x3f934413
masm.movdq(temp6, externalAddress(logTwoLogTenDataPtr));
// Keep only the exponent bits of the top word (mask 32752 = 0x7FF0), subtract the constant
// held in gpr2 and convert to double; presumably this is the scaled exponent k - TODO confirm.
masm.andl(gpr1, 32752);
masm.subl(gpr1, gpr2);
masm.cvtsi2sdl(temp7, gpr1);
masm.mulpd(temp5, dest);
masm.mulsd(temp1, dest);
masm.subsd(temp5, temp2);
// Words loaded, per the original annotation: 0x7f9d3aa1, 0x4016ab9f, 0xdc77b115, 0xbff27af2
masm.movdqu(temp2, new AMD64Address(gpr4, 32));
masm.leaq(gpr4, externalAddress(logTenTablePtr));
// Table offset: (gpr3 & 0xFF0000) >> 12, applied with a fixed bias of -1504 bytes.
masm.andl(gpr3, 16711680);
masm.shrl(gpr3, 12);
masm.movdqu(dest, new AMD64Address(gpr4, gpr3, Scale.Times1, -1504));
masm.addsd(temp1, temp5);
masm.mulsd(temp6, temp7);
masm.pshufd(temp5, temp1, 0x44);
masm.leaq(gpr4, externalAddress(logTwoLogTenDataPtr));
// Words read, per the original annotation: 0x1f12b358, 0x3cdfef31
masm.mulsd(temp7, new AMD64Address(gpr4, 8));
masm.mulsd(temp3, temp1);
masm.addsd(dest, temp6);
masm.mulpd(temp4, temp5);
masm.leaq(gpr4, externalAddress(logTenEPtr));
// Words loaded, per the original annotation: 0xbf2e4108, 0x3f5a7a6c
masm.movdq(temp6, new AMD64Address(gpr4, 8));
masm.mulpd(temp5, temp5);
masm.addpd(temp4, temp2);
masm.mulpd(temp3, temp5);
masm.pshufd(temp2, dest, 0xE4);
masm.addsd(dest, temp1);
masm.mulsd(temp4, temp1);
masm.subsd(temp2, dest);
masm.mulsd(temp6, temp1);
masm.addsd(temp1, temp2);
masm.pshufd(temp2, dest, 0xEE);
masm.mulsd(temp5, temp5);
masm.addsd(temp7, temp2);
masm.addsd(temp1, temp6);
masm.addpd(temp4, temp3);
masm.addsd(temp1, temp7);
masm.mulpd(temp4, temp5);
masm.addsd(temp1, temp4);
masm.pshufd(temp5, temp4, 0xEE);
masm.addsd(temp1, temp5);
// Final accumulation into dest, then leave through the common exit.
masm.addsd(dest, temp1);
masm.jmp(bb8);
// bb0: special inputs. Reload the original argument and undo the "- 16" applied to gpr1.
masm.bind(bb0);
masm.movdq(dest, stackSlot);
masm.movdq(temp1, stackSlot);
masm.addl(gpr1, 16);
// Top word >= 0x8000: sign bit set -> bb2.
masm.cmpl(gpr1, 32768);
masm.jcc(ConditionFlag.AboveEqual, bb2);
// Top word < 0x0010: zero or denormal -> bb3.
masm.cmpl(gpr1, 16);
masm.jcc(ConditionFlag.Below, bb3);
// bb4: result is x + x. Reached by fall-through for top words 0x7FF0..0x7FFF (+Inf or NaN)
// and from bb5 for NaNs with the sign bit set.
masm.bind(bb4);
masm.addsd(dest, dest);
masm.jmp(bb8);
// bb5: sign bit set and exponent all ones. The first jcc still consumes the flags of the
// compare against -2097152 emitted just before the jump to bb5 in bb2.
masm.bind(bb5);
masm.jcc(ConditionFlag.Above, bb4);
// High mantissa bits are zero here; a non-zero low dword still means NaN -> bb4,
// otherwise the input is -Inf -> bb6.
masm.cmpl(gpr3, 0);
masm.jcc(ConditionFlag.Above, bb4);
masm.jmp(bb6);
// bb3: zero or denormal input.
masm.bind(bb3);
masm.xorpd(temp1, temp1);
masm.addsd(temp1, dest);
// OR the low and high dwords of (0 + x): an all-zero result means x is zero -> bb7.
masm.movdl(gpr3, temp1);
masm.psrlq(temp1, 32);
masm.movdl(gpr2, temp1);
masm.orl(gpr3, gpr2);
masm.cmpl(gpr3, 0);
masm.jcc(ConditionFlag.Equal, bb7);
// Denormal: multiply by the double whose top word is 18416 (0x47F0, i.e. 2^128), then repeat
// the setup done before bb1 on the rescaled value. gpr2 is set to 18416 instead of 16352,
// presumably to compensate for the rescaling in the exponent - TODO confirm.
masm.xorpd(temp1, temp1);
masm.xorpd(temp2, temp2);
masm.movl(gpr1, 18416);
masm.pinsrw(temp1, gpr1, 3);
masm.mulsd(dest, temp1);
masm.movl(gpr1, 16368);
masm.pinsrw(temp2, gpr1, 3);
masm.movdqu(temp1, dest);
masm.pextrw(gpr1, dest, 3);
masm.por(dest, temp2);
masm.movl(gpr2, 18416);
masm.psrlq(dest, 27);
// Words loaded, per the original annotation: 0x00000000, 0x3fdbc000, 0xbf2e4108, 0x3f5a7a6c
masm.movdqu(temp2, externalAddress(logTenEPtr));
masm.psrld(dest, 2);
masm.rcpps(dest, dest);
masm.psllq(temp1, 12);
masm.pshufd(temp6, temp5, 0x4E);
masm.psrlq(temp1, 12);
masm.jmp(bb1);
// bb2: sign bit set. gpr3 = low dword, gpr2 = high dword shifted left by one (sign dropped).
masm.bind(bb2);
masm.movdl(gpr3, temp1);
masm.psrlq(temp1, 32);
masm.movdl(gpr2, temp1);
masm.addl(gpr2, gpr2);
// -2097152 is 0xFFE00000: at or above it the exponent field is all ones (-Inf or NaN) -> bb5.
masm.cmpl(gpr2, -2097152);
masm.jcc(ConditionFlag.AboveEqual, bb5);
// All remaining bits zero means the input is -0 -> bb7.
masm.orl(gpr3, gpr2);
masm.cmpl(gpr3, 0);
masm.jcc(ConditionFlag.Equal, bb7);
// bb6: negative input (including -Inf). dest = 0.0 * (double with top word 32752 = 0x7FF0,
// the +Inf bit pattern), which yields NaN.
masm.bind(bb6);
masm.xorpd(temp1, temp1);
masm.xorpd(dest, dest);
masm.movl(gpr1, 32752);
masm.pinsrw(temp1, gpr1, 3);
masm.mulsd(dest, temp1);
masm.jmp(bb8);
// bb7: zero input. dest = (double with top word 49136 = 0xBFF0, i.e. -1.0) / 0.0 = -Inf.
masm.bind(bb7);
masm.xorpd(temp1, temp1);
masm.xorpd(dest, dest);
masm.movl(gpr1, 49136);
masm.pinsrw(dest, gpr1, 3);
masm.divsd(dest, temp1);
// bb8: common exit; the result is in dest.
masm.bind(bb8);
}
use of org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND in project graal by oracle.
the class AMD64ArrayEqualsOp method emit8ByteCompare.
/**
 * Emits an array comparison that works on 8-byte chunks.
 *
 * <p>
 * On entry {@code result} and {@code length} are each masked in place: {@code result} keeps the
 * tail byte count ({@code & (VECTOR_SIZE - 1)}) and {@code length} keeps the byte count covered
 * by whole chunks. If there are no whole chunks, control goes straight to the end of the emitted
 * sequence with {@code length} set to the tail count; whatever handles that tail is emitted
 * after this method returns and is not visible here.
 *
 * <p>
 * Otherwise both array pointers are advanced past the chunked region and {@code length} is
 * negated, so the loop indexes with a negative offset that counts up to zero. After the loop a
 * non-empty tail is checked with one more 8-byte compare positioned so that it ends at the last
 * byte of the arrays. For floating point element kinds a raw mismatch is not final: the elements
 * of the mismatching chunk are re-checked through {@code emitFloatCompare}.
 *
 * @param crb supplies the target word size used to align the loop head
 * @param masm assembler that receives the emitted instructions
 * @param result on entry the byte length; overwritten with the tail byte count
 * @param array1 pointer to the first array's data; advanced past the chunked region
 * @param array2 pointer to the second array's data; advanced past the chunked region
 * @param length on entry the byte length; used as the (negative) loop offset
 * @param trueLabel jump target when the compared bytes are equal
 * @param falseLabel jump target when a difference is found
 */
private void emit8ByteCompare(CompilationResultBuilder crb, AMD64MacroAssembler masm, Register result, Register array1, Register array2, Register length, Label trueLabel, Label falseLabel) {
    final boolean requiresNaNCheck = kind.isNumericFloat();
    final Register scratch = asRegister(temp4);

    final Label chunkLoop = new Label();
    final Label advance = new Label();
    final Label floatSlowPath = new Label();
    final Label noChunks = new Label();

    // A raw mismatch is decisive only for non-float kinds.
    final Label onChunkMismatch;
    if (requiresNaNCheck) {
        onChunkMismatch = floatSlowPath;
    } else {
        onChunkMismatch = falseLabel;
    }

    // result := tail count (in bytes), length := vector count (in bytes)
    masm.andl(result, VECTOR_SIZE - 1);
    masm.andl(length, ~(VECTOR_SIZE - 1));
    masm.jcc(ConditionFlag.Zero, noChunks);

    // Point both arrays at the end of the chunked region and walk a negative offset up to 0.
    masm.leaq(array1, new AMD64Address(array1, length, Scale.Times1, 0));
    masm.leaq(array2, new AMD64Address(array2, length, Scale.Times1, 0));
    masm.negq(length);

    // Main loop, aligned.
    masm.align(crb.target.wordSize * 2);
    masm.bind(chunkLoop);
    masm.movq(scratch, new AMD64Address(array1, length, Scale.Times1, 0));
    masm.cmpq(scratch, new AMD64Address(array2, length, Scale.Times1, 0));
    masm.jcc(ConditionFlag.NotEqual, onChunkMismatch);

    masm.bind(advance);
    masm.addq(length, VECTOR_SIZE);
    masm.jccb(ConditionFlag.NotZero, chunkLoop);

    // All whole chunks matched; with an empty tail the arrays are equal.
    masm.testl(result, result);
    masm.jcc(ConditionFlag.Zero, trueLabel);

    if (requiresNaNCheck) {
        // The per-element float check is the slow path, so it lives outside the loop and the
        // fall-through path hops over it.
        final Label tailCompare = new Label();
        final int elementBytes = kind.getByteCount();
        final boolean elementFillsChunk = elementBytes == VECTOR_SIZE;

        masm.jmpb(tailCompare);
        masm.bind(floatSlowPath);
        // Unrolled at emit time: one float compare per element in the chunk.
        for (int chunkOffset = 0; chunkOffset < VECTOR_SIZE; chunkOffset += elementBytes) {
            emitFloatCompare(masm, array1, array2, length, chunkOffset, falseLabel, elementFillsChunk);
        }
        masm.jmpb(advance);
        masm.bind(tailCompare);
    }

    /*
     * Tail: one more 8-byte compare using an unaligned load that ends exactly at the end of the
     * array.
     */
    masm.movq(scratch, new AMD64Address(array1, result, Scale.Times1, -VECTOR_SIZE));
    masm.cmpq(scratch, new AMD64Address(array2, result, Scale.Times1, -VECTOR_SIZE));
    if (!requiresNaNCheck) {
        masm.jccb(ConditionFlag.NotEqual, falseLabel);
    } else {
        final int elementBytes = kind.getByteCount();
        final boolean elementFillsChunk = elementBytes == VECTOR_SIZE;

        masm.jcc(ConditionFlag.Equal, trueLabel);
        // Unrolled at emit time: one float compare per element in the tail chunk.
        for (int chunkOffset = 0; chunkOffset < VECTOR_SIZE; chunkOffset += elementBytes) {
            emitFloatCompare(masm, array1, array2, result, chunkOffset - VECTOR_SIZE, falseLabel, elementFillsChunk);
        }
    }
    masm.jmpb(trueLabel);

    // No whole chunk to compare: hand the tail count over in length.
    masm.bind(noChunks);
    masm.movl(length, result);
}
Aggregations