Use of org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND in project graal by oracle.
The class AMD64HotSpotCounterOp, method emitCode.
@Override
public void emitCode(CompilationResultBuilder crb) {
AMD64MacroAssembler masm = (AMD64MacroAssembler) crb.asm;
TargetDescription target = crb.target;
Register scratch;
// It can happen that rax is one of the increment registers, in which case we do not
// want to spill it to the stack.
if (!contains(increments, rax)) {
scratch = rax;
} else if (!contains(increments, rbx)) {
scratch = rbx;
} else {
// Neither rax nor rbx is free for use as a scratch register; supporting this case
// would require spill handling, e.g. in emitIncrement().
throw GraalError.unimplemented("RAX and RBX are increment registers at the same time, spilling over the scratch register is not supported right now");
}
// address for counters array
AMD64Address countersArrayAddr = new AMD64Address(thread, config.jvmciCountersThreadOffset);
Register countersArrayReg = scratch;
// backup scratch register
masm.movq((AMD64Address) crb.asAddress(backupSlot), scratch);
// load counters array
masm.movptr(countersArrayReg, countersArrayAddr);
CounterProcedure emitProcedure = (counterIndex, increment, displacement) -> emitIncrement(masm, countersArrayReg, increment, displacement);
forEachCounter(emitProcedure, target);
// restore scratch register
masm.movq(scratch, (AMD64Address) crb.asAddress(backupSlot));
}
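The scratch selection above is a two-way fallback with no spill path. A minimal stand-alone sketch of the same logic (plain Java with register names as strings rather than JVMCI Register values, so it runs without Graal on the classpath):

import java.util.Arrays;
import java.util.List;

public class ScratchSelection {
    // Mirrors emitCode's fallback: prefer rax, then rbx; if both carry
    // increment values there is no safe scratch register without spilling.
    static String chooseScratch(List<String> incrementRegisters) {
        if (!incrementRegisters.contains("rax")) {
            return "rax";
        } else if (!incrementRegisters.contains("rbx")) {
            return "rbx";
        }
        throw new UnsupportedOperationException("rax and rbx are both increment registers");
    }

    public static void main(String[] args) {
        System.out.println(chooseScratch(Arrays.asList("rcx")));        // rax
        System.out.println(chooseScratch(Arrays.asList("rax", "rdx"))); // rbx
    }
}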
Use of org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND in project graal by oracle.
The class AMD64HotSpotPushInterpreterFrameOp, method emitCode.
@Override
public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
final Register frameSizeRegister = asRegister(frameSize);
final Register framePcRegister = asRegister(framePc);
final Register senderSpRegister = asRegister(senderSp);
final Register initialInfoRegister = asRegister(initialInfo);
final int wordSize = 8;
// We'll push PC and BP by hand.
masm.subq(frameSizeRegister, 2 * wordSize);
// Push return address.
masm.push(framePcRegister);
// Prolog
masm.push(initialInfoRegister);
masm.movq(initialInfoRegister, rsp);
masm.subq(rsp, frameSizeRegister);
// This value is corrected by layout_activation_impl.
masm.movptr(new AMD64Address(initialInfoRegister, config.frameInterpreterFrameLastSpOffset * wordSize), 0);
// Make the frame walkable.
masm.movq(new AMD64Address(initialInfoRegister, config.frameInterpreterFrameSenderSpOffset * wordSize), senderSpRegister);
}
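The sequence materializes a minimal interpreter frame by hand: push the return PC, push and rebuild RBP, then drop RSP by the remaining frame size. A small simulation of that ordering (my sketch with illustrative numbers; the real offsets come from the HotSpot VM config):

import java.util.ArrayDeque;
import java.util.Deque;

public class PushFrameSketch {
    public static void main(String[] args) {
        final int wordSize = 8;
        long frameSize = 96;                    // hypothetical frame size in bytes
        frameSize -= 2 * wordSize;              // subq: PC and BP are pushed by hand
        Deque<String> stack = new ArrayDeque<>();
        stack.push("return pc (framePcRegister)");
        stack.push("saved rbp (initialInfoRegister)");
        // movq initialInfo, rsp: the new rbp now points at the saved-rbp slot;
        // subq rsp, frameSize: reserve the rest of the interpreter frame.
        System.out.println(stack + " then rsp -= " + frameSize);
    }
}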
Use of org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND in project graal by oracle.
The class AMD64ArrayCompareToOp, method emitCode.
@Override
public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
Register result = asRegister(resultValue);
Register str1 = asRegister(temp1);
Register str2 = asRegister(temp2);
// Load array base addresses.
masm.leaq(str1, new AMD64Address(asRegister(array1Value), array1BaseOffset));
masm.leaq(str2, new AMD64Address(asRegister(array2Value), array2BaseOffset));
Register cnt1 = asRegister(length1Value);
Register cnt2 = asRegister(length2Value);
// Checkstyle: stop
Label LENGTH_DIFF_LABEL = new Label();
Label POP_LABEL = new Label();
Label DONE_LABEL = new Label();
Label WHILE_HEAD_LABEL = new Label();
// used only _LP64 && AVX3
Label COMPARE_WIDE_VECTORS_LOOP_FAILED = new Label();
int stride, stride2;
int adr_stride = -1;
int adr_stride1 = -1;
int adr_stride2 = -1;
// Checkstyle: resume
int stride2x2 = 0x40;
AMD64Address.Scale scale = null;
AMD64Address.Scale scale1 = null;
AMD64Address.Scale scale2 = null;
// if (ae != StrIntrinsicNode::LL) {
if (kind1 != JavaKind.Byte || kind2 != JavaKind.Byte) {
stride2x2 = 0x20;
}
// if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
if (kind1 != kind2) {
masm.shrl(cnt2, 1);
}
// Compute the minimum of the string lengths and the
// difference of the string lengths (stack).
// Do the conditional move stuff
masm.movl(result, cnt1);
masm.subl(cnt1, cnt2);
masm.push(cnt1);
// cnt2 = min(cnt1, cnt2)
masm.cmovl(ConditionFlag.LessEqual, cnt2, result);
// Is the minimum length zero?
masm.testl(cnt2, cnt2);
masm.jcc(ConditionFlag.Zero, LENGTH_DIFF_LABEL);
// if (ae == StrIntrinsicNode::LL) {
if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) {
// Load first bytes
// result = str1[0]
masm.movzbl(result, new AMD64Address(str1, 0));
// cnt1 = str2[0]
masm.movzbl(cnt1, new AMD64Address(str2, 0));
// } else if (ae == StrIntrinsicNode::UU) {
} else if (kind1 == JavaKind.Char && kind2 == JavaKind.Char) {
// Load first characters
masm.movzwl(result, new AMD64Address(str1, 0));
masm.movzwl(cnt1, new AMD64Address(str2, 0));
} else {
masm.movzbl(result, new AMD64Address(str1, 0));
masm.movzwl(cnt1, new AMD64Address(str2, 0));
}
masm.subl(result, cnt1);
masm.jcc(ConditionFlag.NotZero, POP_LABEL);
// if (ae == StrIntrinsicNode::UU) {
if (kind1 == JavaKind.Char && kind2 == JavaKind.Char) {
// Divide length by 2 to get number of chars
masm.shrl(cnt2, 1);
}
masm.cmpl(cnt2, 1);
masm.jcc(ConditionFlag.Equal, LENGTH_DIFF_LABEL);
// if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
if (kind1 == kind2) {
masm.cmpptr(str1, str2);
masm.jcc(ConditionFlag.Equal, LENGTH_DIFF_LABEL);
// if (ae == StrIntrinsicNode::LL) {
if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) {
scale = AMD64Address.Scale.Times1;
stride = 16;
} else {
scale = AMD64Address.Scale.Times2;
stride = 8;
}
} else {
scale1 = AMD64Address.Scale.Times1;
scale2 = AMD64Address.Scale.Times2;
// scale not used
stride = 8;
}
// if (UseAVX >= 2 && UseSSE42Intrinsics) {
if (supportsAVX2(crb.target) && supportsSSE42(crb.target)) {
Register vec1 = asRegister(vectorTemp1, AMD64Kind.DOUBLE);
// Checkstyle: stop
Label COMPARE_WIDE_VECTORS = new Label();
Label VECTOR_NOT_EQUAL = new Label();
Label COMPARE_WIDE_TAIL = new Label();
Label COMPARE_SMALL_STR = new Label();
Label COMPARE_WIDE_VECTORS_LOOP = new Label();
Label COMPARE_16_CHARS = new Label();
Label COMPARE_INDEX_CHAR = new Label();
Label COMPARE_WIDE_VECTORS_LOOP_AVX2 = new Label();
Label COMPARE_TAIL_LONG = new Label();
// used only _LP64 && AVX3
Label COMPARE_WIDE_VECTORS_LOOP_AVX3 = new Label();
// Checkstyle: resume
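// pcmpestri control byte 0x19 = unsigned words, "equal each" aggregation, negative
// polarity, least-significant index; clearing bit 0 below (LL case) switches the
// element size to unsigned bytes.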
int pcmpmask = 0x19;
// if (ae == StrIntrinsicNode::LL) {
if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) {
pcmpmask &= ~0x01;
}
// if (ae == StrIntrinsicNode::LL) {
if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) {
stride2 = 32;
} else {
stride2 = 16;
}
// if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
if (kind1 == kind2) {
adr_stride = stride << scale.log2;
} else {
// stride << scale1;
adr_stride1 = 8;
// stride << scale2;
adr_stride2 = 16;
}
assert result.equals(rax) && cnt2.equals(rdx) && cnt1.equals(rcx) : "pcmpestri";
// rax and rdx are used by pcmpestri as element counters
masm.movl(result, cnt2);
// cnt2 holds the vector count
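// andl with ~(stride2 - 1) rounds cnt2 down to a whole number of stride2-element
// vectors (this is the AMD64BinaryArithmetic.AND use indexed by this page).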
masm.andl(cnt2, ~(stride2 - 1));
masm.jcc(ConditionFlag.Zero, COMPARE_TAIL_LONG);
// Fast path: compare the first two 8-char vectors.
masm.bind(COMPARE_16_CHARS);
// if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
if (kind1 == kind2) {
masm.movdqu(vec1, new AMD64Address(str1, 0));
} else {
masm.pmovzxbw(vec1, new AMD64Address(str1, 0));
}
masm.pcmpestri(vec1, new AMD64Address(str2, 0), pcmpmask);
masm.jccb(ConditionFlag.Below, COMPARE_INDEX_CHAR);
// if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
if (kind1 == kind2) {
masm.movdqu(vec1, new AMD64Address(str1, adr_stride));
masm.pcmpestri(vec1, new AMD64Address(str2, adr_stride), pcmpmask);
} else {
masm.pmovzxbw(vec1, new AMD64Address(str1, adr_stride1));
masm.pcmpestri(vec1, new AMD64Address(str2, adr_stride2), pcmpmask);
}
masm.jccb(ConditionFlag.AboveEqual, COMPARE_WIDE_VECTORS);
masm.addl(cnt1, stride);
// Compare the characters at index in cnt1
// cnt1 has the offset of the mismatching character
masm.bind(COMPARE_INDEX_CHAR);
loadNextElements(masm, result, cnt2, str1, str2, scale, scale1, scale2, cnt1);
masm.subl(result, cnt2);
masm.jmp(POP_LABEL);
// Setup the registers to start vector comparison loop
masm.bind(COMPARE_WIDE_VECTORS);
// if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
if (kind1 == kind2) {
masm.leaq(str1, new AMD64Address(str1, result, scale));
masm.leaq(str2, new AMD64Address(str2, result, scale));
} else {
masm.leaq(str1, new AMD64Address(str1, result, scale1));
masm.leaq(str2, new AMD64Address(str2, result, scale2));
}
masm.subl(result, stride2);
masm.subl(cnt2, stride2);
masm.jcc(ConditionFlag.Zero, COMPARE_WIDE_TAIL);
masm.negq(result);
// In a loop, compare 16 chars (32 bytes) at a time using vpxor+vptest
masm.bind(COMPARE_WIDE_VECTORS_LOOP);
// if (VM_Version::supports_avx512vlbw()) { // trying 64 bytes fast loop
if (supportsAVX512VLBW(crb.target)) {
masm.cmpl(cnt2, stride2x2);
masm.jccb(ConditionFlag.Below, COMPARE_WIDE_VECTORS_LOOP_AVX2);
// cnt2 holds the vector count
masm.testl(cnt2, stride2x2 - 1);
// means we cannot subtract by 0x40
masm.jccb(ConditionFlag.NotZero, COMPARE_WIDE_VECTORS_LOOP_AVX2);
// the hottest loop
masm.bind(COMPARE_WIDE_VECTORS_LOOP_AVX3);
// if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
if (kind1 == kind2) {
masm.evmovdquq(vec1, new AMD64Address(str1, result, scale), AvxVectorLen.AVX_512bit);
// k7 == 11..11, if operands equal, otherwise k7 has some 0
masm.evpcmpeqb(k7, vec1, new AMD64Address(str2, result, scale), AvxVectorLen.AVX_512bit);
} else {
masm.vpmovzxbw(vec1, new AMD64Address(str1, result, scale1), AvxVectorLen.AVX_512bit);
// k7 == 11..11, if operands equal, otherwise k7 has some 0
masm.evpcmpeqb(k7, vec1, new AMD64Address(str2, result, scale2), AvxVectorLen.AVX_512bit);
}
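// kortestql sets CF only if k7 is all ones, i.e. all 64 byte positions matched;
// AboveEqual (CF == 0) therefore means a mismatch.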
masm.kortestql(k7, k7);
// miscompare
masm.jcc(ConditionFlag.AboveEqual, COMPARE_WIDE_VECTORS_LOOP_FAILED);
// update since we already compared at this addr
masm.addq(result, stride2x2);
// and sub the size too
masm.subl(cnt2, stride2x2);
masm.jccb(ConditionFlag.NotZero, COMPARE_WIDE_VECTORS_LOOP_AVX3);
masm.vpxor(vec1, vec1, vec1);
masm.jmpb(COMPARE_WIDE_TAIL);
}
masm.bind(COMPARE_WIDE_VECTORS_LOOP_AVX2);
// if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
if (kind1 == kind2) {
masm.vmovdqu(vec1, new AMD64Address(str1, result, scale));
masm.vpxor(vec1, vec1, new AMD64Address(str2, result, scale));
} else {
masm.vpmovzxbw(vec1, new AMD64Address(str1, result, scale1), AvxVectorLen.AVX_256bit);
masm.vpxor(vec1, vec1, new AMD64Address(str2, result, scale2));
}
masm.vptest(vec1, vec1);
masm.jcc(ConditionFlag.NotZero, VECTOR_NOT_EQUAL);
masm.addq(result, stride2);
masm.subl(cnt2, stride2);
masm.jcc(ConditionFlag.NotZero, COMPARE_WIDE_VECTORS_LOOP);
// clean upper bits of YMM registers
masm.vpxor(vec1, vec1, vec1);
// compare wide vectors tail
masm.bind(COMPARE_WIDE_TAIL);
masm.testq(result, result);
masm.jcc(ConditionFlag.Zero, LENGTH_DIFF_LABEL);
masm.movl(result, stride2);
masm.movl(cnt2, result);
masm.negq(result);
masm.jmp(COMPARE_WIDE_VECTORS_LOOP_AVX2);
// Identifies the mismatching (higher or lower) 16 bytes in the 32-byte vectors.
masm.bind(VECTOR_NOT_EQUAL);
// clean upper bits of YMM registers
masm.vpxor(vec1, vec1, vec1);
// if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
if (kind1 == kind2) {
masm.leaq(str1, new AMD64Address(str1, result, scale));
masm.leaq(str2, new AMD64Address(str2, result, scale));
} else {
masm.leaq(str1, new AMD64Address(str1, result, scale1));
masm.leaq(str2, new AMD64Address(str2, result, scale2));
}
masm.jmp(COMPARE_16_CHARS);
// Compare tail chars, length between 1 and 15 chars
masm.bind(COMPARE_TAIL_LONG);
masm.movl(cnt2, result);
masm.cmpl(cnt2, stride);
masm.jcc(ConditionFlag.Less, COMPARE_SMALL_STR);
// if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
if (kind1 == kind2) {
masm.movdqu(vec1, new AMD64Address(str1, 0));
} else {
masm.pmovzxbw(vec1, new AMD64Address(str1, 0));
}
masm.pcmpestri(vec1, new AMD64Address(str2, 0), pcmpmask);
masm.jcc(ConditionFlag.Below, COMPARE_INDEX_CHAR);
masm.subq(cnt2, stride);
masm.jcc(ConditionFlag.Zero, LENGTH_DIFF_LABEL);
// if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
if (kind1 == kind2) {
masm.leaq(str1, new AMD64Address(str1, result, scale));
masm.leaq(str2, new AMD64Address(str2, result, scale));
} else {
masm.leaq(str1, new AMD64Address(str1, result, scale1));
masm.leaq(str2, new AMD64Address(str2, result, scale2));
}
masm.negq(cnt2);
masm.jmpb(WHILE_HEAD_LABEL);
masm.bind(COMPARE_SMALL_STR);
} else if (supportsSSE42(crb.target)) {
Register vec1 = asRegister(vectorTemp1, AMD64Kind.DOUBLE);
// Checkstyle: stop
Label COMPARE_WIDE_VECTORS = new Label();
Label VECTOR_NOT_EQUAL = new Label();
Label COMPARE_TAIL = new Label();
// Checkstyle: resume
int pcmpmask = 0x19;
// Setup to compare 8-char (16-byte) vectors,
// start from the first character again because it has an aligned address.
masm.movl(result, cnt2);
// cnt2 holds the vector count
masm.andl(cnt2, ~(stride - 1));
// if (ae == StrIntrinsicNode::LL) {
if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) {
pcmpmask &= ~0x01;
}
masm.jcc(ConditionFlag.Zero, COMPARE_TAIL);
// if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
if (kind1 == kind2) {
masm.leaq(str1, new AMD64Address(str1, result, scale));
masm.leaq(str2, new AMD64Address(str2, result, scale));
} else {
masm.leaq(str1, new AMD64Address(str1, result, scale1));
masm.leaq(str2, new AMD64Address(str2, result, scale2));
}
masm.negq(result);
// rcx - first mismatched element index
assert result.equals(rax) && cnt2.equals(rdx) && cnt1.equals(rcx) : "pcmpestri";
masm.bind(COMPARE_WIDE_VECTORS);
// if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
if (kind1 == kind2) {
masm.movdqu(vec1, new AMD64Address(str1, result, scale));
masm.pcmpestri(vec1, new AMD64Address(str2, result, scale), pcmpmask);
} else {
masm.pmovzxbw(vec1, new AMD64Address(str1, result, scale1));
masm.pcmpestri(vec1, new AMD64Address(str2, result, scale2), pcmpmask);
}
// After pcmpestri, cnt1 (rcx) contains the mismatched element index
// CF==1
masm.jccb(ConditionFlag.Below, VECTOR_NOT_EQUAL);
masm.addq(result, stride);
masm.subq(cnt2, stride);
masm.jccb(ConditionFlag.NotZero, COMPARE_WIDE_VECTORS);
// compare wide vectors tail
masm.testq(result, result);
masm.jcc(ConditionFlag.Zero, LENGTH_DIFF_LABEL);
masm.movl(cnt2, stride);
masm.movl(result, stride);
masm.negq(result);
// if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
if (kind1 == kind2) {
masm.movdqu(vec1, new AMD64Address(str1, result, scale));
masm.pcmpestri(vec1, new AMD64Address(str2, result, scale), pcmpmask);
} else {
masm.pmovzxbw(vec1, new AMD64Address(str1, result, scale1));
masm.pcmpestri(vec1, new AMD64Address(str2, result, scale2), pcmpmask);
}
masm.jccb(ConditionFlag.AboveEqual, LENGTH_DIFF_LABEL);
// Mismatched characters in the vectors
masm.bind(VECTOR_NOT_EQUAL);
masm.addq(cnt1, result);
loadNextElements(masm, result, cnt2, str1, str2, scale, scale1, scale2, cnt1);
masm.subl(result, cnt2);
masm.jmpb(POP_LABEL);
// limit is zero
masm.bind(COMPARE_TAIL);
masm.movl(cnt2, result);
// Fallthru to tail compare
}
// if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
if (kind1 == kind2) {
masm.leaq(str1, new AMD64Address(str1, cnt2, scale));
masm.leaq(str2, new AMD64Address(str2, cnt2, scale));
} else {
masm.leaq(str1, new AMD64Address(str1, cnt2, scale1));
masm.leaq(str2, new AMD64Address(str2, cnt2, scale2));
}
// first character was compared already
masm.decrementl(cnt2);
masm.negq(cnt2);
// Compare the rest of the elements
masm.bind(WHILE_HEAD_LABEL);
loadNextElements(masm, result, cnt1, str1, str2, scale, scale1, scale2, cnt2);
masm.subl(result, cnt1);
masm.jccb(ConditionFlag.NotZero, POP_LABEL);
masm.incrementq(cnt2, 1);
masm.jccb(ConditionFlag.NotZero, WHILE_HEAD_LABEL);
// Strings are equal up to min length. Return the length difference.
masm.bind(LENGTH_DIFF_LABEL);
masm.pop(result);
// if (ae == StrIntrinsicNode::UU) {
if (kind1 == JavaKind.Char && kind2 == JavaKind.Char) {
// Divide diff by 2 to get number of chars
masm.sarl(result, 1);
}
masm.jmpb(DONE_LABEL);
// if (VM_Version::supports_avx512vlbw()) {
if (supportsAVX512VLBW(crb.target)) {
masm.bind(COMPARE_WIDE_VECTORS_LOOP_FAILED);
masm.kmovql(cnt1, k7);
masm.notq(cnt1);
masm.bsfq(cnt2, cnt1);
// if (ae != StrIntrinsicNode::LL) {
if (kind1 != JavaKind.Byte || kind2 != JavaKind.Byte) {
// Divide diff by 2 to get number of chars
masm.sarl(cnt2, 1);
}
masm.addq(result, cnt2);
// if (ae == StrIntrinsicNode::LL) {
if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) {
masm.movzbl(cnt1, new AMD64Address(str2, result, Scale.Times1));
masm.movzbl(result, new AMD64Address(str1, result, Scale.Times1));
} else if (kind1 == JavaKind.Char && kind2 == JavaKind.Char) {
masm.movzwl(cnt1, new AMD64Address(str2, result, scale));
masm.movzwl(result, new AMD64Address(str1, result, scale));
} else {
masm.movzwl(cnt1, new AMD64Address(str2, result, scale2));
masm.movzbl(result, new AMD64Address(str1, result, scale1));
}
masm.subl(result, cnt1);
masm.jmpb(POP_LABEL);
}
// Discard the stored length difference
masm.bind(POP_LABEL);
masm.pop(cnt1);
// That's it
masm.bind(DONE_LABEL);
// if (ae == StrIntrinsicNode::UL) {
if (kind1 == JavaKind.Char && kind2 == JavaKind.Byte) {
masm.negl(result);
}
}
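Every path above implements the same scalar contract. A plain-Java reference model (my sketch, not Graal code) of what the stub computes for the byte/byte case, with movzbl's zero-extension made explicit:

public class ArrayCompareToModel {
    // Element-wise difference up to the shorter length; otherwise the length
    // difference, which the code pushed early and LENGTH_DIFF_LABEL pops.
    static int compareTo(byte[] a, byte[] b) {
        int min = Math.min(a.length, b.length);
        for (int i = 0; i < min; i++) {
            int diff = (a[i] & 0xff) - (b[i] & 0xff); // zero-extend like movzbl
            if (diff != 0) {
                return diff;
            }
        }
        return a.length - b.length;
    }

    public static void main(String[] args) {
        System.out.println(compareTo(new byte[]{'a', 'b'}, new byte[]{'a', 'c'})); // -1
        System.out.println(compareTo(new byte[]{'a'}, new byte[]{'a', 'b'}));      // -1
    }
}

For mixed kinds the stub widens the byte side on load (pmovzxbw/vpmovzxbw), and in the char/byte order it negates the result at the end, which is the final negl above.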
Use of org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND in project graal by oracle.
The class AMD64ArrayEqualsOp, method emitTailCompares.
/**
* Emits code to compare the remaining 1 to 4 bytes.
*/
private void emitTailCompares(AMD64MacroAssembler masm, Register result, Register array1, Register array2, Register length, Label trueLabel, Label falseLabel) {
Label compare2Bytes = new Label();
Label compare1Byte = new Label();
Register temp = asRegister(temp4);
if (kind.getByteCount() <= 4) {
// Compare trailing 4 bytes, if any.
masm.testl(result, 4);
masm.jccb(ConditionFlag.Zero, compare2Bytes);
masm.movl(temp, new AMD64Address(array1, 0));
masm.cmpl(temp, new AMD64Address(array2, 0));
if (kind == JavaKind.Float) {
masm.jccb(ConditionFlag.Equal, trueLabel);
emitFloatCompare(masm, array1, array2, Register.None, 0, falseLabel, true);
masm.jmpb(trueLabel);
} else {
masm.jccb(ConditionFlag.NotEqual, falseLabel);
}
if (kind.getByteCount() <= 2) {
// Move array pointers forward.
masm.leaq(array1, new AMD64Address(array1, 4));
masm.leaq(array2, new AMD64Address(array2, 4));
// Compare trailing 2 bytes, if any.
masm.bind(compare2Bytes);
masm.testl(result, 2);
masm.jccb(ConditionFlag.Zero, compare1Byte);
masm.movzwl(temp, new AMD64Address(array1, 0));
masm.movzwl(length, new AMD64Address(array2, 0));
masm.cmpl(temp, length);
masm.jccb(ConditionFlag.NotEqual, falseLabel);
// The one-byte tail compare is only required for boolean and byte arrays.
if (kind.getByteCount() <= 1) {
// Move array pointers forward before we compare the last trailing byte.
masm.leaq(array1, new AMD64Address(array1, 2));
masm.leaq(array2, new AMD64Address(array2, 2));
// Compare trailing byte, if any.
masm.bind(compare1Byte);
masm.testl(result, 1);
masm.jccb(ConditionFlag.Zero, trueLabel);
masm.movzbl(temp, new AMD64Address(array1, 0));
masm.movzbl(length, new AMD64Address(array2, 0));
masm.cmpl(temp, length);
masm.jccb(ConditionFlag.NotEqual, falseLabel);
} else {
masm.bind(compare1Byte);
}
} else {
masm.bind(compare2Bytes);
}
}
}
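The bit tests on result drive the tail: bit 2 selects a 4-byte compare, bit 1 a 2-byte compare, bit 0 a 1-byte compare. A scalar model of the same dispatch (a sketch; 'rest' plays the role of the remaining-byte count held in result):

public class TailCompareModel {
    // Same dispatch as emitTailCompares: each set bit of the remaining byte
    // count selects one fixed-width trailing compare, as in testl(result, 4/2/1).
    static boolean tailEquals(byte[] a, byte[] b, int from, int rest) {
        int i = from;
        for (int width : new int[]{4, 2, 1}) {
            if ((rest & width) != 0) {
                for (int k = 0; k < width; k++, i++) {
                    if (a[i] != b[i]) {
                        return false;
                    }
                }
            }
        }
        return true;
    }

    public static void main(String[] args) {
        byte[] x = {1, 2, 3, 4, 5, 6, 7};
        byte[] y = {1, 2, 3, 4, 5, 6, 9};
        System.out.println(tailEquals(x, y, 0, 7)); // false: differs in the 1-byte tail
    }
}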
Use of org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND in project graal by oracle.
The class AMD64MathIntrinsicUnaryOp, method logIntrinsic.
/*
 * Copyright (c) 2014, 2016, Intel Corporation. All rights reserved. Intel Math Library (LIBM)
 * Source Code
 *
 * ALGORITHM DESCRIPTION - LOG()
 * ---------------------
 *
 * x = 2^k * mx, mx in [1,2)
 *
 * Get B ~ 1/mx based on the output of the rcpps instruction (B0):
 * B = int(B0*2^7 + 0.5)/2^7
 *
 * Reduced argument: r = B*mx - 1.0 (computed accurately in high and low parts)
 *
 * Result: k*log(2) - log(B) + p(r) if |x-1| >= small value (2^-6), where p(r) is a
 * degree 7 polynomial and -log(B) is read from the data table (high and low parts).
 * The result is formed from the high and low parts.
 *
 * Special cases:
 *   log(NaN) = quiet NaN, and raise invalid exception
 *   log(+INF) = that INF
 *   log(0) = -INF with divide-by-zero exception raised
 *   log(1) = +0
 *   log(x) = NaN with invalid exception raised if x < -0, including -INF
 */
public void logIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) {
ArrayDataPointerConstant logTwoTablePtr = new ArrayDataPointerConstant(logTwoTable, 16);
ArrayDataPointerConstant logTwoDataPtr = new ArrayDataPointerConstant(logTwoData, 16);
ArrayDataPointerConstant coeffLogTwoDataPtr = new ArrayDataPointerConstant(coeffLogTwoData, 16);
Label bb0 = new Label();
Label bb1 = new Label();
Label bb2 = new Label();
Label bb3 = new Label();
Label bb4 = new Label();
Label bb5 = new Label();
Label bb6 = new Label();
Label bb7 = new Label();
Label bb8 = new Label();
Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD);
Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD);
Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD);
Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD);
Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE);
Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE);
Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE);
Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE);
Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE);
Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE);
Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE);
AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp);
setCrb(crb);
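// Save the raw argument bits; the special-case paths reload them at bb0.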
masm.movdq(stackSlot, value);
if (dest.encoding != value.encoding) {
masm.movdqu(dest, value);
}
masm.movq(gpr1, 0x3ff0000000000000L);
masm.movdq(temp2, gpr1);
masm.movq(gpr3, 0x77f0000000000000L);
masm.movdq(temp3, gpr3);
masm.movl(gpr2, 32768);
masm.movdl(temp4, gpr2);
masm.movq(gpr2, 0xffffe00000000000L);
masm.movdq(temp5, gpr2);
masm.movdqu(temp1, value);
masm.pextrw(gpr1, dest, 3);
masm.por(dest, temp2);
masm.movl(gpr2, 16352);
masm.psrlq(dest, 27);
masm.leaq(gpr4, externalAddress(logTwoTablePtr));
masm.psrld(dest, 2);
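// dest now holds mx approximated in single precision; rcpps produces
// B0 ~ 1/mx (the reciprocal estimate from the algorithm description above).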
masm.rcpps(dest, dest);
masm.psllq(temp1, 12);
masm.pshufd(temp6, temp5, 0xE4);
masm.psrlq(temp1, 12);
masm.subl(gpr1, 16);
masm.cmpl(gpr1, 32736);
masm.jcc(ConditionFlag.AboveEqual, bb0);
masm.bind(bb1);
masm.paddd(dest, temp4);
masm.por(temp1, temp3);
masm.movdl(gpr3, dest);
masm.psllq(dest, 29);
masm.pand(temp5, temp1);
masm.pand(dest, temp6);
masm.subsd(temp1, temp5);
masm.mulpd(temp5, dest);
masm.andl(gpr1, 32752);
masm.subl(gpr1, gpr2);
masm.cvtsi2sdl(temp7, gpr1);
masm.mulsd(temp1, dest);
// 0xfefa3800, 0x3fa62e42
masm.movdq(temp6, externalAddress(logTwoDataPtr));
// 0x92492492, 0x3fc24924, 0x00000000, 0xbfd00000
masm.movdqu(temp3, externalAddress(coeffLogTwoDataPtr));
masm.subsd(temp5, temp2);
masm.andl(gpr3, 16711680);
masm.shrl(gpr3, 12);
masm.movdqu(dest, new AMD64Address(gpr4, gpr3, Scale.Times1, 0));
masm.leaq(gpr4, externalAddress(coeffLogTwoDataPtr));
// 0x3d6fb175, 0xbfc5555e, 0x55555555, 0x3fd55555
masm.movdqu(temp4, new AMD64Address(gpr4, 16));
masm.addsd(temp1, temp5);
// 0x9999999a, 0x3fc99999, 0x00000000, 0xbfe00000
masm.movdqu(temp2, new AMD64Address(gpr4, 32));
masm.mulsd(temp6, temp7);
if (masm.supports(CPUFeature.SSE3)) {
masm.movddup(temp5, temp1);
} else {
masm.movdqu(temp5, temp1);
masm.movlhps(temp5, temp5);
}
masm.leaq(gpr4, externalAddress(logTwoDataPtr));
// 0x93c76730, 0x3ceef357
masm.mulsd(temp7, new AMD64Address(gpr4, 8));
masm.mulsd(temp3, temp1);
masm.addsd(dest, temp6);
masm.mulpd(temp4, temp5);
masm.mulpd(temp5, temp5);
if (masm.supports(CPUFeature.SSE3)) {
masm.movddup(temp6, dest);
} else {
masm.movdqu(temp6, dest);
masm.movlhps(temp6, temp6);
}
masm.addsd(dest, temp1);
masm.addpd(temp4, temp2);
masm.mulpd(temp3, temp5);
masm.subsd(temp6, dest);
masm.mulsd(temp4, temp1);
masm.pshufd(temp2, dest, 0xEE);
masm.addsd(temp1, temp6);
masm.mulsd(temp5, temp5);
masm.addsd(temp7, temp2);
masm.addpd(temp4, temp3);
masm.addsd(temp1, temp7);
masm.mulpd(temp4, temp5);
masm.addsd(temp1, temp4);
masm.pshufd(temp5, temp4, 0xEE);
masm.addsd(temp1, temp5);
masm.addsd(dest, temp1);
masm.jmp(bb8);
masm.bind(bb0);
masm.movdq(dest, stackSlot);
masm.movdq(temp1, stackSlot);
masm.addl(gpr1, 16);
masm.cmpl(gpr1, 32768);
masm.jcc(ConditionFlag.AboveEqual, bb2);
masm.cmpl(gpr1, 16);
masm.jcc(ConditionFlag.Below, bb3);
masm.bind(bb4);
masm.addsd(dest, dest);
masm.jmp(bb8);
masm.bind(bb5);
masm.jcc(ConditionFlag.Above, bb4);
masm.cmpl(gpr3, 0);
masm.jcc(ConditionFlag.Above, bb4);
masm.jmp(bb6);
masm.bind(bb3);
masm.xorpd(temp1, temp1);
masm.addsd(temp1, dest);
masm.movdl(gpr3, temp1);
masm.psrlq(temp1, 32);
masm.movdl(gpr2, temp1);
masm.orl(gpr3, gpr2);
masm.cmpl(gpr3, 0);
masm.jcc(ConditionFlag.Equal, bb7);
masm.xorpd(temp1, temp1);
masm.movl(gpr1, 18416);
masm.pinsrw(temp1, gpr1, 3);
masm.mulsd(dest, temp1);
masm.movdqu(temp1, dest);
masm.pextrw(gpr1, dest, 3);
masm.por(dest, temp2);
masm.psrlq(dest, 27);
masm.movl(gpr2, 18416);
masm.psrld(dest, 2);
masm.rcpps(dest, dest);
masm.psllq(temp1, 12);
masm.pshufd(temp6, temp5, 0xE4);
masm.psrlq(temp1, 12);
masm.jmp(bb1);
masm.bind(bb2);
masm.movdl(gpr3, temp1);
masm.psrlq(temp1, 32);
masm.movdl(gpr2, temp1);
masm.addl(gpr2, gpr2);
masm.cmpl(gpr2, -2097152);
masm.jcc(ConditionFlag.AboveEqual, bb5);
masm.orl(gpr3, gpr2);
masm.cmpl(gpr3, 0);
masm.jcc(ConditionFlag.Equal, bb7);
masm.bind(bb6);
masm.xorpd(temp1, temp1);
masm.xorpd(dest, dest);
masm.movl(gpr1, 32752);
masm.pinsrw(temp1, gpr1, 3);
masm.mulsd(dest, temp1);
masm.jmp(bb8);
masm.bind(bb7);
masm.xorpd(temp1, temp1);
masm.xorpd(dest, dest);
masm.movl(gpr1, 49136);
masm.pinsrw(dest, gpr1, 3);
masm.divsd(dest, temp1);
masm.bind(bb8);
}
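The fast path above is the reduction from the algorithm description; the numeric idea can be checked in plain Java (a sketch using library calls, not the stub's table-driven polynomial):

public class LogReductionSketch {
    // log(x) = k*log(2) + log(mx) with x = 2^k * mx, mx in [1,2).
    // The stub refines log(mx) via B ~ 1/mx from rcpps and a degree-7
    // polynomial in r = B*mx - 1; Math.log stands in for that here.
    static double logByReduction(double x) {
        int k = Math.getExponent(x);            // unbiased exponent k
        double mx = Math.scalb(x, -k);          // mantissa in [1,2)
        return k * Math.log(2.0) + Math.log(mx);
    }

    public static void main(String[] args) {
        System.out.println(logByReduction(10.0) - Math.log(10.0)); // ~0.0
        // Special cases the slow paths (bb2..bb7) must preserve:
        System.out.println(Math.log(0.0));                       // -Infinity
        System.out.println(Math.log(-1.0));                      // NaN
        System.out.println(Math.log(Double.POSITIVE_INFINITY));  // Infinity
    }
}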