use of org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.OR in project graal by oracle.
the class AMD64ArrayCompareToOp method emitCode.
/**
 * Emits the AMD64 code that compares two arrays element by element and leaves the comparison
 * value in {@code resultValue}.
 *
 * <p>
 * Outline of the emitted code:
 * <ol>
 * <li>The length difference is pushed on the stack and {@code cnt2} becomes the minimum of the
 * two lengths.</li>
 * <li>The first elements are compared; a difference is returned immediately.</li>
 * <li>Depending on the target, a vectorized loop (AVX2 with an optional AVX-512 fast loop, or
 * SSE4.2) compares the bulk of the elements.</li>
 * <li>A scalar loop compares the remaining elements.</li>
 * <li>If no element differs within the minimum length, the saved length difference is popped
 * and returned (halved for Char/Char); otherwise the difference of the first mismatching
 * elements is returned. For a Char/Byte comparison the result is negated at the end.</li>
 * </ol>
 *
 * <p>
 * The {@code // if (ae ...)} comments quote the condition each Java test stands for: LL is
 * Byte/Byte, UU is Char/Char, LU and UL are the mixed cases. In the mixed case the emitted code
 * always reads bytes through {@code str1} and 16-bit elements through {@code str2}.
 *
 * @param crb the compilation result builder; only its target is consulted here, to select the
 *            instruction set
 * @param masm the assembler the instructions are emitted into
 */
@Override
public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
    Register result = asRegister(resultValue);
    Register str1 = asRegister(temp1);
    Register str2 = asRegister(temp2);
    // Load array base addresses.
    masm.leaq(str1, new AMD64Address(asRegister(array1Value), array1BaseOffset));
    masm.leaq(str2, new AMD64Address(asRegister(array2Value), array2BaseOffset));
    Register cnt1 = asRegister(length1Value);
    Register cnt2 = asRegister(length2Value);
    // Checkstyle: stop
    Label LENGTH_DIFF_LABEL = new Label();
    Label POP_LABEL = new Label();
    Label DONE_LABEL = new Label();
    Label WHILE_HEAD_LABEL = new Label();
    // used only _LP64 && AVX3
    Label COMPARE_WIDE_VECTORS_LOOP_FAILED = new Label();
    int stride, stride2;
    int adr_stride = -1;
    int adr_stride1 = -1;
    int adr_stride2 = -1;
    // Checkstyle: resume
    // Number of elements covered by one 512-bit compare: 0x40 byte elements for Byte/Byte,
    // 0x20 elements as soon as 16-bit elements are involved.
    int stride2x2 = 0x40;
    AMD64Address.Scale scale = null;
    AMD64Address.Scale scale1 = null;
    AMD64Address.Scale scale2 = null;
    // if (ae != StrIntrinsicNode::LL) {
    if (!(kind1 == JavaKind.Byte && kind2 == JavaKind.Byte)) {
        stride2x2 = 0x20;
    }
    // if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
    if (kind1 != kind2) {
        masm.shrl(cnt2, 1);
    }
    // Compute the minimum of the string lengths and the
    // difference of the string lengths (stack).
    // Do the conditional move stuff
    masm.movl(result, cnt1);
    masm.subl(cnt1, cnt2);
    masm.push(cnt1);
    // cnt2 = min(cnt1, cnt2)
    masm.cmovl(ConditionFlag.LessEqual, cnt2, result);
    // Is the minimum length zero?
    masm.testl(cnt2, cnt2);
    masm.jcc(ConditionFlag.Zero, LENGTH_DIFF_LABEL);
    // if (ae == StrIntrinsicNode::LL) {
    if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) {
        // Load first bytes
        // result = str1[0]
        masm.movzbl(result, new AMD64Address(str1, 0));
        // cnt1 = str2[0]
        masm.movzbl(cnt1, new AMD64Address(str2, 0));
        // } else if (ae == StrIntrinsicNode::UU) {
    } else if (kind1 == JavaKind.Char && kind2 == JavaKind.Char) {
        // Load first characters
        masm.movzwl(result, new AMD64Address(str1, 0));
        masm.movzwl(cnt1, new AMD64Address(str2, 0));
    } else {
        // Mixed case: byte element from str1, 16-bit element from str2
        masm.movzbl(result, new AMD64Address(str1, 0));
        masm.movzwl(cnt1, new AMD64Address(str2, 0));
    }
    masm.subl(result, cnt1);
    masm.jcc(ConditionFlag.NotZero, POP_LABEL);
    // if (ae == StrIntrinsicNode::UU) {
    if (kind1 == JavaKind.Char && kind2 == JavaKind.Char) {
        // Divide length by 2 to get number of chars
        masm.shrl(cnt2, 1);
    }
    // Only one element and it was already compared: the lengths decide.
    masm.cmpl(cnt2, 1);
    masm.jcc(ConditionFlag.Equal, LENGTH_DIFF_LABEL);
    // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
    if (kind1 == kind2) {
        // Same element kind and same start address: contents are identical.
        masm.cmpptr(str1, str2);
        masm.jcc(ConditionFlag.Equal, LENGTH_DIFF_LABEL);
        // if (ae == StrIntrinsicNode::LL) {
        if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) {
            scale = AMD64Address.Scale.Times1;
            stride = 16;
        } else {
            scale = AMD64Address.Scale.Times2;
            stride = 8;
        }
    } else {
        scale1 = AMD64Address.Scale.Times1;
        scale2 = AMD64Address.Scale.Times2;
        // scale not used
        stride = 8;
    }
    // if (UseAVX >= 2 && UseSSE42Intrinsics) {
    if (supportsAVX2(crb.target) && supportsSSE42(crb.target)) {
        Register vec1 = asRegister(vectorTemp1, AMD64Kind.DOUBLE);
        // Checkstyle: stop
        Label COMPARE_WIDE_VECTORS = new Label();
        Label VECTOR_NOT_EQUAL = new Label();
        Label COMPARE_WIDE_TAIL = new Label();
        Label COMPARE_SMALL_STR = new Label();
        Label COMPARE_WIDE_VECTORS_LOOP = new Label();
        Label COMPARE_16_CHARS = new Label();
        Label COMPARE_INDEX_CHAR = new Label();
        Label COMPARE_WIDE_VECTORS_LOOP_AVX2 = new Label();
        Label COMPARE_TAIL_LONG = new Label();
        // used only _LP64 && AVX3
        Label COMPARE_WIDE_VECTORS_LOOP_AVX3 = new Label();
        // Checkstyle: resume
        int pcmpmask = 0x19;
        // if (ae == StrIntrinsicNode::LL) {
        if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) {
            pcmpmask &= ~0x01;
        }
        // if (ae == StrIntrinsicNode::LL) {
        if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) {
            stride2 = 32;
        } else {
            stride2 = 16;
        }
        // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
        if (kind1 == kind2) {
            adr_stride = stride << scale.log2;
        } else {
            // stride << scale1;
            adr_stride1 = 8;
            // stride << scale2;
            adr_stride2 = 16;
        }
        assert result.equals(rax) && cnt2.equals(rdx) && cnt1.equals(rcx) : "pcmpestri";
        // rax and rdx are used by pcmpestri as elements counters
        masm.movl(result, cnt2);
        // cnt2 holds the vector count
        masm.andl(cnt2, ~(stride2 - 1));
        masm.jcc(ConditionFlag.Zero, COMPARE_TAIL_LONG);
        // fast path : compare first 2 8-char vectors.
        masm.bind(COMPARE_16_CHARS);
        // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
        if (kind1 == kind2) {
            masm.movdqu(vec1, new AMD64Address(str1, 0));
        } else {
            masm.pmovzxbw(vec1, new AMD64Address(str1, 0));
        }
        masm.pcmpestri(vec1, new AMD64Address(str2, 0), pcmpmask);
        masm.jccb(ConditionFlag.Below, COMPARE_INDEX_CHAR);
        // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
        if (kind1 == kind2) {
            masm.movdqu(vec1, new AMD64Address(str1, adr_stride));
            masm.pcmpestri(vec1, new AMD64Address(str2, adr_stride), pcmpmask);
        } else {
            masm.pmovzxbw(vec1, new AMD64Address(str1, adr_stride1));
            masm.pcmpestri(vec1, new AMD64Address(str2, adr_stride2), pcmpmask);
        }
        masm.jccb(ConditionFlag.AboveEqual, COMPARE_WIDE_VECTORS);
        // Mismatch is in the second vector: bias the index by one stride.
        masm.addl(cnt1, stride);
        // Compare the characters at index in cnt1
        // cnt1 has the offset of the mismatching character
        masm.bind(COMPARE_INDEX_CHAR);
        loadNextElements(masm, result, cnt2, str1, str2, scale, scale1, scale2, cnt1);
        masm.subl(result, cnt2);
        masm.jmp(POP_LABEL);
        // Setup the registers to start vector comparison loop
        masm.bind(COMPARE_WIDE_VECTORS);
        // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
        if (kind1 == kind2) {
            masm.leaq(str1, new AMD64Address(str1, result, scale));
            masm.leaq(str2, new AMD64Address(str2, result, scale));
        } else {
            masm.leaq(str1, new AMD64Address(str1, result, scale1));
            masm.leaq(str2, new AMD64Address(str2, result, scale2));
        }
        masm.subl(result, stride2);
        masm.subl(cnt2, stride2);
        masm.jcc(ConditionFlag.Zero, COMPARE_WIDE_TAIL);
        // From here on result is a negative element index relative to the array ends.
        masm.negq(result);
        // In a loop, compare 16-chars (32-bytes) at once using (vpxor+vptest)
        masm.bind(COMPARE_WIDE_VECTORS_LOOP);
        // if (VM_Version::supports_avx512vlbw()) { // trying 64 bytes fast loop
        if (supportsAVX512VLBW(crb.target)) {
            masm.cmpl(cnt2, stride2x2);
            masm.jccb(ConditionFlag.Below, COMPARE_WIDE_VECTORS_LOOP_AVX2);
            // cnt2 holds the vector count
            masm.testl(cnt2, stride2x2 - 1);
            // means we cannot subtract by 0x40
            masm.jccb(ConditionFlag.NotZero, COMPARE_WIDE_VECTORS_LOOP_AVX2);
            // the hottest loop
            masm.bind(COMPARE_WIDE_VECTORS_LOOP_AVX3);
            // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
            if (kind1 == kind2) {
                masm.evmovdquq(vec1, new AMD64Address(str1, result, scale), AvxVectorLen.AVX_512bit);
                // k7 == 11..11, if operands equal, otherwise k7 has some 0
                masm.evpcmpeqb(k7, vec1, new AMD64Address(str2, result, scale), AvxVectorLen.AVX_512bit);
            } else {
                masm.vpmovzxbw(vec1, new AMD64Address(str1, result, scale1), AvxVectorLen.AVX_512bit);
                // k7 == 11..11, if operands equal, otherwise k7 has some 0
                masm.evpcmpeqb(k7, vec1, new AMD64Address(str2, result, scale2), AvxVectorLen.AVX_512bit);
            }
            masm.kortestql(k7, k7);
            // miscompare
            masm.jcc(ConditionFlag.AboveEqual, COMPARE_WIDE_VECTORS_LOOP_FAILED);
            // update since we already compared at this addr
            masm.addq(result, stride2x2);
            // and sub the size too
            masm.subl(cnt2, stride2x2);
            masm.jccb(ConditionFlag.NotZero, COMPARE_WIDE_VECTORS_LOOP_AVX3);
            masm.vpxor(vec1, vec1, vec1);
            masm.jmpb(COMPARE_WIDE_TAIL);
        }
        masm.bind(COMPARE_WIDE_VECTORS_LOOP_AVX2);
        // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
        if (kind1 == kind2) {
            masm.vmovdqu(vec1, new AMD64Address(str1, result, scale));
            masm.vpxor(vec1, vec1, new AMD64Address(str2, result, scale));
        } else {
            masm.vpmovzxbw(vec1, new AMD64Address(str1, result, scale1), AvxVectorLen.AVX_256bit);
            masm.vpxor(vec1, vec1, new AMD64Address(str2, result, scale2));
        }
        masm.vptest(vec1, vec1);
        masm.jcc(ConditionFlag.NotZero, VECTOR_NOT_EQUAL);
        masm.addq(result, stride2);
        masm.subl(cnt2, stride2);
        masm.jcc(ConditionFlag.NotZero, COMPARE_WIDE_VECTORS_LOOP);
        // clean upper bits of YMM registers
        masm.vpxor(vec1, vec1, vec1);
        // compare wide vectors tail
        masm.bind(COMPARE_WIDE_TAIL);
        masm.testq(result, result);
        masm.jcc(ConditionFlag.Zero, LENGTH_DIFF_LABEL);
        // Re-run one wide compare over the last stride2 elements.
        masm.movl(result, stride2);
        masm.movl(cnt2, result);
        masm.negq(result);
        masm.jmp(COMPARE_WIDE_VECTORS_LOOP_AVX2);
        // Identifies the mismatching (higher or lower)16-bytes in the 32-byte vectors.
        masm.bind(VECTOR_NOT_EQUAL);
        // clean upper bits of YMM registers
        masm.vpxor(vec1, vec1, vec1);
        // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
        if (kind1 == kind2) {
            masm.leaq(str1, new AMD64Address(str1, result, scale));
            masm.leaq(str2, new AMD64Address(str2, result, scale));
        } else {
            masm.leaq(str1, new AMD64Address(str1, result, scale1));
            masm.leaq(str2, new AMD64Address(str2, result, scale2));
        }
        masm.jmp(COMPARE_16_CHARS);
        // Compare tail chars, length between 1 to 15 chars
        masm.bind(COMPARE_TAIL_LONG);
        masm.movl(cnt2, result);
        masm.cmpl(cnt2, stride);
        masm.jcc(ConditionFlag.Less, COMPARE_SMALL_STR);
        // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
        if (kind1 == kind2) {
            masm.movdqu(vec1, new AMD64Address(str1, 0));
        } else {
            masm.pmovzxbw(vec1, new AMD64Address(str1, 0));
        }
        masm.pcmpestri(vec1, new AMD64Address(str2, 0), pcmpmask);
        masm.jcc(ConditionFlag.Below, COMPARE_INDEX_CHAR);
        masm.subq(cnt2, stride);
        masm.jcc(ConditionFlag.Zero, LENGTH_DIFF_LABEL);
        // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
        if (kind1 == kind2) {
            masm.leaq(str1, new AMD64Address(str1, result, scale));
            masm.leaq(str2, new AMD64Address(str2, result, scale));
        } else {
            masm.leaq(str1, new AMD64Address(str1, result, scale1));
            masm.leaq(str2, new AMD64Address(str2, result, scale2));
        }
        masm.negq(cnt2);
        masm.jmpb(WHILE_HEAD_LABEL);
        masm.bind(COMPARE_SMALL_STR);
    } else if (supportsSSE42(crb.target)) {
        Register vec1 = asRegister(vectorTemp1, AMD64Kind.DOUBLE);
        // Checkstyle: stop
        Label COMPARE_WIDE_VECTORS = new Label();
        Label VECTOR_NOT_EQUAL = new Label();
        Label COMPARE_TAIL = new Label();
        // Checkstyle: resume
        int pcmpmask = 0x19;
        // Setup to compare 8-char (16-byte) vectors,
        // start from first character again because it has aligned address.
        masm.movl(result, cnt2);
        // cnt2 holds the vector count
        masm.andl(cnt2, ~(stride - 1));
        // if (ae == StrIntrinsicNode::LL) {
        if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) {
            pcmpmask &= ~0x01;
        }
        masm.jcc(ConditionFlag.Zero, COMPARE_TAIL);
        // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
        if (kind1 == kind2) {
            masm.leaq(str1, new AMD64Address(str1, result, scale));
            masm.leaq(str2, new AMD64Address(str2, result, scale));
        } else {
            masm.leaq(str1, new AMD64Address(str1, result, scale1));
            masm.leaq(str2, new AMD64Address(str2, result, scale2));
        }
        masm.negq(result);
        // rcx - first mismatched element index
        assert result.equals(rax) && cnt2.equals(rdx) && cnt1.equals(rcx) : "pcmpestri";
        masm.bind(COMPARE_WIDE_VECTORS);
        // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
        if (kind1 == kind2) {
            masm.movdqu(vec1, new AMD64Address(str1, result, scale));
            masm.pcmpestri(vec1, new AMD64Address(str2, result, scale), pcmpmask);
        } else {
            masm.pmovzxbw(vec1, new AMD64Address(str1, result, scale1));
            masm.pcmpestri(vec1, new AMD64Address(str2, result, scale2), pcmpmask);
        }
        // After pcmpestri cnt1(rcx) contains mismatched element index
        // CF==1
        masm.jccb(ConditionFlag.Below, VECTOR_NOT_EQUAL);
        masm.addq(result, stride);
        masm.subq(cnt2, stride);
        masm.jccb(ConditionFlag.NotZero, COMPARE_WIDE_VECTORS);
        // compare wide vectors tail
        masm.testq(result, result);
        masm.jcc(ConditionFlag.Zero, LENGTH_DIFF_LABEL);
        masm.movl(cnt2, stride);
        masm.movl(result, stride);
        masm.negq(result);
        // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
        if (kind1 == kind2) {
            masm.movdqu(vec1, new AMD64Address(str1, result, scale));
            masm.pcmpestri(vec1, new AMD64Address(str2, result, scale), pcmpmask);
        } else {
            masm.pmovzxbw(vec1, new AMD64Address(str1, result, scale1));
            masm.pcmpestri(vec1, new AMD64Address(str2, result, scale2), pcmpmask);
        }
        masm.jccb(ConditionFlag.AboveEqual, LENGTH_DIFF_LABEL);
        // Mismatched characters in the vectors
        masm.bind(VECTOR_NOT_EQUAL);
        masm.addq(cnt1, result);
        loadNextElements(masm, result, cnt2, str1, str2, scale, scale1, scale2, cnt1);
        masm.subl(result, cnt2);
        masm.jmpb(POP_LABEL);
        // limit is zero
        masm.bind(COMPARE_TAIL);
        masm.movl(cnt2, result);
        // Fallthru to tail compare
    }
    // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
    if (kind1 == kind2) {
        masm.leaq(str1, new AMD64Address(str1, cnt2, scale));
        masm.leaq(str2, new AMD64Address(str2, cnt2, scale));
    } else {
        masm.leaq(str1, new AMD64Address(str1, cnt2, scale1));
        masm.leaq(str2, new AMD64Address(str2, cnt2, scale2));
    }
    // first character was compared already
    masm.decrementl(cnt2);
    masm.negq(cnt2);
    // Compare the rest of the elements
    masm.bind(WHILE_HEAD_LABEL);
    loadNextElements(masm, result, cnt1, str1, str2, scale, scale1, scale2, cnt2);
    masm.subl(result, cnt1);
    masm.jccb(ConditionFlag.NotZero, POP_LABEL);
    masm.incrementq(cnt2, 1);
    masm.jccb(ConditionFlag.NotZero, WHILE_HEAD_LABEL);
    // Strings are equal up to min length. Return the length difference.
    masm.bind(LENGTH_DIFF_LABEL);
    masm.pop(result);
    // if (ae == StrIntrinsicNode::UU) {
    if (kind1 == JavaKind.Char && kind2 == JavaKind.Char) {
        // Divide diff by 2 to get number of chars
        masm.sarl(result, 1);
    }
    masm.jmpb(DONE_LABEL);
    // if (VM_Version::supports_avx512vlbw()) {
    if (supportsAVX512VLBW(crb.target)) {
        masm.bind(COMPARE_WIDE_VECTORS_LOOP_FAILED);
        // k7 has one bit per compared byte; the lowest zero bit marks the first mismatch.
        masm.kmovql(cnt1, k7);
        masm.notq(cnt1);
        masm.bsfq(cnt2, cnt1);
        // if (ae != StrIntrinsicNode::LL) {
        if (!(kind1 == JavaKind.Byte && kind2 == JavaKind.Byte)) {
            // Divide diff by 2 to get number of chars
            // (the compared lanes are 16 bits wide in the Char/Char and mixed cases)
            masm.sarl(cnt2, 1);
        }
        masm.addq(result, cnt2);
        // if (ae == StrIntrinsicNode::LL) {
        if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) {
            masm.movzbl(cnt1, new AMD64Address(str2, result, Scale.Times1));
            masm.movzbl(result, new AMD64Address(str1, result, Scale.Times1));
        } else if (kind1 == JavaKind.Char && kind2 == JavaKind.Char) {
            masm.movzwl(cnt1, new AMD64Address(str2, result, scale));
            masm.movzwl(result, new AMD64Address(str1, result, scale));
        } else {
            masm.movzwl(cnt1, new AMD64Address(str2, result, scale2));
            masm.movzbl(result, new AMD64Address(str1, result, scale1));
        }
        masm.subl(result, cnt1);
        masm.jmpb(POP_LABEL);
    }
    // Discard the stored length difference
    masm.bind(POP_LABEL);
    masm.pop(cnt1);
    // That's it
    masm.bind(DONE_LABEL);
    // if (ae == StrIntrinsicNode::UL) {
    if (kind1 == JavaKind.Char && kind2 == JavaKind.Byte) {
        masm.negl(result);
    }
}
use of org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.OR in project graal by oracle.
the class AMD64ArrayEqualsOp method emitFloatCompare.
/**
 * Emits the comparison of one floating-point element of each array. The elements are treated as
 * matching when their bit patterns are identical or when both of them are NaN.
 *
 * @param masm the assembler the instructions are emitted into
 * @param base1 base register of the first operand's address
 * @param base2 base register of the second operand's address
 * @param index index register shared by both addresses (unscaled)
 * @param offset displacement shared by both addresses
 * @param falseLabel label handed to the NaN checks; presumably the "not equal" exit
 * @param skipBitwiseCompare when {@code true} only the NaN checks are emitted
 */
private void emitFloatCompare(AMD64MacroAssembler masm, Register base1, Register base2, Register index, int offset, Label falseLabel, boolean skipBitwiseCompare) {
    final Label elementsMatch = new Label();
    final AMD64Address lhs = new AMD64Address(base1, index, Scale.Times1, offset);
    final AMD64Address rhs = new AMD64Address(base2, index, Scale.Times1, offset);

    if (!skipBitwiseCompare) {
        // Compare the raw bits first; identical bits need no NaN handling.
        final Register scratch = asRegister(temp4);
        if (kind != JavaKind.Float) {
            // 64-bit element
            masm.movq(scratch, lhs);
            masm.cmpq(scratch, rhs);
        } else {
            // 32-bit element
            masm.movl(scratch, lhs);
            masm.cmpl(scratch, rhs);
        }
        masm.jccb(ConditionFlag.Equal, elementsMatch);
    }

    // Bits differ (or were not compared): check each operand for NaN.
    emitNaNCheck(masm, lhs, falseLabel);
    emitNaNCheck(masm, rhs, falseLabel);

    masm.bind(elementsMatch);
}
use of org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.OR in project graal by oracle.
the class AMD64HotSpotBackend method newCompilationResultBuilder.
/**
 * Creates the {@link CompilationResultBuilder} for a HotSpot compilation and configures its
 * frame size, maximum interpreter frame size and, where applicable, the custom stack area
 * offset. For stubs, the destroyed caller registers are gathered and recorded on the stub.
 */
@Override
public CompilationResultBuilder newCompilationResultBuilder(LIRGenerationResult lirGenRen, FrameMap frameMap, CompilationResult compilationResult, CompilationResultBuilderFactory factory) {
    HotSpotLIRGenerationResult hotSpotResult = (HotSpotLIRGenerationResult) lirGenRen;
    LIR lir = hotSpotResult.getLIR();
    assert hotSpotResult.getDeoptimizationRescueSlot() == null || frameMap.frameNeedsAllocating() : "method that can deoptimize must have a frame";

    OptionValues options = lir.getOptions();
    DebugContext debug = lir.getDebug();

    // The frame is omitted only if the method:
    // - has no spill slots or other slots allocated during register allocation
    // - has no callee-saved registers
    // - has no incoming arguments passed on the stack
    // - has no deoptimization points
    // - makes no foreign calls (which require an aligned stack)
    boolean omitFrame = CanOmitFrame.getValue(options) && !(frameMap.frameNeedsAllocating() || lir.hasArgInCallerFrame() || hotSpotResult.hasForeignCall());

    Stub stub = hotSpotResult.getStub();
    Assembler masm = createAssembler(frameMap);
    HotSpotFrameContext frameContext = new HotSpotFrameContext(stub != null, omitFrame);
    DataBuilder dataBuilder = new HotSpotDataBuilder(getCodeCache().getTarget());

    CompilationResultBuilder builder = factory.createBuilder(getCodeCache(), getForeignCalls(), frameMap, masm, dataBuilder, frameContext, options, debug, compilationResult);
    builder.setTotalFrameSize(frameMap.totalFrameSize());
    builder.setMaxInterpreterFrameSize(hotSpotResult.getMaxInterpreterFrameSize());

    StackSlot rescueSlot = hotSpotResult.getDeoptimizationRescueSlot();
    if (stub == null) {
        // Regular method: publish the deoptimization rescue slot, if there is one.
        if (rescueSlot != null) {
            builder.compilationResult.setCustomStackAreaOffset(rescueSlot);
        }
    } else {
        // Stub: record which caller registers the stub destroys.
        EconomicSet<Register> destroyedCallerRegisters = gatherDestroyedCallerRegisters(lir);
        updateStub(stub, destroyedCallerRegisters, hotSpotResult.getCalleeSaveInfo(), frameMap);
    }
    return builder;
}
Aggregations