use of org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SUB in project graal by oracle.
the class AMD64NodeMatchRules method subMemory.
@MatchRule("(Sub value Read=access)")
@MatchRule("(Sub value FloatingRead=access)")
public ComplexMatchResult subMemory(ValueNode value, LIRLowerableAccess access) {
OperandSize size = getMemorySize(access);
if (size.isXmmType()) {
TargetDescription target = getLIRGeneratorTool().target();
boolean isAvx = ((AMD64) target.arch).getFeatures().contains(CPUFeature.AVX);
if (isAvx) {
return binaryRead(AVXOp.SUB, size, value, access);
} else {
return binaryRead(SSEOp.SUB, size, value, access);
}
} else {
return binaryRead(SUB.getRMOpcode(size), size, value, access);
}
}
use of org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SUB in project graal by oracle.
the class AMD64ArrayCompareToOp method emitCode.
@Override
public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
Register result = asRegister(resultValue);
Register str1 = asRegister(temp1);
Register str2 = asRegister(temp2);
// Load array base addresses.
masm.leaq(str1, new AMD64Address(asRegister(array1Value), array1BaseOffset));
masm.leaq(str2, new AMD64Address(asRegister(array2Value), array2BaseOffset));
Register cnt1 = asRegister(length1Value);
Register cnt2 = asRegister(length2Value);
// Checkstyle: stop
Label LENGTH_DIFF_LABEL = new Label();
Label POP_LABEL = new Label();
Label DONE_LABEL = new Label();
Label WHILE_HEAD_LABEL = new Label();
// used only _LP64 && AVX3
Label COMPARE_WIDE_VECTORS_LOOP_FAILED = new Label();
int stride, stride2;
int adr_stride = -1;
int adr_stride1 = -1;
int adr_stride2 = -1;
// Checkstyle: resume
int stride2x2 = 0x40;
AMD64Address.Scale scale = null;
AMD64Address.Scale scale1 = null;
AMD64Address.Scale scale2 = null;
// if (ae != StrIntrinsicNode::LL) {
if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) {
stride2x2 = 0x20;
}
// if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
if (kind1 != kind2) {
masm.shrl(cnt2, 1);
}
// Compute the minimum of the string lengths and the
// difference of the string lengths (stack).
// Do the conditional move stuff
masm.movl(result, cnt1);
masm.subl(cnt1, cnt2);
masm.push(cnt1);
// cnt2 = min(cnt1, cnt2)
masm.cmovl(ConditionFlag.LessEqual, cnt2, result);
// Is the minimum length zero?
masm.testl(cnt2, cnt2);
masm.jcc(ConditionFlag.Zero, LENGTH_DIFF_LABEL);
// if (ae == StrIntrinsicNode::LL) {
if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) {
// Load first bytes
// result = str1[0]
masm.movzbl(result, new AMD64Address(str1, 0));
// cnt1 = str2[0]
masm.movzbl(cnt1, new AMD64Address(str2, 0));
// } else if (ae == StrIntrinsicNode::UU) {
} else if (kind1 == JavaKind.Char && kind2 == JavaKind.Char) {
// Load first characters
masm.movzwl(result, new AMD64Address(str1, 0));
masm.movzwl(cnt1, new AMD64Address(str2, 0));
} else {
masm.movzbl(result, new AMD64Address(str1, 0));
masm.movzwl(cnt1, new AMD64Address(str2, 0));
}
masm.subl(result, cnt1);
masm.jcc(ConditionFlag.NotZero, POP_LABEL);
// if (ae == StrIntrinsicNode::UU) {
if (kind1 == JavaKind.Char && kind2 == JavaKind.Char) {
// Divide length by 2 to get number of chars
masm.shrl(cnt2, 1);
}
masm.cmpl(cnt2, 1);
masm.jcc(ConditionFlag.Equal, LENGTH_DIFF_LABEL);
// if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
if (kind1 == kind2) {
masm.cmpptr(str1, str2);
masm.jcc(ConditionFlag.Equal, LENGTH_DIFF_LABEL);
// if (ae == StrIntrinsicNode::LL) {
if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) {
scale = AMD64Address.Scale.Times1;
stride = 16;
} else {
scale = AMD64Address.Scale.Times2;
stride = 8;
}
} else {
scale1 = AMD64Address.Scale.Times1;
scale2 = AMD64Address.Scale.Times2;
// scale not used
stride = 8;
}
// if (UseAVX >= 2 && UseSSE42Intrinsics) {
if (supportsAVX2(crb.target) && supportsSSE42(crb.target)) {
Register vec1 = asRegister(vectorTemp1, AMD64Kind.DOUBLE);
// Checkstyle: stop
Label COMPARE_WIDE_VECTORS = new Label();
Label VECTOR_NOT_EQUAL = new Label();
Label COMPARE_WIDE_TAIL = new Label();
Label COMPARE_SMALL_STR = new Label();
Label COMPARE_WIDE_VECTORS_LOOP = new Label();
Label COMPARE_16_CHARS = new Label();
Label COMPARE_INDEX_CHAR = new Label();
Label COMPARE_WIDE_VECTORS_LOOP_AVX2 = new Label();
Label COMPARE_TAIL_LONG = new Label();
// used only _LP64 && AVX3
Label COMPARE_WIDE_VECTORS_LOOP_AVX3 = new Label();
// Checkstyle: resume
int pcmpmask = 0x19;
// if (ae == StrIntrinsicNode::LL) {
if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) {
pcmpmask &= ~0x01;
}
// if (ae == StrIntrinsicNode::LL) {
if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) {
stride2 = 32;
} else {
stride2 = 16;
}
// if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
if (kind1 == kind2) {
adr_stride = stride << scale.log2;
} else {
// stride << scale1;
adr_stride1 = 8;
// stride << scale2;
adr_stride2 = 16;
}
assert result.equals(rax) && cnt2.equals(rdx) && cnt1.equals(rcx) : "pcmpestri";
// rax and rdx are used by pcmpestri as elements counters
masm.movl(result, cnt2);
// cnt2 holds the vector count
masm.andl(cnt2, ~(stride2 - 1));
masm.jcc(ConditionFlag.Zero, COMPARE_TAIL_LONG);
// fast path : compare first 2 8-char vectors.
masm.bind(COMPARE_16_CHARS);
// if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
if (kind1 == kind2) {
masm.movdqu(vec1, new AMD64Address(str1, 0));
} else {
masm.pmovzxbw(vec1, new AMD64Address(str1, 0));
}
masm.pcmpestri(vec1, new AMD64Address(str2, 0), pcmpmask);
masm.jccb(ConditionFlag.Below, COMPARE_INDEX_CHAR);
// if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
if (kind1 == kind2) {
masm.movdqu(vec1, new AMD64Address(str1, adr_stride));
masm.pcmpestri(vec1, new AMD64Address(str2, adr_stride), pcmpmask);
} else {
masm.pmovzxbw(vec1, new AMD64Address(str1, adr_stride1));
masm.pcmpestri(vec1, new AMD64Address(str2, adr_stride2), pcmpmask);
}
masm.jccb(ConditionFlag.AboveEqual, COMPARE_WIDE_VECTORS);
masm.addl(cnt1, stride);
// Compare the characters at index in cnt1
// cnt1 has the offset of the mismatching character
masm.bind(COMPARE_INDEX_CHAR);
loadNextElements(masm, result, cnt2, str1, str2, scale, scale1, scale2, cnt1);
masm.subl(result, cnt2);
masm.jmp(POP_LABEL);
// Setup the registers to start vector comparison loop
masm.bind(COMPARE_WIDE_VECTORS);
// if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
if (kind1 == kind2) {
masm.leaq(str1, new AMD64Address(str1, result, scale));
masm.leaq(str2, new AMD64Address(str2, result, scale));
} else {
masm.leaq(str1, new AMD64Address(str1, result, scale1));
masm.leaq(str2, new AMD64Address(str2, result, scale2));
}
masm.subl(result, stride2);
masm.subl(cnt2, stride2);
masm.jcc(ConditionFlag.Zero, COMPARE_WIDE_TAIL);
masm.negq(result);
// In a loop, compare 16-chars (32-bytes) at once using (vpxor+vptest)
masm.bind(COMPARE_WIDE_VECTORS_LOOP);
// if (VM_Version::supports_avx512vlbw()) { // trying 64 bytes fast loop
if (supportsAVX512VLBW(crb.target)) {
masm.cmpl(cnt2, stride2x2);
masm.jccb(ConditionFlag.Below, COMPARE_WIDE_VECTORS_LOOP_AVX2);
// cnt2 holds the vector count
masm.testl(cnt2, stride2x2 - 1);
// means we cannot subtract by 0x40
masm.jccb(ConditionFlag.NotZero, COMPARE_WIDE_VECTORS_LOOP_AVX2);
// the hottest loop
masm.bind(COMPARE_WIDE_VECTORS_LOOP_AVX3);
// if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
if (kind1 == kind2) {
masm.evmovdquq(vec1, new AMD64Address(str1, result, scale), AvxVectorLen.AVX_512bit);
// k7 == 11..11, if operands equal, otherwise k7 has some 0
masm.evpcmpeqb(k7, vec1, new AMD64Address(str2, result, scale), AvxVectorLen.AVX_512bit);
} else {
masm.vpmovzxbw(vec1, new AMD64Address(str1, result, scale1), AvxVectorLen.AVX_512bit);
// k7 == 11..11, if operands equal, otherwise k7 has some 0
masm.evpcmpeqb(k7, vec1, new AMD64Address(str2, result, scale2), AvxVectorLen.AVX_512bit);
}
masm.kortestql(k7, k7);
// miscompare
masm.jcc(ConditionFlag.AboveEqual, COMPARE_WIDE_VECTORS_LOOP_FAILED);
// update since we already compared at this addr
masm.addq(result, stride2x2);
// and sub the size too
masm.subl(cnt2, stride2x2);
masm.jccb(ConditionFlag.NotZero, COMPARE_WIDE_VECTORS_LOOP_AVX3);
masm.vpxor(vec1, vec1, vec1);
masm.jmpb(COMPARE_WIDE_TAIL);
}
masm.bind(COMPARE_WIDE_VECTORS_LOOP_AVX2);
// if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
if (kind1 == kind2) {
masm.vmovdqu(vec1, new AMD64Address(str1, result, scale));
masm.vpxor(vec1, vec1, new AMD64Address(str2, result, scale));
} else {
masm.vpmovzxbw(vec1, new AMD64Address(str1, result, scale1), AvxVectorLen.AVX_256bit);
masm.vpxor(vec1, vec1, new AMD64Address(str2, result, scale2));
}
masm.vptest(vec1, vec1);
masm.jcc(ConditionFlag.NotZero, VECTOR_NOT_EQUAL);
masm.addq(result, stride2);
masm.subl(cnt2, stride2);
masm.jcc(ConditionFlag.NotZero, COMPARE_WIDE_VECTORS_LOOP);
// clean upper bits of YMM registers
masm.vpxor(vec1, vec1, vec1);
// compare wide vectors tail
masm.bind(COMPARE_WIDE_TAIL);
masm.testq(result, result);
masm.jcc(ConditionFlag.Zero, LENGTH_DIFF_LABEL);
masm.movl(result, stride2);
masm.movl(cnt2, result);
masm.negq(result);
masm.jmp(COMPARE_WIDE_VECTORS_LOOP_AVX2);
// Identifies the mismatching (higher or lower)16-bytes in the 32-byte vectors.
masm.bind(VECTOR_NOT_EQUAL);
// clean upper bits of YMM registers
masm.vpxor(vec1, vec1, vec1);
// if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
if (kind1 == kind2) {
masm.leaq(str1, new AMD64Address(str1, result, scale));
masm.leaq(str2, new AMD64Address(str2, result, scale));
} else {
masm.leaq(str1, new AMD64Address(str1, result, scale1));
masm.leaq(str2, new AMD64Address(str2, result, scale2));
}
masm.jmp(COMPARE_16_CHARS);
// Compare tail chars, length between 1 to 15 chars
masm.bind(COMPARE_TAIL_LONG);
masm.movl(cnt2, result);
masm.cmpl(cnt2, stride);
masm.jcc(ConditionFlag.Less, COMPARE_SMALL_STR);
// if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
if (kind1 == kind2) {
masm.movdqu(vec1, new AMD64Address(str1, 0));
} else {
masm.pmovzxbw(vec1, new AMD64Address(str1, 0));
}
masm.pcmpestri(vec1, new AMD64Address(str2, 0), pcmpmask);
masm.jcc(ConditionFlag.Below, COMPARE_INDEX_CHAR);
masm.subq(cnt2, stride);
masm.jcc(ConditionFlag.Zero, LENGTH_DIFF_LABEL);
// if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
if (kind1 == kind2) {
masm.leaq(str1, new AMD64Address(str1, result, scale));
masm.leaq(str2, new AMD64Address(str2, result, scale));
} else {
masm.leaq(str1, new AMD64Address(str1, result, scale1));
masm.leaq(str2, new AMD64Address(str2, result, scale2));
}
masm.negq(cnt2);
masm.jmpb(WHILE_HEAD_LABEL);
masm.bind(COMPARE_SMALL_STR);
} else if (supportsSSE42(crb.target)) {
Register vec1 = asRegister(vectorTemp1, AMD64Kind.DOUBLE);
// Checkstyle: stop
Label COMPARE_WIDE_VECTORS = new Label();
Label VECTOR_NOT_EQUAL = new Label();
Label COMPARE_TAIL = new Label();
// Checkstyle: resume
int pcmpmask = 0x19;
// Setup to compare 8-char (16-byte) vectors,
// start from first character again because it has aligned address.
masm.movl(result, cnt2);
// cnt2 holds the vector count
masm.andl(cnt2, ~(stride - 1));
// if (ae == StrIntrinsicNode::LL) {
if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) {
pcmpmask &= ~0x01;
}
masm.jcc(ConditionFlag.Zero, COMPARE_TAIL);
// if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
if (kind1 == kind2) {
masm.leaq(str1, new AMD64Address(str1, result, scale));
masm.leaq(str2, new AMD64Address(str2, result, scale));
} else {
masm.leaq(str1, new AMD64Address(str1, result, scale1));
masm.leaq(str2, new AMD64Address(str2, result, scale2));
}
masm.negq(result);
// rcx - first mismatched element index
assert result.equals(rax) && cnt2.equals(rdx) && cnt1.equals(rcx) : "pcmpestri";
masm.bind(COMPARE_WIDE_VECTORS);
// if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
if (kind1 == kind2) {
masm.movdqu(vec1, new AMD64Address(str1, result, scale));
masm.pcmpestri(vec1, new AMD64Address(str2, result, scale), pcmpmask);
} else {
masm.pmovzxbw(vec1, new AMD64Address(str1, result, scale1));
masm.pcmpestri(vec1, new AMD64Address(str2, result, scale2), pcmpmask);
}
// After pcmpestri cnt1(rcx) contains mismatched element index
// CF==1
masm.jccb(ConditionFlag.Below, VECTOR_NOT_EQUAL);
masm.addq(result, stride);
masm.subq(cnt2, stride);
masm.jccb(ConditionFlag.NotZero, COMPARE_WIDE_VECTORS);
// compare wide vectors tail
masm.testq(result, result);
masm.jcc(ConditionFlag.Zero, LENGTH_DIFF_LABEL);
masm.movl(cnt2, stride);
masm.movl(result, stride);
masm.negq(result);
// if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
if (kind1 == kind2) {
masm.movdqu(vec1, new AMD64Address(str1, result, scale));
masm.pcmpestri(vec1, new AMD64Address(str2, result, scale), pcmpmask);
} else {
masm.pmovzxbw(vec1, new AMD64Address(str1, result, scale1));
masm.pcmpestri(vec1, new AMD64Address(str2, result, scale2), pcmpmask);
}
masm.jccb(ConditionFlag.AboveEqual, LENGTH_DIFF_LABEL);
// Mismatched characters in the vectors
masm.bind(VECTOR_NOT_EQUAL);
masm.addq(cnt1, result);
loadNextElements(masm, result, cnt2, str1, str2, scale, scale1, scale2, cnt1);
masm.subl(result, cnt2);
masm.jmpb(POP_LABEL);
// limit is zero
masm.bind(COMPARE_TAIL);
masm.movl(cnt2, result);
// Fallthru to tail compare
}
// if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
if (kind1 == kind2) {
masm.leaq(str1, new AMD64Address(str1, cnt2, scale));
masm.leaq(str2, new AMD64Address(str2, cnt2, scale));
} else {
masm.leaq(str1, new AMD64Address(str1, cnt2, scale1));
masm.leaq(str2, new AMD64Address(str2, cnt2, scale2));
}
// first character was compared already
masm.decrementl(cnt2);
masm.negq(cnt2);
// Compare the rest of the elements
masm.bind(WHILE_HEAD_LABEL);
loadNextElements(masm, result, cnt1, str1, str2, scale, scale1, scale2, cnt2);
masm.subl(result, cnt1);
masm.jccb(ConditionFlag.NotZero, POP_LABEL);
masm.incrementq(cnt2, 1);
masm.jccb(ConditionFlag.NotZero, WHILE_HEAD_LABEL);
// Strings are equal up to min length. Return the length difference.
masm.bind(LENGTH_DIFF_LABEL);
masm.pop(result);
// if (ae == StrIntrinsicNode::UU) {
if (kind1 == JavaKind.Char && kind2 == JavaKind.Char) {
// Divide diff by 2 to get number of chars
masm.sarl(result, 1);
}
masm.jmpb(DONE_LABEL);
// if (VM_Version::supports_avx512vlbw()) {
if (supportsAVX512VLBW(crb.target)) {
masm.bind(COMPARE_WIDE_VECTORS_LOOP_FAILED);
masm.kmovql(cnt1, k7);
masm.notq(cnt1);
masm.bsfq(cnt2, cnt1);
// if (ae != StrIntrinsicNode::LL) {
if (kind1 != JavaKind.Byte && kind2 != JavaKind.Byte) {
// Divide diff by 2 to get number of chars
masm.sarl(cnt2, 1);
}
masm.addq(result, cnt2);
// if (ae == StrIntrinsicNode::LL) {
if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) {
masm.movzbl(cnt1, new AMD64Address(str2, result, Scale.Times1));
masm.movzbl(result, new AMD64Address(str1, result, Scale.Times1));
} else if (kind1 == JavaKind.Char && kind2 == JavaKind.Char) {
masm.movzwl(cnt1, new AMD64Address(str2, result, scale));
masm.movzwl(result, new AMD64Address(str1, result, scale));
} else {
masm.movzwl(cnt1, new AMD64Address(str2, result, scale2));
masm.movzbl(result, new AMD64Address(str1, result, scale1));
}
masm.subl(result, cnt1);
masm.jmpb(POP_LABEL);
}
// Discard the stored length difference
masm.bind(POP_LABEL);
masm.pop(cnt1);
// That's it
masm.bind(DONE_LABEL);
// if (ae == StrIntrinsicNode::UL) {
if (kind1 == JavaKind.Char && kind2 == JavaKind.Byte) {
masm.negl(result);
}
}
Aggregations