
Example 1 with SUB

Use of org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SUB in project graal by oracle.

The class AMD64NodeMatchRules defines the method subMemory, which folds a fixed or floating memory read into the memory operand of a SUB instruction:

@MatchRule("(Sub value Read=access)")
@MatchRule("(Sub value FloatingRead=access)")
public ComplexMatchResult subMemory(ValueNode value, LIRLowerableAccess access) {
    OperandSize size = getMemorySize(access);
    if (size.isXmmType()) {
        TargetDescription target = getLIRGeneratorTool().target();
        boolean isAvx = ((AMD64) target.arch).getFeatures().contains(CPUFeature.AVX);
        if (isAvx) {
            return binaryRead(AVXOp.SUB, size, value, access);
        } else {
            return binaryRead(SSEOp.SUB, size, value, access);
        }
    } else {
        return binaryRead(SUB.getRMOpcode(size), size, value, access);
    }
}
Also used : TargetDescription (jdk.vm.ci.code.TargetDescription), OperandSize (org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize), MatchRule (org.graalvm.compiler.core.match.MatchRule)
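As a rough illustration (not taken from the Graal sources), the rule above matches the graph shape produced for a subtraction whose second operand is read straight from memory. The class and method names below are hypothetical and exist only for this sketch:

class Holder {
    int field;
}

static int diff(int value, Holder h) {
    // The load of h.field shows up as a Read (or FloatingRead) node in the graph;
    // the match rule can then emit a single memory-form SUB such as
    // "sub eax, dword ptr [rbx + offset]" instead of a separate load plus a
    // register-register subtract.
    return value - h.field;
}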

Example 2 with SUB

Use of org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.SUB in project graal by oracle.

The class AMD64ArrayCompareToOp defines the method emitCode, which emits the assembly stub that lexicographically compares two arrays:

@Override
public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
    Register result = asRegister(resultValue);
    Register str1 = asRegister(temp1);
    Register str2 = asRegister(temp2);
    // Load array base addresses.
    masm.leaq(str1, new AMD64Address(asRegister(array1Value), array1BaseOffset));
    masm.leaq(str2, new AMD64Address(asRegister(array2Value), array2BaseOffset));
    Register cnt1 = asRegister(length1Value);
    Register cnt2 = asRegister(length2Value);
    // Checkstyle: stop
    Label LENGTH_DIFF_LABEL = new Label();
    Label POP_LABEL = new Label();
    Label DONE_LABEL = new Label();
    Label WHILE_HEAD_LABEL = new Label();
    // used only _LP64 && AVX3
    Label COMPARE_WIDE_VECTORS_LOOP_FAILED = new Label();
    int stride, stride2;
    int adr_stride = -1;
    int adr_stride1 = -1;
    int adr_stride2 = -1;
    // Checkstyle: resume
    int stride2x2 = 0x40;
    AMD64Address.Scale scale = null;
    AMD64Address.Scale scale1 = null;
    AMD64Address.Scale scale2 = null;
    // if (ae != StrIntrinsicNode::LL) {
    if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) {
        stride2x2 = 0x20;
    }
    // if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
    if (kind1 != kind2) {
        masm.shrl(cnt2, 1);
    }
    // Compute the minimum of the string lengths and the
    // difference of the string lengths (stack).
    // Do the conditional move stuff
    masm.movl(result, cnt1);
    masm.subl(cnt1, cnt2);
    masm.push(cnt1);
    // cnt2 = min(cnt1, cnt2)
    masm.cmovl(ConditionFlag.LessEqual, cnt2, result);
    // Is the minimum length zero?
    masm.testl(cnt2, cnt2);
    masm.jcc(ConditionFlag.Zero, LENGTH_DIFF_LABEL);
    // if (ae == StrIntrinsicNode::LL) {
    if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) {
        // Load first bytes
        // result = str1[0]
        masm.movzbl(result, new AMD64Address(str1, 0));
        // cnt1 = str2[0]
        masm.movzbl(cnt1, new AMD64Address(str2, 0));
    // } else if (ae == StrIntrinsicNode::UU) {
    } else if (kind1 == JavaKind.Char && kind2 == JavaKind.Char) {
        // Load first characters
        masm.movzwl(result, new AMD64Address(str1, 0));
        masm.movzwl(cnt1, new AMD64Address(str2, 0));
    } else {
        masm.movzbl(result, new AMD64Address(str1, 0));
        masm.movzwl(cnt1, new AMD64Address(str2, 0));
    }
    masm.subl(result, cnt1);
    masm.jcc(ConditionFlag.NotZero, POP_LABEL);
    // if (ae == StrIntrinsicNode::UU) {
    if (kind1 == JavaKind.Char && kind2 == JavaKind.Char) {
        // Divide length by 2 to get number of chars
        masm.shrl(cnt2, 1);
    }
    masm.cmpl(cnt2, 1);
    masm.jcc(ConditionFlag.Equal, LENGTH_DIFF_LABEL);
    // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
    if (kind1 == kind2) {
        masm.cmpptr(str1, str2);
        masm.jcc(ConditionFlag.Equal, LENGTH_DIFF_LABEL);
        // if (ae == StrIntrinsicNode::LL) {
        if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) {
            scale = AMD64Address.Scale.Times1;
            stride = 16;
        } else {
            scale = AMD64Address.Scale.Times2;
            stride = 8;
        }
    } else {
        scale1 = AMD64Address.Scale.Times1;
        scale2 = AMD64Address.Scale.Times2;
        // scale not used
        stride = 8;
    }
    // if (UseAVX >= 2 && UseSSE42Intrinsics) {
    if (supportsAVX2(crb.target) && supportsSSE42(crb.target)) {
        Register vec1 = asRegister(vectorTemp1, AMD64Kind.DOUBLE);
        // Checkstyle: stop
        Label COMPARE_WIDE_VECTORS = new Label();
        Label VECTOR_NOT_EQUAL = new Label();
        Label COMPARE_WIDE_TAIL = new Label();
        Label COMPARE_SMALL_STR = new Label();
        Label COMPARE_WIDE_VECTORS_LOOP = new Label();
        Label COMPARE_16_CHARS = new Label();
        Label COMPARE_INDEX_CHAR = new Label();
        Label COMPARE_WIDE_VECTORS_LOOP_AVX2 = new Label();
        Label COMPARE_TAIL_LONG = new Label();
        // used only _LP64 && AVX3
        Label COMPARE_WIDE_VECTORS_LOOP_AVX3 = new Label();
        // Checkstyle: resume
        int pcmpmask = 0x19;
        // if (ae == StrIntrinsicNode::LL) {
        if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) {
            pcmpmask &= ~0x01;
        }
        // if (ae == StrIntrinsicNode::LL) {
        if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) {
            stride2 = 32;
        } else {
            stride2 = 16;
        }
        // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
        if (kind1 == kind2) {
            adr_stride = stride << scale.log2;
        } else {
            // stride << scale1;
            adr_stride1 = 8;
            // stride << scale2;
            adr_stride2 = 16;
        }
        assert result.equals(rax) && cnt2.equals(rdx) && cnt1.equals(rcx) : "pcmpestri";
        // rax and rdx are used by pcmpestri as elements counters
        masm.movl(result, cnt2);
        // cnt2 holds the vector count
        masm.andl(cnt2, ~(stride2 - 1));
        masm.jcc(ConditionFlag.Zero, COMPARE_TAIL_LONG);
        // fast path : compare first 2 8-char vectors.
        masm.bind(COMPARE_16_CHARS);
        // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
        if (kind1 == kind2) {
            masm.movdqu(vec1, new AMD64Address(str1, 0));
        } else {
            masm.pmovzxbw(vec1, new AMD64Address(str1, 0));
        }
        masm.pcmpestri(vec1, new AMD64Address(str2, 0), pcmpmask);
        masm.jccb(ConditionFlag.Below, COMPARE_INDEX_CHAR);
        // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
        if (kind1 == kind2) {
            masm.movdqu(vec1, new AMD64Address(str1, adr_stride));
            masm.pcmpestri(vec1, new AMD64Address(str2, adr_stride), pcmpmask);
        } else {
            masm.pmovzxbw(vec1, new AMD64Address(str1, adr_stride1));
            masm.pcmpestri(vec1, new AMD64Address(str2, adr_stride2), pcmpmask);
        }
        masm.jccb(ConditionFlag.AboveEqual, COMPARE_WIDE_VECTORS);
        masm.addl(cnt1, stride);
        // Compare the characters at index in cnt1
        // cnt1 has the offset of the mismatching character
        masm.bind(COMPARE_INDEX_CHAR);
        loadNextElements(masm, result, cnt2, str1, str2, scale, scale1, scale2, cnt1);
        masm.subl(result, cnt2);
        masm.jmp(POP_LABEL);
        // Setup the registers to start vector comparison loop
        masm.bind(COMPARE_WIDE_VECTORS);
        // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
        if (kind1 == kind2) {
            masm.leaq(str1, new AMD64Address(str1, result, scale));
            masm.leaq(str2, new AMD64Address(str2, result, scale));
        } else {
            masm.leaq(str1, new AMD64Address(str1, result, scale1));
            masm.leaq(str2, new AMD64Address(str2, result, scale2));
        }
        masm.subl(result, stride2);
        masm.subl(cnt2, stride2);
        masm.jcc(ConditionFlag.Zero, COMPARE_WIDE_TAIL);
        masm.negq(result);
        // In a loop, compare 16-chars (32-bytes) at once using (vpxor+vptest)
        masm.bind(COMPARE_WIDE_VECTORS_LOOP);
        // if (VM_Version::supports_avx512vlbw()) { // trying 64 bytes fast loop
        if (supportsAVX512VLBW(crb.target)) {
            masm.cmpl(cnt2, stride2x2);
            masm.jccb(ConditionFlag.Below, COMPARE_WIDE_VECTORS_LOOP_AVX2);
            // cnt2 holds the vector count
            masm.testl(cnt2, stride2x2 - 1);
            // means we cannot subtract by 0x40
            masm.jccb(ConditionFlag.NotZero, COMPARE_WIDE_VECTORS_LOOP_AVX2);
            // the hottest loop
            masm.bind(COMPARE_WIDE_VECTORS_LOOP_AVX3);
            // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
            if (kind1 == kind2) {
                masm.evmovdquq(vec1, new AMD64Address(str1, result, scale), AvxVectorLen.AVX_512bit);
                // k7 == 11..11, if operands equal, otherwise k7 has some 0
                masm.evpcmpeqb(k7, vec1, new AMD64Address(str2, result, scale), AvxVectorLen.AVX_512bit);
            } else {
                masm.vpmovzxbw(vec1, new AMD64Address(str1, result, scale1), AvxVectorLen.AVX_512bit);
                // k7 == 11..11, if operands equal, otherwise k7 has some 0
                masm.evpcmpeqb(k7, vec1, new AMD64Address(str2, result, scale2), AvxVectorLen.AVX_512bit);
            }
            masm.kortestql(k7, k7);
            // miscompare
            masm.jcc(ConditionFlag.AboveEqual, COMPARE_WIDE_VECTORS_LOOP_FAILED);
            // update since we already compared at this addr
            masm.addq(result, stride2x2);
            // and sub the size too
            masm.subl(cnt2, stride2x2);
            masm.jccb(ConditionFlag.NotZero, COMPARE_WIDE_VECTORS_LOOP_AVX3);
            masm.vpxor(vec1, vec1, vec1);
            masm.jmpb(COMPARE_WIDE_TAIL);
        }
        masm.bind(COMPARE_WIDE_VECTORS_LOOP_AVX2);
        // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
        if (kind1 == kind2) {
            masm.vmovdqu(vec1, new AMD64Address(str1, result, scale));
            masm.vpxor(vec1, vec1, new AMD64Address(str2, result, scale));
        } else {
            masm.vpmovzxbw(vec1, new AMD64Address(str1, result, scale1), AvxVectorLen.AVX_256bit);
            masm.vpxor(vec1, vec1, new AMD64Address(str2, result, scale2));
        }
        masm.vptest(vec1, vec1);
        masm.jcc(ConditionFlag.NotZero, VECTOR_NOT_EQUAL);
        masm.addq(result, stride2);
        masm.subl(cnt2, stride2);
        masm.jcc(ConditionFlag.NotZero, COMPARE_WIDE_VECTORS_LOOP);
        // clean upper bits of YMM registers
        masm.vpxor(vec1, vec1, vec1);
        // compare wide vectors tail
        masm.bind(COMPARE_WIDE_TAIL);
        masm.testq(result, result);
        masm.jcc(ConditionFlag.Zero, LENGTH_DIFF_LABEL);
        masm.movl(result, stride2);
        masm.movl(cnt2, result);
        masm.negq(result);
        masm.jmp(COMPARE_WIDE_VECTORS_LOOP_AVX2);
        // Identifies the mismatching (higher or lower) 16 bytes in the 32-byte vectors.
        masm.bind(VECTOR_NOT_EQUAL);
        // clean upper bits of YMM registers
        masm.vpxor(vec1, vec1, vec1);
        // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
        if (kind1 == kind2) {
            masm.leaq(str1, new AMD64Address(str1, result, scale));
            masm.leaq(str2, new AMD64Address(str2, result, scale));
        } else {
            masm.leaq(str1, new AMD64Address(str1, result, scale1));
            masm.leaq(str2, new AMD64Address(str2, result, scale2));
        }
        masm.jmp(COMPARE_16_CHARS);
        // Compare tail chars, length between 1 to 15 chars
        masm.bind(COMPARE_TAIL_LONG);
        masm.movl(cnt2, result);
        masm.cmpl(cnt2, stride);
        masm.jcc(ConditionFlag.Less, COMPARE_SMALL_STR);
        // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
        if (kind1 == kind2) {
            masm.movdqu(vec1, new AMD64Address(str1, 0));
        } else {
            masm.pmovzxbw(vec1, new AMD64Address(str1, 0));
        }
        masm.pcmpestri(vec1, new AMD64Address(str2, 0), pcmpmask);
        masm.jcc(ConditionFlag.Below, COMPARE_INDEX_CHAR);
        masm.subq(cnt2, stride);
        masm.jcc(ConditionFlag.Zero, LENGTH_DIFF_LABEL);
        // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
        if (kind1 == kind2) {
            masm.leaq(str1, new AMD64Address(str1, result, scale));
            masm.leaq(str2, new AMD64Address(str2, result, scale));
        } else {
            masm.leaq(str1, new AMD64Address(str1, result, scale1));
            masm.leaq(str2, new AMD64Address(str2, result, scale2));
        }
        masm.negq(cnt2);
        masm.jmpb(WHILE_HEAD_LABEL);
        masm.bind(COMPARE_SMALL_STR);
    } else if (supportsSSE42(crb.target)) {
        Register vec1 = asRegister(vectorTemp1, AMD64Kind.DOUBLE);
        // Checkstyle: stop
        Label COMPARE_WIDE_VECTORS = new Label();
        Label VECTOR_NOT_EQUAL = new Label();
        Label COMPARE_TAIL = new Label();
        // Checkstyle: resume
        int pcmpmask = 0x19;
        // Setup to compare 8-char (16-byte) vectors,
        // start from first character again because it has aligned address.
        masm.movl(result, cnt2);
        // cnt2 holds the vector count
        masm.andl(cnt2, ~(stride - 1));
        // if (ae == StrIntrinsicNode::LL) {
        if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) {
            pcmpmask &= ~0x01;
        }
        masm.jcc(ConditionFlag.Zero, COMPARE_TAIL);
        // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
        if (kind1 == kind2) {
            masm.leaq(str1, new AMD64Address(str1, result, scale));
            masm.leaq(str2, new AMD64Address(str2, result, scale));
        } else {
            masm.leaq(str1, new AMD64Address(str1, result, scale1));
            masm.leaq(str2, new AMD64Address(str2, result, scale2));
        }
        masm.negq(result);
        // rcx - first mismatched element index
        assert result.equals(rax) && cnt2.equals(rdx) && cnt1.equals(rcx) : "pcmpestri";
        masm.bind(COMPARE_WIDE_VECTORS);
        // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
        if (kind1 == kind2) {
            masm.movdqu(vec1, new AMD64Address(str1, result, scale));
            masm.pcmpestri(vec1, new AMD64Address(str2, result, scale), pcmpmask);
        } else {
            masm.pmovzxbw(vec1, new AMD64Address(str1, result, scale1));
            masm.pcmpestri(vec1, new AMD64Address(str2, result, scale2), pcmpmask);
        }
        // After pcmpestri cnt1(rcx) contains mismatched element index
        // CF==1
        masm.jccb(ConditionFlag.Below, VECTOR_NOT_EQUAL);
        masm.addq(result, stride);
        masm.subq(cnt2, stride);
        masm.jccb(ConditionFlag.NotZero, COMPARE_WIDE_VECTORS);
        // compare wide vectors tail
        masm.testq(result, result);
        masm.jcc(ConditionFlag.Zero, LENGTH_DIFF_LABEL);
        masm.movl(cnt2, stride);
        masm.movl(result, stride);
        masm.negq(result);
        // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
        if (kind1 == kind2) {
            masm.movdqu(vec1, new AMD64Address(str1, result, scale));
            masm.pcmpestri(vec1, new AMD64Address(str2, result, scale), pcmpmask);
        } else {
            masm.pmovzxbw(vec1, new AMD64Address(str1, result, scale1));
            masm.pcmpestri(vec1, new AMD64Address(str2, result, scale2), pcmpmask);
        }
        masm.jccb(ConditionFlag.AboveEqual, LENGTH_DIFF_LABEL);
        // Mismatched characters in the vectors
        masm.bind(VECTOR_NOT_EQUAL);
        masm.addq(cnt1, result);
        loadNextElements(masm, result, cnt2, str1, str2, scale, scale1, scale2, cnt1);
        masm.subl(result, cnt2);
        masm.jmpb(POP_LABEL);
        // limit is zero
        masm.bind(COMPARE_TAIL);
        masm.movl(cnt2, result);
    // Fallthru to tail compare
    }
    // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
    if (kind1 == kind2) {
        masm.leaq(str1, new AMD64Address(str1, cnt2, scale));
        masm.leaq(str2, new AMD64Address(str2, cnt2, scale));
    } else {
        masm.leaq(str1, new AMD64Address(str1, cnt2, scale1));
        masm.leaq(str2, new AMD64Address(str2, cnt2, scale2));
    }
    // first character was compared already
    masm.decrementl(cnt2);
    masm.negq(cnt2);
    // Compare the rest of the elements
    masm.bind(WHILE_HEAD_LABEL);
    loadNextElements(masm, result, cnt1, str1, str2, scale, scale1, scale2, cnt2);
    masm.subl(result, cnt1);
    masm.jccb(ConditionFlag.NotZero, POP_LABEL);
    masm.incrementq(cnt2, 1);
    masm.jccb(ConditionFlag.NotZero, WHILE_HEAD_LABEL);
    // Strings are equal up to min length. Return the length difference.
    masm.bind(LENGTH_DIFF_LABEL);
    masm.pop(result);
    // if (ae == StrIntrinsicNode::UU) {
    if (kind1 == JavaKind.Char && kind2 == JavaKind.Char) {
        // Divide diff by 2 to get number of chars
        masm.sarl(result, 1);
    }
    masm.jmpb(DONE_LABEL);
    // if (VM_Version::supports_avx512vlbw()) {
    if (supportsAVX512VLBW(crb.target)) {
        masm.bind(COMPARE_WIDE_VECTORS_LOOP_FAILED);
        masm.kmovql(cnt1, k7);
        masm.notq(cnt1);
        masm.bsfq(cnt2, cnt1);
        // if (ae != StrIntrinsicNode::LL) {
        if (kind1 != JavaKind.Byte && kind2 != JavaKind.Byte) {
            // Divide diff by 2 to get number of chars
            masm.sarl(cnt2, 1);
        }
        masm.addq(result, cnt2);
        // if (ae == StrIntrinsicNode::LL) {
        if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) {
            masm.movzbl(cnt1, new AMD64Address(str2, result, Scale.Times1));
            masm.movzbl(result, new AMD64Address(str1, result, Scale.Times1));
        } else if (kind1 == JavaKind.Char && kind2 == JavaKind.Char) {
            masm.movzwl(cnt1, new AMD64Address(str2, result, scale));
            masm.movzwl(result, new AMD64Address(str1, result, scale));
        } else {
            masm.movzwl(cnt1, new AMD64Address(str2, result, scale2));
            masm.movzbl(result, new AMD64Address(str1, result, scale1));
        }
        masm.subl(result, cnt1);
        masm.jmpb(POP_LABEL);
    }
    // Discard the stored length difference
    masm.bind(POP_LABEL);
    masm.pop(cnt1);
    // That's it
    masm.bind(DONE_LABEL);
    // if (ae == StrIntrinsicNode::UL) {
    if (kind1 == JavaKind.Char && kind2 == JavaKind.Byte) {
        masm.negl(result);
    }
}
Also used : Register (jdk.vm.ci.code.Register), ValueUtil.asRegister (jdk.vm.ci.code.ValueUtil.asRegister), Scale (org.graalvm.compiler.asm.amd64.AMD64Address.Scale), Label (org.graalvm.compiler.asm.Label), AMD64Address (org.graalvm.compiler.asm.amd64.AMD64Address)
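For orientation only: the stub above computes, with SSE/AVX acceleration and support for mixed byte/char encodings, essentially the same result as a plain scalar comparison loop. The sketch below covers just the byte/byte (LL) case and uses a hypothetical method name:

static int compareBytes(byte[] a, byte[] b) {
    int min = Math.min(a.length, b.length);
    for (int i = 0; i < min; i++) {
        // movzbl in the stub zero-extends each byte, so elements compare as unsigned values.
        int diff = (a[i] & 0xFF) - (b[i] & 0xFF);
        if (diff != 0) {
            return diff;   // first mismatching element decides the result
        }
    }
    return a.length - b.length;   // equal prefix: the length difference decides
}

For char/char inputs the stub divides the stored difference by two (the sarl at LENGTH_DIFF_LABEL) so the result is in elements rather than bytes, and for the char/byte case it negates the result at DONE_LABEL.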

Aggregations

Register (jdk.vm.ci.code.Register) 1
TargetDescription (jdk.vm.ci.code.TargetDescription) 1
ValueUtil.asRegister (jdk.vm.ci.code.ValueUtil.asRegister) 1
Label (org.graalvm.compiler.asm.Label) 1
AMD64Address (org.graalvm.compiler.asm.amd64.AMD64Address) 1
Scale (org.graalvm.compiler.asm.amd64.AMD64Address.Scale) 1
OperandSize (org.graalvm.compiler.asm.amd64.AMD64Assembler.OperandSize) 1
MatchRule (org.graalvm.compiler.core.match.MatchRule) 1