
Example 1 with AND

Use of org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND in project graal by oracle.

From class AMD64HotSpotCounterOp, method emitCode.

@Override
public void emitCode(CompilationResultBuilder crb) {
    AMD64MacroAssembler masm = (AMD64MacroAssembler) crb.asm;
    TargetDescription target = crb.target;
    Register scratch;
    // Pick a scratch register that is not itself an increment register; we do not
    // want to spill it to the stack.
    if (!contains(increments, rax)) {
        scratch = rax;
    } else if (!contains(increments, rbx)) {
        scratch = rbx;
    } else {
        // Both candidate scratch registers are already used as increment registers in
        // emitIncrement().
        throw GraalError.unimplemented("RAX and RBX are increment registers at the same time, spilling over the scratch register is not supported right now");
    }
    // address for counters array
    AMD64Address countersArrayAddr = new AMD64Address(thread, config.jvmciCountersThreadOffset);
    Register countersArrayReg = scratch;
    // backup scratch register
    masm.movq((AMD64Address) crb.asAddress(backupSlot), scratch);
    // load counters array
    masm.movptr(countersArrayReg, countersArrayAddr);
    CounterProcedure emitProcedure = (counterIndex, increment, displacement) -> emitIncrement(masm, countersArrayReg, increment, displacement);
    forEachCounter(emitProcedure, target);
    // restore scratch register
    masm.movq(scratch, (AMD64Address) crb.asAddress(backupSlot));
}
Also used : HotSpotCounterOp(org.graalvm.compiler.hotspot.HotSpotCounterOp) AMD64.rax(jdk.vm.ci.amd64.AMD64.rax) AMD64.rbx(jdk.vm.ci.amd64.AMD64.rbx) LIRValueUtil.asJavaConstant(org.graalvm.compiler.lir.LIRValueUtil.asJavaConstant) CompilationResultBuilder(org.graalvm.compiler.lir.asm.CompilationResultBuilder) Register(jdk.vm.ci.code.Register) LIRValueUtil.isJavaConstant(org.graalvm.compiler.lir.LIRValueUtil.isJavaConstant) TargetDescription(jdk.vm.ci.code.TargetDescription) Value(jdk.vm.ci.meta.Value) AMD64Address(org.graalvm.compiler.asm.amd64.AMD64Address) HotSpotRegistersProvider(org.graalvm.compiler.hotspot.meta.HotSpotRegistersProvider) ValueUtil.isRegister(jdk.vm.ci.code.ValueUtil.isRegister) Opcode(org.graalvm.compiler.lir.Opcode) GraalError(org.graalvm.compiler.debug.GraalError) ValueUtil.asRegister(jdk.vm.ci.code.ValueUtil.asRegister) GraalHotSpotVMConfig(org.graalvm.compiler.hotspot.GraalHotSpotVMConfig) AMD64MacroAssembler(org.graalvm.compiler.asm.amd64.AMD64MacroAssembler) LIRInstructionClass(org.graalvm.compiler.lir.LIRInstructionClass) AllocatableValue(jdk.vm.ci.meta.AllocatableValue)
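
At run time the emitted code just bumps 64-bit counters held in a thread-local array: back up a scratch register to a stack slot, load the counters array base from the thread register, add each increment at its displacement, and restore the scratch register. A minimal plain-Java sketch of that run-time effect (applyIncrements, displacements and increments are illustrative names, not Graal API):

static void applyIncrements(long[] countersArray, int[] displacements, long[] increments) {
    for (int i = 0; i < increments.length; i++) {
        // each counter is an 8-byte slot addressed by a byte displacement from the array base
        countersArray[displacements[i] / 8] += increments[i];
    }
}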

Example 2 with AND

Use of org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND in project graal by oracle.

From class AMD64HotSpotPushInterpreterFrameOp, method emitCode.

@Override
public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
    final Register frameSizeRegister = asRegister(frameSize);
    final Register framePcRegister = asRegister(framePc);
    final Register senderSpRegister = asRegister(senderSp);
    final Register initialInfoRegister = asRegister(initialInfo);
    final int wordSize = 8;
    // We'll push PC and BP by hand.
    masm.subq(frameSizeRegister, 2 * wordSize);
    // Push return address.
    masm.push(framePcRegister);
    // Prolog
    masm.push(initialInfoRegister);
    masm.movq(initialInfoRegister, rsp);
    masm.subq(rsp, frameSizeRegister);
    // This value is corrected by layout_activation_impl.
    masm.movptr(new AMD64Address(initialInfoRegister, config.frameInterpreterFrameLastSpOffset * wordSize), 0);
    // Make the frame walkable.
    masm.movq(new AMD64Address(initialInfoRegister, config.frameInterpreterFrameSenderSpOffset * wordSize), senderSpRegister);
}
Also used : ValueUtil.asRegister(jdk.vm.ci.code.ValueUtil.asRegister) Register(jdk.vm.ci.code.Register) AMD64Address(org.graalvm.compiler.asm.amd64.AMD64Address)
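
The pointer arithmetic above can be modelled directly. A minimal sketch, assuming a downward-growing stack and an 8-byte word; memory, framePc and savedRbp are hypothetical stand-ins for real stack memory and register contents, not Graal API:

static long pushInterpreterFrame(java.util.Map<Long, Long> memory, long rsp,
                long frameSize, long framePc, long savedRbp) {
    final long wordSize = 8;
    frameSize -= 2 * wordSize;    // PC and BP are pushed by hand, so shrink the frame by two words
    rsp -= wordSize;
    memory.put(rsp, framePc);     // push(framePcRegister): the return address
    rsp -= wordSize;
    memory.put(rsp, savedRbp);    // push(initialInfoRegister): the caller's frame pointer
    long framePointer = rsp;      // movq(initialInfoRegister, rsp): new frame pointer
    rsp -= frameSize;             // subq(rsp, frameSizeRegister): room for the rest of the frame
    // The last-sp and sender-sp interpreter-frame slots are then written at fixed
    // offsets from framePointer, as in the movptr/movq calls above.
    return rsp;
}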

Example 3 with AND

Use of org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND in project graal by oracle.

From class AMD64ArrayCompareToOp, method emitCode.

@Override
public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
    Register result = asRegister(resultValue);
    Register str1 = asRegister(temp1);
    Register str2 = asRegister(temp2);
    // Load array base addresses.
    masm.leaq(str1, new AMD64Address(asRegister(array1Value), array1BaseOffset));
    masm.leaq(str2, new AMD64Address(asRegister(array2Value), array2BaseOffset));
    Register cnt1 = asRegister(length1Value);
    Register cnt2 = asRegister(length2Value);
    // Checkstyle: stop
    Label LENGTH_DIFF_LABEL = new Label();
    Label POP_LABEL = new Label();
    Label DONE_LABEL = new Label();
    Label WHILE_HEAD_LABEL = new Label();
    // used only _LP64 && AVX3
    Label COMPARE_WIDE_VECTORS_LOOP_FAILED = new Label();
    int stride, stride2;
    int adr_stride = -1;
    int adr_stride1 = -1;
    int adr_stride2 = -1;
    // Checkstyle: resume
    int stride2x2 = 0x40;
    AMD64Address.Scale scale = null;
    AMD64Address.Scale scale1 = null;
    AMD64Address.Scale scale2 = null;
    // if (ae != StrIntrinsicNode::LL) {
    if (kind1 != JavaKind.Byte || kind2 != JavaKind.Byte) {
        stride2x2 = 0x20;
    }
    // if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
    if (kind1 != kind2) {
        masm.shrl(cnt2, 1);
    }
    // Compute the minimum of the string lengths, and push the difference
    // of the string lengths onto the stack.
    // Use a conditional move to pick the minimum.
    masm.movl(result, cnt1);
    masm.subl(cnt1, cnt2);
    masm.push(cnt1);
    // cnt2 = min(cnt1, cnt2)
    masm.cmovl(ConditionFlag.LessEqual, cnt2, result);
    // Is the minimum length zero?
    masm.testl(cnt2, cnt2);
    masm.jcc(ConditionFlag.Zero, LENGTH_DIFF_LABEL);
    // if (ae == StrIntrinsicNode::LL) {
    if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) {
        // Load first bytes
        // result = str1[0]
        masm.movzbl(result, new AMD64Address(str1, 0));
        // cnt1 = str2[0]
        masm.movzbl(cnt1, new AMD64Address(str2, 0));
    // } else if (ae == StrIntrinsicNode::UU) {
    } else if (kind1 == JavaKind.Char && kind2 == JavaKind.Char) {
        // Load first characters
        masm.movzwl(result, new AMD64Address(str1, 0));
        masm.movzwl(cnt1, new AMD64Address(str2, 0));
    } else {
        masm.movzbl(result, new AMD64Address(str1, 0));
        masm.movzwl(cnt1, new AMD64Address(str2, 0));
    }
    masm.subl(result, cnt1);
    masm.jcc(ConditionFlag.NotZero, POP_LABEL);
    // if (ae == StrIntrinsicNode::UU) {
    if (kind1 == JavaKind.Char && kind2 == JavaKind.Char) {
        // Divide length by 2 to get number of chars
        masm.shrl(cnt2, 1);
    }
    masm.cmpl(cnt2, 1);
    masm.jcc(ConditionFlag.Equal, LENGTH_DIFF_LABEL);
    // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
    if (kind1 == kind2) {
        masm.cmpptr(str1, str2);
        masm.jcc(ConditionFlag.Equal, LENGTH_DIFF_LABEL);
        // if (ae == StrIntrinsicNode::LL) {
        if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) {
            scale = AMD64Address.Scale.Times1;
            stride = 16;
        } else {
            scale = AMD64Address.Scale.Times2;
            stride = 8;
        }
    } else {
        scale1 = AMD64Address.Scale.Times1;
        scale2 = AMD64Address.Scale.Times2;
        // scale not used
        stride = 8;
    }
    // if (UseAVX >= 2 && UseSSE42Intrinsics) {
    if (supportsAVX2(crb.target) && supportsSSE42(crb.target)) {
        Register vec1 = asRegister(vectorTemp1, AMD64Kind.DOUBLE);
        // Checkstyle: stop
        Label COMPARE_WIDE_VECTORS = new Label();
        Label VECTOR_NOT_EQUAL = new Label();
        Label COMPARE_WIDE_TAIL = new Label();
        Label COMPARE_SMALL_STR = new Label();
        Label COMPARE_WIDE_VECTORS_LOOP = new Label();
        Label COMPARE_16_CHARS = new Label();
        Label COMPARE_INDEX_CHAR = new Label();
        Label COMPARE_WIDE_VECTORS_LOOP_AVX2 = new Label();
        Label COMPARE_TAIL_LONG = new Label();
        // used only _LP64 && AVX3
        Label COMPARE_WIDE_VECTORS_LOOP_AVX3 = new Label();
        // Checkstyle: resume
        int pcmpmask = 0x19;
        // if (ae == StrIntrinsicNode::LL) {
        if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) {
            pcmpmask &= ~0x01;
        }
        // if (ae == StrIntrinsicNode::LL) {
        if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) {
            stride2 = 32;
        } else {
            stride2 = 16;
        }
        // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
        if (kind1 == kind2) {
            adr_stride = stride << scale.log2;
        } else {
            // stride << scale1;
            adr_stride1 = 8;
            // stride << scale2;
            adr_stride2 = 16;
        }
        assert result.equals(rax) && cnt2.equals(rdx) && cnt1.equals(rcx) : "pcmpestri";
        // rax and rdx are used by pcmpestri as element counters
        masm.movl(result, cnt2);
        // cnt2 holds the vector count
        masm.andl(cnt2, ~(stride2 - 1));
        masm.jcc(ConditionFlag.Zero, COMPARE_TAIL_LONG);
        // Fast path: compare the first two 8-char vectors.
        masm.bind(COMPARE_16_CHARS);
        // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
        if (kind1 == kind2) {
            masm.movdqu(vec1, new AMD64Address(str1, 0));
        } else {
            masm.pmovzxbw(vec1, new AMD64Address(str1, 0));
        }
        masm.pcmpestri(vec1, new AMD64Address(str2, 0), pcmpmask);
        masm.jccb(ConditionFlag.Below, COMPARE_INDEX_CHAR);
        // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
        if (kind1 == kind2) {
            masm.movdqu(vec1, new AMD64Address(str1, adr_stride));
            masm.pcmpestri(vec1, new AMD64Address(str2, adr_stride), pcmpmask);
        } else {
            masm.pmovzxbw(vec1, new AMD64Address(str1, adr_stride1));
            masm.pcmpestri(vec1, new AMD64Address(str2, adr_stride2), pcmpmask);
        }
        masm.jccb(ConditionFlag.AboveEqual, COMPARE_WIDE_VECTORS);
        masm.addl(cnt1, stride);
        // Compare the characters at index in cnt1
        // cnt1 has the offset of the mismatching character
        masm.bind(COMPARE_INDEX_CHAR);
        loadNextElements(masm, result, cnt2, str1, str2, scale, scale1, scale2, cnt1);
        masm.subl(result, cnt2);
        masm.jmp(POP_LABEL);
        // Set up the registers to start the vector comparison loop
        masm.bind(COMPARE_WIDE_VECTORS);
        // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
        if (kind1 == kind2) {
            masm.leaq(str1, new AMD64Address(str1, result, scale));
            masm.leaq(str2, new AMD64Address(str2, result, scale));
        } else {
            masm.leaq(str1, new AMD64Address(str1, result, scale1));
            masm.leaq(str2, new AMD64Address(str2, result, scale2));
        }
        masm.subl(result, stride2);
        masm.subl(cnt2, stride2);
        masm.jcc(ConditionFlag.Zero, COMPARE_WIDE_TAIL);
        masm.negq(result);
        // In a loop, compare 16-chars (32-bytes) at once using (vpxor+vptest)
        masm.bind(COMPARE_WIDE_VECTORS_LOOP);
        // if (VM_Version::supports_avx512vlbw()) { // trying 64 bytes fast loop
        if (supportsAVX512VLBW(crb.target)) {
            masm.cmpl(cnt2, stride2x2);
            masm.jccb(ConditionFlag.Below, COMPARE_WIDE_VECTORS_LOOP_AVX2);
            // cnt2 holds the vector count
            masm.testl(cnt2, stride2x2 - 1);
            // means we cannot subtract by 0x40
            masm.jccb(ConditionFlag.NotZero, COMPARE_WIDE_VECTORS_LOOP_AVX2);
            // the hottest loop
            masm.bind(COMPARE_WIDE_VECTORS_LOOP_AVX3);
            // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
            if (kind1 == kind2) {
                masm.evmovdquq(vec1, new AMD64Address(str1, result, scale), AvxVectorLen.AVX_512bit);
                // k7 == 11..11, if operands equal, otherwise k7 has some 0
                masm.evpcmpeqb(k7, vec1, new AMD64Address(str2, result, scale), AvxVectorLen.AVX_512bit);
            } else {
                masm.vpmovzxbw(vec1, new AMD64Address(str1, result, scale1), AvxVectorLen.AVX_512bit);
                // k7 == 11..11, if operands equal, otherwise k7 has some 0
                masm.evpcmpeqb(k7, vec1, new AMD64Address(str2, result, scale2), AvxVectorLen.AVX_512bit);
            }
            masm.kortestql(k7, k7);
            // miscompare
            masm.jcc(ConditionFlag.AboveEqual, COMPARE_WIDE_VECTORS_LOOP_FAILED);
            // update since we already compared at this addr
            masm.addq(result, stride2x2);
            // and sub the size too
            masm.subl(cnt2, stride2x2);
            masm.jccb(ConditionFlag.NotZero, COMPARE_WIDE_VECTORS_LOOP_AVX3);
            masm.vpxor(vec1, vec1, vec1);
            masm.jmpb(COMPARE_WIDE_TAIL);
        }
        masm.bind(COMPARE_WIDE_VECTORS_LOOP_AVX2);
        // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
        if (kind1 == kind2) {
            masm.vmovdqu(vec1, new AMD64Address(str1, result, scale));
            masm.vpxor(vec1, vec1, new AMD64Address(str2, result, scale));
        } else {
            masm.vpmovzxbw(vec1, new AMD64Address(str1, result, scale1), AvxVectorLen.AVX_256bit);
            masm.vpxor(vec1, vec1, new AMD64Address(str2, result, scale2));
        }
        masm.vptest(vec1, vec1);
        masm.jcc(ConditionFlag.NotZero, VECTOR_NOT_EQUAL);
        masm.addq(result, stride2);
        masm.subl(cnt2, stride2);
        masm.jcc(ConditionFlag.NotZero, COMPARE_WIDE_VECTORS_LOOP);
        // clean upper bits of YMM registers
        masm.vpxor(vec1, vec1, vec1);
        // compare wide vectors tail
        masm.bind(COMPARE_WIDE_TAIL);
        masm.testq(result, result);
        masm.jcc(ConditionFlag.Zero, LENGTH_DIFF_LABEL);
        masm.movl(result, stride2);
        masm.movl(cnt2, result);
        masm.negq(result);
        masm.jmp(COMPARE_WIDE_VECTORS_LOOP_AVX2);
        // Identify whether the mismatch is in the higher or lower 16 bytes of the 32-byte vectors.
        masm.bind(VECTOR_NOT_EQUAL);
        // clean upper bits of YMM registers
        masm.vpxor(vec1, vec1, vec1);
        // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
        if (kind1 == kind2) {
            masm.leaq(str1, new AMD64Address(str1, result, scale));
            masm.leaq(str2, new AMD64Address(str2, result, scale));
        } else {
            masm.leaq(str1, new AMD64Address(str1, result, scale1));
            masm.leaq(str2, new AMD64Address(str2, result, scale2));
        }
        masm.jmp(COMPARE_16_CHARS);
        // Compare tail chars, length between 1 and 15 chars
        masm.bind(COMPARE_TAIL_LONG);
        masm.movl(cnt2, result);
        masm.cmpl(cnt2, stride);
        masm.jcc(ConditionFlag.Less, COMPARE_SMALL_STR);
        // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
        if (kind1 == kind2) {
            masm.movdqu(vec1, new AMD64Address(str1, 0));
        } else {
            masm.pmovzxbw(vec1, new AMD64Address(str1, 0));
        }
        masm.pcmpestri(vec1, new AMD64Address(str2, 0), pcmpmask);
        masm.jcc(ConditionFlag.Below, COMPARE_INDEX_CHAR);
        masm.subq(cnt2, stride);
        masm.jcc(ConditionFlag.Zero, LENGTH_DIFF_LABEL);
        // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
        if (kind1 == kind2) {
            masm.leaq(str1, new AMD64Address(str1, result, scale));
            masm.leaq(str2, new AMD64Address(str2, result, scale));
        } else {
            masm.leaq(str1, new AMD64Address(str1, result, scale1));
            masm.leaq(str2, new AMD64Address(str2, result, scale2));
        }
        masm.negq(cnt2);
        masm.jmpb(WHILE_HEAD_LABEL);
        masm.bind(COMPARE_SMALL_STR);
    } else if (supportsSSE42(crb.target)) {
        Register vec1 = asRegister(vectorTemp1, AMD64Kind.DOUBLE);
        // Checkstyle: stop
        Label COMPARE_WIDE_VECTORS = new Label();
        Label VECTOR_NOT_EQUAL = new Label();
        Label COMPARE_TAIL = new Label();
        // Checkstyle: resume
        int pcmpmask = 0x19;
        // Set up to compare 8-char (16-byte) vectors,
        // starting from the first character again because it has an aligned address.
        masm.movl(result, cnt2);
        // cnt2 holds the vector count
        masm.andl(cnt2, ~(stride - 1));
        // if (ae == StrIntrinsicNode::LL) {
        if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) {
            pcmpmask &= ~0x01;
        }
        masm.jcc(ConditionFlag.Zero, COMPARE_TAIL);
        // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
        if (kind1 == kind2) {
            masm.leaq(str1, new AMD64Address(str1, result, scale));
            masm.leaq(str2, new AMD64Address(str2, result, scale));
        } else {
            masm.leaq(str1, new AMD64Address(str1, result, scale1));
            masm.leaq(str2, new AMD64Address(str2, result, scale2));
        }
        masm.negq(result);
        // rcx - first mismatched element index
        assert result.equals(rax) && cnt2.equals(rdx) && cnt1.equals(rcx) : "pcmpestri";
        masm.bind(COMPARE_WIDE_VECTORS);
        // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
        if (kind1 == kind2) {
            masm.movdqu(vec1, new AMD64Address(str1, result, scale));
            masm.pcmpestri(vec1, new AMD64Address(str2, result, scale), pcmpmask);
        } else {
            masm.pmovzxbw(vec1, new AMD64Address(str1, result, scale1));
            masm.pcmpestri(vec1, new AMD64Address(str2, result, scale2), pcmpmask);
        }
        // After pcmpestri cnt1(rcx) contains mismatched element index
        // CF==1
        masm.jccb(ConditionFlag.Below, VECTOR_NOT_EQUAL);
        masm.addq(result, stride);
        masm.subq(cnt2, stride);
        masm.jccb(ConditionFlag.NotZero, COMPARE_WIDE_VECTORS);
        // compare wide vectors tail
        masm.testq(result, result);
        masm.jcc(ConditionFlag.Zero, LENGTH_DIFF_LABEL);
        masm.movl(cnt2, stride);
        masm.movl(result, stride);
        masm.negq(result);
        // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
        if (kind1 == kind2) {
            masm.movdqu(vec1, new AMD64Address(str1, result, scale));
            masm.pcmpestri(vec1, new AMD64Address(str2, result, scale), pcmpmask);
        } else {
            masm.pmovzxbw(vec1, new AMD64Address(str1, result, scale1));
            masm.pcmpestri(vec1, new AMD64Address(str2, result, scale2), pcmpmask);
        }
        masm.jccb(ConditionFlag.AboveEqual, LENGTH_DIFF_LABEL);
        // Mismatched characters in the vectors
        masm.bind(VECTOR_NOT_EQUAL);
        masm.addq(cnt1, result);
        loadNextElements(masm, result, cnt2, str1, str2, scale, scale1, scale2, cnt1);
        masm.subl(result, cnt2);
        masm.jmpb(POP_LABEL);
        // limit is zero
        masm.bind(COMPARE_TAIL);
        masm.movl(cnt2, result);
    // Fallthru to tail compare
    }
    // if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
    if (kind1 == kind2) {
        masm.leaq(str1, new AMD64Address(str1, cnt2, scale));
        masm.leaq(str2, new AMD64Address(str2, cnt2, scale));
    } else {
        masm.leaq(str1, new AMD64Address(str1, cnt2, scale1));
        masm.leaq(str2, new AMD64Address(str2, cnt2, scale2));
    }
    // first character was compared already
    masm.decrementl(cnt2);
    masm.negq(cnt2);
    // Compare the rest of the elements
    masm.bind(WHILE_HEAD_LABEL);
    loadNextElements(masm, result, cnt1, str1, str2, scale, scale1, scale2, cnt2);
    masm.subl(result, cnt1);
    masm.jccb(ConditionFlag.NotZero, POP_LABEL);
    masm.incrementq(cnt2, 1);
    masm.jccb(ConditionFlag.NotZero, WHILE_HEAD_LABEL);
    // Strings are equal up to min length. Return the length difference.
    masm.bind(LENGTH_DIFF_LABEL);
    masm.pop(result);
    // if (ae == StrIntrinsicNode::UU) {
    if (kind1 == JavaKind.Char && kind2 == JavaKind.Char) {
        // Divide diff by 2 to get number of chars
        masm.sarl(result, 1);
    }
    masm.jmpb(DONE_LABEL);
    // if (VM_Version::supports_avx512vlbw()) {
    if (supportsAVX512VLBW(crb.target)) {
        masm.bind(COMPARE_WIDE_VECTORS_LOOP_FAILED);
        masm.kmovql(cnt1, k7);
        masm.notq(cnt1);
        masm.bsfq(cnt2, cnt1);
        // if (ae != StrIntrinsicNode::LL) {
        if (kind1 != JavaKind.Byte || kind2 != JavaKind.Byte) {
            // Divide diff by 2 to get number of chars
            masm.sarl(cnt2, 1);
        }
        masm.addq(result, cnt2);
        // if (ae == StrIntrinsicNode::LL) {
        if (kind1 == JavaKind.Byte && kind2 == JavaKind.Byte) {
            masm.movzbl(cnt1, new AMD64Address(str2, result, Scale.Times1));
            masm.movzbl(result, new AMD64Address(str1, result, Scale.Times1));
        } else if (kind1 == JavaKind.Char && kind2 == JavaKind.Char) {
            masm.movzwl(cnt1, new AMD64Address(str2, result, scale));
            masm.movzwl(result, new AMD64Address(str1, result, scale));
        } else {
            masm.movzwl(cnt1, new AMD64Address(str2, result, scale2));
            masm.movzbl(result, new AMD64Address(str1, result, scale1));
        }
        masm.subl(result, cnt1);
        masm.jmpb(POP_LABEL);
    }
    // Discard the stored length difference
    masm.bind(POP_LABEL);
    masm.pop(cnt1);
    // That's it
    masm.bind(DONE_LABEL);
    // if (ae == StrIntrinsicNode::UL) {
    if (kind1 == JavaKind.Char && kind2 == JavaKind.Byte) {
        masm.negl(result);
    }
}
Also used : Register(jdk.vm.ci.code.Register) ValueUtil.asRegister(jdk.vm.ci.code.ValueUtil.asRegister) Scale(org.graalvm.compiler.asm.amd64.AMD64Address.Scale) Label(org.graalvm.compiler.asm.Label) AMD64Address(org.graalvm.compiler.asm.amd64.AMD64Address)
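
All of the vector paths above are fast paths for one contract: lexicographic comparison of the two arrays, with the pushed length difference deciding ties. A scalar reference with the same result contract (this mirrors String.compareTo semantics and is not the intrinsic itself; mixed byte/char inputs are zero-extended before subtracting, matching the movzbl/movzwl pairs):

static int compareTo(char[] a, char[] b) {
    int min = Math.min(a.length, b.length);
    for (int i = 0; i < min; i++) {
        if (a[i] != b[i]) {
            return a[i] - b[i];   // first mismatch: element difference (the POP_LABEL path)
        }
    }
    return a.length - b.length;   // equal prefix: length difference (the LENGTH_DIFF_LABEL path)
}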

Example 4 with AND

Use of org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND in project graal by oracle.

From class AMD64ArrayEqualsOp, method emitTailCompares.

/**
 * Emits code to compare the remaining 1 to 4 bytes.
 */
private void emitTailCompares(AMD64MacroAssembler masm, Register result, Register array1, Register array2, Register length, Label trueLabel, Label falseLabel) {
    Label compare2Bytes = new Label();
    Label compare1Byte = new Label();
    Register temp = asRegister(temp4);
    if (kind.getByteCount() <= 4) {
        // Compare trailing 4 bytes, if any.
        masm.testl(result, 4);
        masm.jccb(ConditionFlag.Zero, compare2Bytes);
        masm.movl(temp, new AMD64Address(array1, 0));
        masm.cmpl(temp, new AMD64Address(array2, 0));
        if (kind == JavaKind.Float) {
            masm.jccb(ConditionFlag.Equal, trueLabel);
            emitFloatCompare(masm, array1, array2, Register.None, 0, falseLabel, true);
            masm.jmpb(trueLabel);
        } else {
            masm.jccb(ConditionFlag.NotEqual, falseLabel);
        }
        if (kind.getByteCount() <= 2) {
            // Move array pointers forward.
            masm.leaq(array1, new AMD64Address(array1, 4));
            masm.leaq(array2, new AMD64Address(array2, 4));
            // Compare trailing 2 bytes, if any.
            masm.bind(compare2Bytes);
            masm.testl(result, 2);
            masm.jccb(ConditionFlag.Zero, compare1Byte);
            masm.movzwl(temp, new AMD64Address(array1, 0));
            masm.movzwl(length, new AMD64Address(array2, 0));
            masm.cmpl(temp, length);
            masm.jccb(ConditionFlag.NotEqual, falseLabel);
            // The one-byte tail compare is only required for boolean and byte arrays.
            if (kind.getByteCount() <= 1) {
                // Move array pointers forward before we compare the last trailing byte.
                masm.leaq(array1, new AMD64Address(array1, 2));
                masm.leaq(array2, new AMD64Address(array2, 2));
                // Compare trailing byte, if any.
                masm.bind(compare1Byte);
                masm.testl(result, 1);
                masm.jccb(ConditionFlag.Zero, trueLabel);
                masm.movzbl(temp, new AMD64Address(array1, 0));
                masm.movzbl(length, new AMD64Address(array2, 0));
                masm.cmpl(temp, length);
                masm.jccb(ConditionFlag.NotEqual, falseLabel);
            } else {
                masm.bind(compare1Byte);
            }
        } else {
            masm.bind(compare2Bytes);
        }
    }
}
Also used : Register(jdk.vm.ci.code.Register) ValueUtil.asRegister(jdk.vm.ci.code.ValueUtil.asRegister) Label(org.graalvm.compiler.asm.Label) AMD64Address(org.graalvm.compiler.asm.amd64.AMD64Address)
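
The structure is a classic tail ladder: once full vector chunks have been handled elsewhere, the low bits of the byte count select a 4-, 2- and 1-byte tail in turn. A scalar sketch of the same ladder for byte arrays (tailEquals and rangeEquals are hypothetical helper names; the NaN-tolerant float path taken via emitFloatCompare is omitted):

static boolean tailEquals(byte[] a, byte[] b, int offset, int length) {
    if ((length & 4) != 0) {              // trailing 4 bytes, if any
        if (!rangeEquals(a, b, offset, 4)) {
            return false;
        }
        offset += 4;
    }
    if ((length & 2) != 0) {              // trailing 2 bytes, if any
        if (!rangeEquals(a, b, offset, 2)) {
            return false;
        }
        offset += 2;
    }
    // trailing byte, if any (only needed for 1-byte element kinds)
    return (length & 1) == 0 || a[offset] == b[offset];
}

static boolean rangeEquals(byte[] a, byte[] b, int offset, int n) {
    for (int i = 0; i < n; i++) {
        if (a[offset + i] != b[offset + i]) {
            return false;
        }
    }
    return true;
}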

Example 5 with AND

Use of org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND in project graal by oracle.

From class AMD64MathIntrinsicUnaryOp, method logIntrinsic.

/*
     * Copyright (c) 2014, 2016, Intel Corporation. All rights reserved. Intel Math Library (LIBM)
     * Source Code
     *
     * ALGORITHM DESCRIPTION - LOG()
     * -----------------------------
     *
     * x = 2^k * mx, mx in [1,2)
     *
     * Get B ~ 1/mx based on the output of the rcpps instruction (B0):
     * B = int((B0*2^7+0.5))/2^7
     *
     * Reduced argument: r = B*mx - 1.0 (computed accurately in high and low parts)
     *
     * Result: k*log(2) - log(B) + p(r) if |x-1| >= small value (2^-6), where p(r) is a
     * degree 7 polynomial, -log(B) is read from the data table (high and low parts), and
     * the result is formed from high and low parts.
     *
     * Special cases:
     * log(NaN) = quiet NaN, and raise invalid exception
     * log(+INF) = that INF
     * log(0) = -INF with divide-by-zero exception raised
     * log(1) = +0
     * log(x) = NaN with invalid exception raised if x < -0, including -INF
     *
     */
public void logIntrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) {
    ArrayDataPointerConstant logTwoTablePtr = new ArrayDataPointerConstant(logTwoTable, 16);
    ArrayDataPointerConstant logTwoDataPtr = new ArrayDataPointerConstant(logTwoData, 16);
    ArrayDataPointerConstant coeffLogTwoDataPtr = new ArrayDataPointerConstant(coeffLogTwoData, 16);
    Label bb0 = new Label();
    Label bb1 = new Label();
    Label bb2 = new Label();
    Label bb3 = new Label();
    Label bb4 = new Label();
    Label bb5 = new Label();
    Label bb6 = new Label();
    Label bb7 = new Label();
    Label bb8 = new Label();
    Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD);
    Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD);
    Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD);
    Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD);
    Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE);
    Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE);
    Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE);
    Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE);
    Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE);
    Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE);
    Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE);
    AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp);
    setCrb(crb);
    masm.movdq(stackSlot, value);
    if (dest.encoding != value.encoding) {
        masm.movdqu(dest, value);
    }
    masm.movq(gpr1, 0x3ff0000000000000L);
    masm.movdq(temp2, gpr1);
    masm.movq(gpr3, 0x77f0000000000000L);
    masm.movdq(temp3, gpr3);
    masm.movl(gpr2, 32768);
    masm.movdl(temp4, gpr2);
    masm.movq(gpr2, 0xffffe00000000000L);
    masm.movdq(temp5, gpr2);
    masm.movdqu(temp1, value);
    masm.pextrw(gpr1, dest, 3);
    masm.por(dest, temp2);
    masm.movl(gpr2, 16352);
    masm.psrlq(dest, 27);
    masm.leaq(gpr4, externalAddress(logTwoTablePtr));
    masm.psrld(dest, 2);
    masm.rcpps(dest, dest);
    masm.psllq(temp1, 12);
    masm.pshufd(temp6, temp5, 0xE4);
    masm.psrlq(temp1, 12);
    masm.subl(gpr1, 16);
    masm.cmpl(gpr1, 32736);
    masm.jcc(ConditionFlag.AboveEqual, bb0);
    masm.bind(bb1);
    masm.paddd(dest, temp4);
    masm.por(temp1, temp3);
    masm.movdl(gpr3, dest);
    masm.psllq(dest, 29);
    masm.pand(temp5, temp1);
    masm.pand(dest, temp6);
    masm.subsd(temp1, temp5);
    masm.mulpd(temp5, dest);
    masm.andl(gpr1, 32752);
    masm.subl(gpr1, gpr2);
    masm.cvtsi2sdl(temp7, gpr1);
    masm.mulsd(temp1, dest);
    // 0xfefa3800,
    masm.movdq(temp6, externalAddress(logTwoDataPtr));
    // 0x3fa62e42
    // 0x92492492,
    masm.movdqu(temp3, externalAddress(coeffLogTwoDataPtr));
    // 0x3fc24924,
    // 0x00000000,
    // 0xbfd00000
    masm.subsd(temp5, temp2);
    masm.andl(gpr3, 16711680);
    masm.shrl(gpr3, 12);
    masm.movdqu(dest, new AMD64Address(gpr4, gpr3, Scale.Times1, 0));
    masm.leaq(gpr4, externalAddress(coeffLogTwoDataPtr));
    // 0x3d6fb175,
    masm.movdqu(temp4, new AMD64Address(gpr4, 16));
    // 0xbfc5555e,
    // 0x55555555,
    // 0x3fd55555
    masm.addsd(temp1, temp5);
    // 0x9999999a,
    masm.movdqu(temp2, new AMD64Address(gpr4, 32));
    // 0x3fc99999,
    // 0x00000000,
    // 0xbfe00000
    masm.mulsd(temp6, temp7);
    if (masm.supports(CPUFeature.SSE3)) {
        masm.movddup(temp5, temp1);
    } else {
        masm.movdqu(temp5, temp1);
        masm.movlhps(temp5, temp5);
    }
    masm.leaq(gpr4, externalAddress(logTwoDataPtr));
    // 0x93c76730,
    masm.mulsd(temp7, new AMD64Address(gpr4, 8));
    // 0x3ceef357
    masm.mulsd(temp3, temp1);
    masm.addsd(dest, temp6);
    masm.mulpd(temp4, temp5);
    masm.mulpd(temp5, temp5);
    if (masm.supports(CPUFeature.SSE3)) {
        masm.movddup(temp6, dest);
    } else {
        masm.movdqu(temp6, dest);
        masm.movlhps(temp6, temp6);
    }
    masm.addsd(dest, temp1);
    masm.addpd(temp4, temp2);
    masm.mulpd(temp3, temp5);
    masm.subsd(temp6, dest);
    masm.mulsd(temp4, temp1);
    masm.pshufd(temp2, dest, 0xEE);
    masm.addsd(temp1, temp6);
    masm.mulsd(temp5, temp5);
    masm.addsd(temp7, temp2);
    masm.addpd(temp4, temp3);
    masm.addsd(temp1, temp7);
    masm.mulpd(temp4, temp5);
    masm.addsd(temp1, temp4);
    masm.pshufd(temp5, temp4, 0xEE);
    masm.addsd(temp1, temp5);
    masm.addsd(dest, temp1);
    masm.jmp(bb8);
    masm.bind(bb0);
    masm.movdq(dest, stackSlot);
    masm.movdq(temp1, stackSlot);
    masm.addl(gpr1, 16);
    masm.cmpl(gpr1, 32768);
    masm.jcc(ConditionFlag.AboveEqual, bb2);
    masm.cmpl(gpr1, 16);
    masm.jcc(ConditionFlag.Below, bb3);
    masm.bind(bb4);
    masm.addsd(dest, dest);
    masm.jmp(bb8);
    masm.bind(bb5);
    masm.jcc(ConditionFlag.Above, bb4);
    masm.cmpl(gpr3, 0);
    masm.jcc(ConditionFlag.Above, bb4);
    masm.jmp(bb6);
    masm.bind(bb3);
    masm.xorpd(temp1, temp1);
    masm.addsd(temp1, dest);
    masm.movdl(gpr3, temp1);
    masm.psrlq(temp1, 32);
    masm.movdl(gpr2, temp1);
    masm.orl(gpr3, gpr2);
    masm.cmpl(gpr3, 0);
    masm.jcc(ConditionFlag.Equal, bb7);
    masm.xorpd(temp1, temp1);
    masm.movl(gpr1, 18416);
    masm.pinsrw(temp1, gpr1, 3);
    masm.mulsd(dest, temp1);
    masm.movdqu(temp1, dest);
    masm.pextrw(gpr1, dest, 3);
    masm.por(dest, temp2);
    masm.psrlq(dest, 27);
    masm.movl(gpr2, 18416);
    masm.psrld(dest, 2);
    masm.rcpps(dest, dest);
    masm.psllq(temp1, 12);
    masm.pshufd(temp6, temp5, 0xE4);
    masm.psrlq(temp1, 12);
    masm.jmp(bb1);
    masm.bind(bb2);
    masm.movdl(gpr3, temp1);
    masm.psrlq(temp1, 32);
    masm.movdl(gpr2, temp1);
    masm.addl(gpr2, gpr2);
    masm.cmpl(gpr2, -2097152);
    masm.jcc(ConditionFlag.AboveEqual, bb5);
    masm.orl(gpr3, gpr2);
    masm.cmpl(gpr3, 0);
    masm.jcc(ConditionFlag.Equal, bb7);
    masm.bind(bb6);
    masm.xorpd(temp1, temp1);
    masm.xorpd(dest, dest);
    masm.movl(gpr1, 32752);
    masm.pinsrw(temp1, gpr1, 3);
    masm.mulsd(dest, temp1);
    masm.jmp(bb8);
    masm.bind(bb7);
    masm.xorpd(temp1, temp1);
    masm.xorpd(dest, dest);
    masm.movl(gpr1, 49136);
    masm.pinsrw(dest, gpr1, 3);
    masm.divsd(dest, temp1);
    masm.bind(bb8);
}
Also used : Register(jdk.vm.ci.code.Register) ValueUtil.asRegister(jdk.vm.ci.code.ValueUtil.asRegister) ArrayDataPointerConstant(org.graalvm.compiler.lir.asm.ArrayDataPointerConstant) Label(org.graalvm.compiler.asm.Label) AMD64Address(org.graalvm.compiler.asm.amd64.AMD64Address)
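
The reduction described in the header comment can be checked with a direct scalar sketch (logSketch is a hypothetical helper: it uses Math.log in place of the -log(B) table and a truncated series in place of the degree-7 polynomial, so it is illustrative rather than bit-accurate, and it skips the special cases):

static double logSketch(double x) {
    int k = Math.getExponent(x);                // x = 2^k * mx, for normal positive x
    double mx = Math.scalb(x, -k);              // mx in [1, 2)
    double b = Math.rint(128.0 / mx) / 128.0;   // B = int((B0*2^7+0.5))/2^7, with an exact reciprocal for B0
    double r = b * mx - 1.0;                    // reduced argument; |r| stays below about 2^-7
    double p = r - r * r / 2 + r * r * r / 3;   // short stand-in for the degree-7 polynomial p(r)
    return k * Math.log(2.0) - Math.log(b) + p; // k*log(2) - log(B) + p(r)
}

For x = 10.0 this agrees with Math.log(10.0) to around 1e-9, the accuracy of the truncated series; the real code reaches full double precision by keeping high and low parts separate.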

Aggregations

Register (jdk.vm.ci.code.Register): 8
ValueUtil.asRegister (jdk.vm.ci.code.ValueUtil.asRegister): 7
AMD64Address (org.graalvm.compiler.asm.amd64.AMD64Address): 7
Label (org.graalvm.compiler.asm.Label): 5
Scale (org.graalvm.compiler.asm.amd64.AMD64Address.Scale): 2
ArrayDataPointerConstant (org.graalvm.compiler.lir.asm.ArrayDataPointerConstant): 2
AMD64.rax (jdk.vm.ci.amd64.AMD64.rax): 1
AMD64.rbx (jdk.vm.ci.amd64.AMD64.rbx): 1
TargetDescription (jdk.vm.ci.code.TargetDescription): 1
ValueUtil.isRegister (jdk.vm.ci.code.ValueUtil.isRegister): 1
AllocatableValue (jdk.vm.ci.meta.AllocatableValue): 1
Value (jdk.vm.ci.meta.Value): 1
AMD64MacroAssembler (org.graalvm.compiler.asm.amd64.AMD64MacroAssembler): 1
GraalError (org.graalvm.compiler.debug.GraalError): 1
GraalHotSpotVMConfig (org.graalvm.compiler.hotspot.GraalHotSpotVMConfig): 1
HotSpotCounterOp (org.graalvm.compiler.hotspot.HotSpotCounterOp): 1
HotSpotRegistersProvider (org.graalvm.compiler.hotspot.meta.HotSpotRegistersProvider): 1
LIRInstructionClass (org.graalvm.compiler.lir.LIRInstructionClass): 1
LIRValueUtil.asJavaConstant (org.graalvm.compiler.lir.LIRValueUtil.asJavaConstant): 1
LIRValueUtil.isJavaConstant (org.graalvm.compiler.lir.LIRValueUtil.isJavaConstant): 1