Search in sources :

Example 6 with AND

use of org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND in project graal by oracle.

the class AMD64Assembler method emitOperandHelper.

/**
 * Writes the addressing bytes of a single memory operand: the ModR/M byte, a SIB byte when the
 * addressing form requires one, and the displacement (if any). The R field of the ModR/M byte
 * carries {@code reg}, which may be an opcode extension.
 *
 * @param reg value placed in the R field; must fit in three bits
 * @param addr the memory operand to encode
 * @param force4Byte use 4 byte encoding for displacements that would normally fit in a byte
 * @param additionalInstructionSize the number of bytes that will be emitted after the operand,
 *            so that the start position of the next instruction can be computed even though
 *            this instruction has not been completely emitted yet.
 */
protected void emitOperandHelper(int reg, AMD64Address addr, boolean force4Byte, int additionalInstructionSize) {
    assert (reg & 0x07) == reg;
    final int regField = reg << 3;
    final Register base = addr.getBase();
    final Register index = addr.getIndex();
    final AMD64Address.Scale scale = addr.getScale();
    final int disp = addr.getDisplacement();

    if (base.equals(AMD64.rip)) {
        // RIP relative: [00 reg 101] disp32
        assert index.equals(Register.None) : "cannot use RIP relative addressing with index register";
        emitByte(0x05 | regField);
        if (codePatchingAnnotationConsumer != null && addr.instructionStartPosition >= 0) {
            // The displacement starts at the current position and is four bytes wide.
            final int displacementPosition = position();
            final int nextInstructionPosition = displacementPosition + 4 + additionalInstructionSize;
            codePatchingAnnotationConsumer.accept(new AddressDisplacementAnnotation(addr.instructionStartPosition, displacementPosition, 4, nextInstructionPosition));
        }
        emitInt(disp);
    } else if (!base.isValid()) {
        // No base register: both forms use a SIB byte with base field 101 and a disp32.
        if (index.isValid()) {
            // [index * scale + disp]: [00 reg 100][ss index 101] disp32
            final int indexField = encode(index) << 3;
            assert !index.equals(rsp) : "illegal addressing mode";
            emitByte(0x04 | regField);
            emitByte(scale.log2 << 6 | indexField | 0x05);
        } else {
            // [disp] ABSOLUTE: [00 reg 100][00 100 101] disp32
            emitByte(0x04 | regField);
            emitByte(0x25);
        }
        emitInt(disp);
    } else {
        final int baseField = encode(base);
        // mod bits of the ModR/M byte: 0x00 = no displacement, 0x40 = disp8, 0x80 = disp32
        final int mod;
        if (index.isValid()) {
            // [base + index * scale + disp]: [mod reg 100][ss index base]
            final int indexField = encode(index) << 3;
            if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
                mod = 0x00;
            } else {
                mod = (isByte(disp) && !force4Byte) ? 0x40 : 0x80;
            }
            assert !index.equals(rsp) : "illegal addressing mode";
            emitByte(mod | 0x04 | regField);
            emitByte(scale.log2 << 6 | indexField | baseField);
        } else if (base.equals(rsp) || base.equals(r12)) {
            // [rsp + disp]: [mod reg 100][00 100 100]
            if (disp == 0) {
                mod = 0x00;
            } else {
                mod = (isByte(disp) && !force4Byte) ? 0x40 : 0x80;
            }
            emitByte(mod | 0x04 | regField);
            emitByte(0x24);
        } else {
            // [base + disp]: [mod reg base]
            assert !base.equals(rsp) && !base.equals(r12) : "illegal addressing mode";
            if (disp == 0 && !base.equals(rbp) && !base.equals(r13)) {
                mod = 0x00;
            } else {
                mod = (isByte(disp) && !force4Byte) ? 0x40 : 0x80;
            }
            emitByte(mod | regField | baseField);
        }

        // The displacement width follows from the mod bits chosen above.
        if (mod == 0x40) {
            emitByte(disp & 0xFF);
        } else if (mod == 0x80) {
            emitInt(disp);
        }
    }
    setCurAttributes(null);
}
Also used : Register(jdk.vm.ci.code.Register) Scale(org.graalvm.compiler.asm.amd64.AMD64Address.Scale)

Example 7 with AND

use of org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND in project graal by oracle.

the class AMD64MathIntrinsicUnaryOp method log10Intrinsic.

/*
 * Copyright (c) 2014, 2016, Intel Corporation. All rights reserved. Intel Math Library (LIBM)
 * Source Code
 *
 * ALGORITHM DESCRIPTION - LOG10()
 * -------------------------------
 *
 * Let x=2^k * mx, mx in [1,2)
 *
 * Get B~1/mx based on the output of rcpss instruction (B0):
 *   B = int((B0*LH*2^7+0.5))/2^7
 * LH is a short approximation for log10(e)
 *
 * Reduced argument: r=B*mx-LH (computed accurately in high and low parts)
 *
 * Result: k*log10(2) - log(B) + p(r)
 *   p(r) is a degree 7 polynomial
 *   -log(B) read from data table (high, low parts)
 *   Result is formed from high and low parts
 *
 * Special cases:
 *   log10(0) = -INF with divide-by-zero exception raised
 *   log10(1) = +0
 *   log10(x) = NaN with invalid exception raised if x < -0, including -INF
 *   log10(+INF) = +INF
 */
/**
 * Emits the instruction sequence for the log10 algorithm described in the comment above. The
 * argument is read from {@code value}; every exit path writes its result to {@code dest} before
 * reaching the common exit label {@code bb8}.
 *
 * The argument is also saved to the stack temp at the start so that the special-case paths
 * (entered at {@code bb0}) can reload the unmodified input.
 *
 * @param dest register that receives the result; it is used as an SSE operand throughout
 * @param value register holding the argument
 * @param crb used to resolve {@code stackTemp} to an address and handed to {@code setCrb}
 * @param masm assembler that receives the emitted instructions
 */
public void log10Intrinsic(Register dest, Register value, CompilationResultBuilder crb, AMD64MacroAssembler masm) {
    // Constant tables referenced by the emitted code (16 is passed as the second constructor
    // argument; presumably the alignment in bytes - confirm against ArrayDataPointerConstant).
    ArrayDataPointerConstant highmaskLogTenPtr = new ArrayDataPointerConstant(highmaskLogTen, 16);
    ArrayDataPointerConstant logTenEPtr = new ArrayDataPointerConstant(logTenE, 16);
    ArrayDataPointerConstant logTenTablePtr = new ArrayDataPointerConstant(logTenTable, 16);
    ArrayDataPointerConstant logTwoLogTenDataPtr = new ArrayDataPointerConstant(logTwoLogTenData, 16);
    ArrayDataPointerConstant coeffLogTenDataPtr = new ArrayDataPointerConstant(coeffLogTenData, 16);
    // Branch targets; see the comments at each bind() below.
    Label bb0 = new Label();
    Label bb1 = new Label();
    Label bb2 = new Label();
    Label bb3 = new Label();
    Label bb4 = new Label();
    Label bb5 = new Label();
    Label bb6 = new Label();
    Label bb7 = new Label();
    Label bb8 = new Label();
    // General purpose temporaries. Note that gpr3 is taken from rcxTemp, not from a gpr3Temp.
    Register gpr1 = asRegister(gpr1Temp, AMD64Kind.QWORD);
    Register gpr2 = asRegister(gpr2Temp, AMD64Kind.QWORD);
    Register gpr3 = asRegister(rcxTemp, AMD64Kind.QWORD);
    Register gpr4 = asRegister(gpr4Temp, AMD64Kind.QWORD);
    // Floating point / vector temporaries.
    Register temp1 = asRegister(xmm1Temp, AMD64Kind.DOUBLE);
    Register temp2 = asRegister(xmm2Temp, AMD64Kind.DOUBLE);
    Register temp3 = asRegister(xmm3Temp, AMD64Kind.DOUBLE);
    Register temp4 = asRegister(xmm4Temp, AMD64Kind.DOUBLE);
    Register temp5 = asRegister(xmm5Temp, AMD64Kind.DOUBLE);
    Register temp6 = asRegister(xmm6Temp, AMD64Kind.DOUBLE);
    Register temp7 = asRegister(xmm7Temp, AMD64Kind.DOUBLE);
    AMD64Address stackSlot = (AMD64Address) crb.asAddress(stackTemp);
    setCrb(crb);
    // Save the argument on the stack; it is reloaded at bb0.
    masm.movdq(stackSlot, value);
    // Work on the argument in dest; the copy is skipped when both are the same register.
    if (dest.encoding != value.encoding) {
        masm.movdqu(dest, value);
    }
    // highmaskLogTen data: 0xf8000000, 0xffffffff, 0x00000000, 0xffffe000
    masm.movdqu(temp5, externalAddress(highmaskLogTenPtr));
    masm.xorpd(temp2, temp2);
    // 16368 = 0x3FF0, inserted as the top 16-bit word of the low double in temp2
    masm.movl(gpr1, 16368);
    masm.pinsrw(temp2, gpr1, 3);
    // 1054736384 = 0x3EDE0000; presumably the single-precision LH constant from the header
    masm.movl(gpr2, 1054736384);
    masm.movdl(temp7, gpr2);
    masm.xorpd(temp3, temp3);
    // 30704 = 0x77F0
    masm.movl(gpr3, 30704);
    masm.pinsrw(temp3, gpr3, 3);
    // 32768 = 0x8000
    masm.movl(gpr3, 32768);
    masm.movdl(temp4, gpr3);
    masm.movdqu(temp1, value);
    // gpr1 = top 16-bit word (word index 3) of the argument
    masm.pextrw(gpr1, dest, 3);
    masm.por(dest, temp2);
    // 16352 = 0x3FE0; subtracted from the masked top word further down (the bb3 path uses 18416)
    masm.movl(gpr2, 16352);
    masm.psrlq(dest, 27);
    // logTenE data: 0x00000000, 0x3fdbc000, 0xbf2e4108, 0x3f5a7a6c
    masm.movdqu(temp2, externalAddress(logTenEPtr));
    masm.psrld(dest, 2);
    // NOTE(review): the header comment mentions rcpss; the instruction emitted here is rcpps.
    masm.rcpps(dest, dest);
    // The shift pair clears the top 12 bits of each 64-bit lane of temp1.
    masm.psllq(temp1, 12);
    masm.pshufd(temp6, temp5, 0x4E);
    masm.psrlq(temp1, 12);
    // Take the special-case path when (top word - 16) is, as an unsigned value, >= 32736
    // (0x7FE0).
    masm.subl(gpr1, 16);
    masm.cmpl(gpr1, 32736);
    masm.jcc(ConditionFlag.AboveEqual, bb0);
    // bb1: main computation. Also entered from the bb3 path after it has rescaled the argument.
    masm.bind(bb1);
    masm.mulss(dest, temp7);
    masm.por(temp1, temp3);
    masm.andpd(temp5, temp1);
    masm.paddd(dest, temp4);
    // coeffLogTenData[0..15]: 0xc1a5f12e, 0x40358874, 0x64d4ef0d, 0xc0089309
    masm.movdqu(temp3, externalAddress(coeffLogTenDataPtr));
    masm.leaq(gpr4, externalAddress(coeffLogTenDataPtr));
    // coeffLogTenData[16..31]: 0x385593b1, 0xc025c917, 0xdc963467, 0x3ffc6a02
    masm.movdqu(temp4, new AMD64Address(gpr4, 16));
    masm.subsd(temp1, temp5);
    masm.movdl(gpr3, dest);
    masm.psllq(dest, 29);
    masm.andpd(dest, temp6);
    // logTwoLogTenData[0..7]: 0x509f7800, 0x3f934413
    masm.movdq(temp6, externalAddress(logTwoLogTenDataPtr));
    // 32752 = 0x7FF0: mask applied to the top word before subtracting gpr2 and converting the
    // difference to double in temp7
    masm.andl(gpr1, 32752);
    masm.subl(gpr1, gpr2);
    masm.cvtsi2sdl(temp7, gpr1);
    masm.mulpd(temp5, dest);
    masm.mulsd(temp1, dest);
    masm.subsd(temp5, temp2);
    // coeffLogTenData[32..47]: 0x7f9d3aa1, 0x4016ab9f, 0xdc77b115, 0xbff27af2
    masm.movdqu(temp2, new AMD64Address(gpr4, 32));
    masm.leaq(gpr4, externalAddress(logTenTablePtr));
    // Table lookup: gpr3 = (gpr3 & 0xFF0000) >> 12 is used as a byte offset into logTenTable,
    // biased by -1504.
    masm.andl(gpr3, 16711680);
    masm.shrl(gpr3, 12);
    masm.movdqu(dest, new AMD64Address(gpr4, gpr3, Scale.Times1, -1504));
    masm.addsd(temp1, temp5);
    masm.mulsd(temp6, temp7);
    masm.pshufd(temp5, temp1, 0x44);
    masm.leaq(gpr4, externalAddress(logTwoLogTenDataPtr));
    // logTwoLogTenData[8..15]: 0x1f12b358, 0x3cdfef31
    masm.mulsd(temp7, new AMD64Address(gpr4, 8));
    masm.mulsd(temp3, temp1);
    masm.addsd(dest, temp6);
    masm.mulpd(temp4, temp5);
    masm.leaq(gpr4, externalAddress(logTenEPtr));
    // logTenE[8..15]: 0xbf2e4108, 0x3f5a7a6c
    masm.movdq(temp6, new AMD64Address(gpr4, 8));
    masm.mulpd(temp5, temp5);
    masm.addpd(temp4, temp2);
    masm.mulpd(temp3, temp5);
    masm.pshufd(temp2, dest, 0xE4);
    masm.addsd(dest, temp1);
    masm.mulsd(temp4, temp1);
    masm.subsd(temp2, dest);
    masm.mulsd(temp6, temp1);
    masm.addsd(temp1, temp2);
    masm.pshufd(temp2, dest, 0xEE);
    masm.mulsd(temp5, temp5);
    masm.addsd(temp7, temp2);
    masm.addsd(temp1, temp6);
    masm.addpd(temp4, temp3);
    masm.addsd(temp1, temp7);
    masm.mulpd(temp4, temp5);
    masm.addsd(temp1, temp4);
    masm.pshufd(temp5, temp4, 0xEE);
    masm.addsd(temp1, temp5);
    // Final sum of the main path lands in dest.
    masm.addsd(dest, temp1);
    masm.jmp(bb8);
    // bb0: special-case dispatch. Reload the original argument and restore gpr1 to the
    // unbiased top word.
    masm.bind(bb0);
    masm.movdq(dest, stackSlot);
    masm.movdq(temp1, stackSlot);
    masm.addl(gpr1, 16);
    // top word >= 0x8000 (unsigned) -> bb2
    masm.cmpl(gpr1, 32768);
    masm.jcc(ConditionFlag.AboveEqual, bb2);
    // top word < 16 (unsigned) -> bb3
    masm.cmpl(gpr1, 16);
    masm.jcc(ConditionFlag.Below, bb3);
    // bb4: result is dest + dest.
    masm.bind(bb4);
    masm.addsd(dest, dest);
    masm.jmp(bb8);
    // bb5: only reached by the jump in bb2, so the first jcc below still tests the flags of the
    // compare of gpr2 against -2097152 made there.
    masm.bind(bb5);
    masm.jcc(ConditionFlag.Above, bb4);
    masm.cmpl(gpr3, 0);
    masm.jcc(ConditionFlag.Above, bb4);
    masm.jmp(bb6);
    // bb3: top word below 16. OR the low and high 32-bit halves of the argument; if the result
    // is zero go to bb7, otherwise rescale the argument and re-enter the main path.
    masm.bind(bb3);
    masm.xorpd(temp1, temp1);
    masm.addsd(temp1, dest);
    masm.movdl(gpr3, temp1);
    masm.psrlq(temp1, 32);
    masm.movdl(gpr2, temp1);
    masm.orl(gpr3, gpr2);
    masm.cmpl(gpr3, 0);
    masm.jcc(ConditionFlag.Equal, bb7);
    masm.xorpd(temp1, temp1);
    masm.xorpd(temp2, temp2);
    // 18416 = 0x47F0: dest is multiplied by the double whose top word is 0x47F0
    masm.movl(gpr1, 18416);
    masm.pinsrw(temp1, gpr1, 3);
    masm.mulsd(dest, temp1);
    // Same setup sequence as before bb1, now on the rescaled argument (16368 = 0x3FF0).
    masm.movl(gpr1, 16368);
    masm.pinsrw(temp2, gpr1, 3);
    masm.movdqu(temp1, dest);
    masm.pextrw(gpr1, dest, 3);
    masm.por(dest, temp2);
    // gpr2 = 18416 here instead of 16352; presumably this compensates for the rescaling above
    masm.movl(gpr2, 18416);
    masm.psrlq(dest, 27);
    // logTenE data: 0x00000000, 0x3fdbc000, 0xbf2e4108, 0x3f5a7a6c
    masm.movdqu(temp2, externalAddress(logTenEPtr));
    masm.psrld(dest, 2);
    masm.rcpps(dest, dest);
    masm.psllq(temp1, 12);
    masm.pshufd(temp6, temp5, 0x4E);
    masm.psrlq(temp1, 12);
    masm.jmp(bb1);
    // bb2: top word >= 0x8000. gpr3 = low 32 bits, gpr2 = high 32 bits doubled (which shifts
    // out the top bit).
    masm.bind(bb2);
    masm.movdl(gpr3, temp1);
    masm.psrlq(temp1, 32);
    masm.movdl(gpr2, temp1);
    masm.addl(gpr2, gpr2);
    // -2097152 = 0xFFE00000
    masm.cmpl(gpr2, -2097152);
    masm.jcc(ConditionFlag.AboveEqual, bb5);
    // Both halves zero -> bb7
    masm.orl(gpr3, gpr2);
    masm.cmpl(gpr3, 0);
    masm.jcc(ConditionFlag.Equal, bb7);
    // bb6: dest = 0 * (double with top word 0x7FF0); presumably the NaN / invalid-exception
    // case listed in the header.
    masm.bind(bb6);
    masm.xorpd(temp1, temp1);
    masm.xorpd(dest, dest);
    masm.movl(gpr1, 32752);
    masm.pinsrw(temp1, gpr1, 3);
    masm.mulsd(dest, temp1);
    masm.jmp(bb8);
    // bb7: dest = (double with top word 0xBFF0) / 0; presumably the -INF / divide-by-zero case
    // listed in the header.
    masm.bind(bb7);
    masm.xorpd(temp1, temp1);
    masm.xorpd(dest, dest);
    masm.movl(gpr1, 49136);
    masm.pinsrw(dest, gpr1, 3);
    masm.divsd(dest, temp1);
    // bb8: common exit; the result is in dest.
    masm.bind(bb8);
}
Also used : Register(jdk.vm.ci.code.Register) ValueUtil.asRegister(jdk.vm.ci.code.ValueUtil.asRegister) ArrayDataPointerConstant(org.graalvm.compiler.lir.asm.ArrayDataPointerConstant) Label(org.graalvm.compiler.asm.Label) AMD64Address(org.graalvm.compiler.asm.amd64.AMD64Address)

Example 8 with AND

use of org.graalvm.compiler.asm.amd64.AMD64Assembler.AMD64BinaryArithmetic.AND in project graal by oracle.

the class AMD64ArrayEqualsOp method emit8ByteCompare.

/**
 * Emits the comparison that works on {@code VECTOR_SIZE}-byte (8-byte) chunks: a main loop
 * over the part of the arrays covered by whole chunks, followed by a single load positioned
 * at the end of the arrays that covers the remaining tail bytes.
 */
private void emit8ByteCompare(CompilationResultBuilder crb, AMD64MacroAssembler masm, Register result, Register array1, Register array2, Register length, Label trueLabel, Label falseLabel) {
    final boolean floatElements = kind.isNumericFloat();
    final Register scratch = asRegister(temp4);
    final Label mainLoop = new Label();
    final Label advance = new Label();
    final Label floatSlowPath = new Label();
    final Label tailOnly = new Label();

    // result := tail count (in bytes), length := vector count (in bytes)
    masm.andl(result, VECTOR_SIZE - 1);
    masm.andl(length, ~(VECTOR_SIZE - 1));
    masm.jcc(ConditionFlag.Zero, tailOnly);

    // Advance both array pointers by the vector count and negate the counter.
    masm.leaq(array1, new AMD64Address(array1, length, Scale.Times1, 0));
    masm.leaq(array2, new AMD64Address(array2, length, Scale.Times1, 0));
    masm.negq(length);

    // Align the main loop
    masm.align(crb.target.wordSize * 2);
    masm.bind(mainLoop);
    masm.movq(scratch, new AMD64Address(array1, length, Scale.Times1, 0));
    masm.cmpq(scratch, new AMD64Address(array2, length, Scale.Times1, 0));
    if (floatElements) {
        // A bitwise mismatch of floating point data needs the element-wise check.
        masm.jcc(ConditionFlag.NotEqual, floatSlowPath);
    } else {
        masm.jcc(ConditionFlag.NotEqual, falseLabel);
    }
    masm.bind(advance);
    masm.addq(length, VECTOR_SIZE);
    masm.jccb(ConditionFlag.NotZero, mainLoop);

    // Nothing left to compare when the tail count is zero.
    masm.testl(result, result);
    masm.jcc(ConditionFlag.Zero, trueLabel);

    if (floatElements) {
        // NaN check is slow path and hence placed outside of the main loop.
        final Label tailCompare = new Label();
        masm.jmpb(tailCompare);
        masm.bind(floatSlowPath);
        // At most two iterations, unroll in the emitted code.
        final int elementBytes = kind.getByteCount();
        int offset = 0;
        while (offset < VECTOR_SIZE) {
            emitFloatCompare(masm, array1, array2, length, offset, falseLabel, elementBytes == VECTOR_SIZE);
            offset += elementBytes;
        }
        masm.jmpb(advance);
        masm.bind(tailCompare);
    }

    /*
     * Compare the remaining bytes with an unaligned memory load aligned to the end of the
     * array.
     */
    masm.movq(scratch, new AMD64Address(array1, result, Scale.Times1, -VECTOR_SIZE));
    masm.cmpq(scratch, new AMD64Address(array2, result, Scale.Times1, -VECTOR_SIZE));
    if (!floatElements) {
        masm.jccb(ConditionFlag.NotEqual, falseLabel);
    } else {
        masm.jcc(ConditionFlag.Equal, trueLabel);
        // At most two iterations, unroll in the emitted code.
        final int elementBytes = kind.getByteCount();
        int offset = 0;
        while (offset < VECTOR_SIZE) {
            emitFloatCompare(masm, array1, array2, result, -VECTOR_SIZE + offset, falseLabel, elementBytes == VECTOR_SIZE);
            offset += elementBytes;
        }
    }
    masm.jmpb(trueLabel);

    // Arrays shorter than one vector: hand the tail count back in length.
    masm.bind(tailOnly);
    masm.movl(length, result);
}
Also used : Register(jdk.vm.ci.code.Register) ValueUtil.asRegister(jdk.vm.ci.code.ValueUtil.asRegister) Label(org.graalvm.compiler.asm.Label) AMD64Address(org.graalvm.compiler.asm.amd64.AMD64Address)

Aggregations

Register (jdk.vm.ci.code.Register)8 ValueUtil.asRegister (jdk.vm.ci.code.ValueUtil.asRegister)7 AMD64Address (org.graalvm.compiler.asm.amd64.AMD64Address)7 Label (org.graalvm.compiler.asm.Label)5 Scale (org.graalvm.compiler.asm.amd64.AMD64Address.Scale)2 ArrayDataPointerConstant (org.graalvm.compiler.lir.asm.ArrayDataPointerConstant)2 AMD64.rax (jdk.vm.ci.amd64.AMD64.rax)1 AMD64.rbx (jdk.vm.ci.amd64.AMD64.rbx)1 TargetDescription (jdk.vm.ci.code.TargetDescription)1 ValueUtil.isRegister (jdk.vm.ci.code.ValueUtil.isRegister)1 AllocatableValue (jdk.vm.ci.meta.AllocatableValue)1 Value (jdk.vm.ci.meta.Value)1 AMD64MacroAssembler (org.graalvm.compiler.asm.amd64.AMD64MacroAssembler)1 GraalError (org.graalvm.compiler.debug.GraalError)1 GraalHotSpotVMConfig (org.graalvm.compiler.hotspot.GraalHotSpotVMConfig)1 HotSpotCounterOp (org.graalvm.compiler.hotspot.HotSpotCounterOp)1 HotSpotRegistersProvider (org.graalvm.compiler.hotspot.meta.HotSpotRegistersProvider)1 LIRInstructionClass (org.graalvm.compiler.lir.LIRInstructionClass)1 LIRValueUtil.asJavaConstant (org.graalvm.compiler.lir.LIRValueUtil.asJavaConstant)1 LIRValueUtil.isJavaConstant (org.graalvm.compiler.lir.LIRValueUtil.isJavaConstant)1