Search in sources :

Example 51 with MatrixIndexes

use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.

the class QuaternaryInstruction method processInstruction.

/**
 * Runs the quaternary operation once for every non-null block cached under {@code _input1}.
 *
 * <p>For each such block X the method collects the remaining operands (the optional weights W,
 * and the factors U and V), invokes {@code quaternaryOperations} on X, and then assigns the
 * output block indexes according to which weight type is set on the operator.
 *
 * @param valueClass      value class handed to {@code cachedValues.holdPlace} when an output slot is allocated
 * @param cachedValues    cache from which the input blocks are read and into which the output is placed
 * @param tempValue       output holder used instead of a cache slot when {@code output == _input1}
 * @param zeroInput       not used by this method
 * @param blockRowFactor  not used by this method
 * @param blockColFactor  not used by this method
 */
@Override
public void processInstruction(Class<? extends MatrixValue> valueClass, CachedValueMap cachedValues, IndexedMatrixValue tempValue, IndexedMatrixValue zeroInput, int blockRowFactor, int blockColFactor) {
    QuaternaryOperator qop = (QuaternaryOperator) optr;
    ArrayList<IndexedMatrixValue> inputBlocks = cachedValues.get(_input1);
    if (inputBlocks == null) {
        // nothing cached for the first input
        return;
    }

    for (IndexedMatrixValue block : inputBlocks) {
        if (block == null) {
            continue;
        }

        // --- Step 1: input block X and the output holder ---
        MatrixIndexes inIx = block.getIndexes();
        MatrixBlock Xij = (MatrixBlock) block.getValue();

        // write into the temp value when the output id equals the input id,
        // otherwise reserve a slot for the output in the cache
        IndexedMatrixValue target = (output == _input1) ? tempValue : cachedValues.holdPlace(output, valueClass);
        MatrixIndexes outIx = target.getIndexes();
        MatrixValue outVal = target.getValue();

        // --- Step 2: remaining operands W, U, V ---
        // W is looked up only if a fourth input id is set (-1 means none)
        MatrixValue Wij = null;
        if (_input4 != -1) {
            IndexedMatrixValue weights = cachedValues.getFirst(_input4);
            if (weights != null) {
                Wij = weights.getValue();
            }
        }
        // no W block available but the operator has four inputs:
        // build a 1x1 block from the fifth part of the instruction string
        if (Wij == null && qop.hasFourInputs()) {
            MatrixBlock scalar = new MatrixBlock(1, 1, false);
            String[] parts = InstructionUtils.getInstructionParts(instString);
            scalar.quickSetValue(0, 0, Double.valueOf(parts[4]));
            Wij = scalar;
        }

        // U: from the distributed-cache values (by row index of X) when _cacheU is set,
        // otherwise the first cached block of the second input
        MatrixValue Ui;
        if (_cacheU) {
            Ui = MRBaseForCommonInstructions.dcValues.get(_input2).getDataBlock((int) inIx.getRowIndex(), 1).getValue();
        } else {
            Ui = cachedValues.getFirst(_input2).getValue();
        }

        // t(V): from the distributed-cache values (by column index of X) when _cacheV is set,
        // otherwise the first cached block of the third input
        MatrixValue Vj;
        if (_cacheV) {
            Vj = MRBaseForCommonInstructions.dcValues.get(_input3).getDataBlock((int) inIx.getColumnIndex(), 1).getValue();
        } else {
            Vj = cachedValues.getFirst(_input3).getValue();
        }

        // special input case (V arrived through shuffle -> t(V)):
        // column counts differ, so swap the indexes of V
        if (Ui.getNumColumns() != Vj.getNumColumns()) {
            MatrixBlock vBlock = (MatrixBlock) Vj;
            MatrixBlock swapped = new MatrixBlock(Vj.getNumColumns(), Vj.getNumRows(), Vj.isInSparseFormat());
            ReorgOperator swapOp = new ReorgOperator(SwapIndex.getSwapIndexFnObject());
            Vj = LibMatrixReorg.reorg(vBlock, swapped, swapOp);
        }

        // --- Step 3: execute and assign output indexes ---
        Xij.quaternaryOperations(qop, (MatrixBlock) Ui, (MatrixBlock) Vj, (MatrixBlock) Wij, (MatrixBlock) outVal);

        if (qop.wtype1 != null || qop.wtype4 != null) {
            // wsloss: output goes to block (1,1)
            outIx.setIndexes(1, 1);
        } else if (qop.wtype2 != null || qop.wtype5 != null || (qop.wtype3 != null && qop.wtype3.isBasic())) {
            // wsigmoid / wdivmm-basic: output keeps the input block indexes
            outIx.setIndexes(inIx);
        } else {
            // wdivmm: row index of the output comes from the column or row index of X
            if (qop.wtype3.isLeft()) {
                outIx.setIndexes(inIx.getColumnIndex(), 1);
            } else {
                outIx.setIndexes(inIx.getRowIndex(), 1);
            }
        }

        // the temp value is not part of the cache yet, so add it explicitly
        if (target == tempValue) {
            cachedValues.add(output, target);
        }
    }
}
Also used : QuaternaryOperator(org.apache.sysml.runtime.matrix.operators.QuaternaryOperator) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) IndexedMatrixValue(org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue) MatrixValue(org.apache.sysml.runtime.matrix.data.MatrixValue) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) ReorgOperator(org.apache.sysml.runtime.matrix.operators.ReorgOperator) IndexedMatrixValue(org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue)

Example 52 with MatrixIndexes

use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.

the class ReplicateInstruction method processInstruction.

/**
 * Replicates every non-null block cached under {@code input} along columns or rows.
 *
 * <p>For each input block the method emits {@code ceil(_lenM / blockSize) - 1} additional
 * output entries carrying the same value at block index 2, 3, ... in the replicated dimension,
 * and then emits the original block under its own indexes. The replicated dimension is the
 * column dimension when {@code _repCols} is set, otherwise the row dimension.
 *
 * @param valueClass      value class handed to {@code cachedValues.holdPlace} when an output slot is allocated
 * @param cachedValues    cache from which the input blocks are read and into which outputs are placed
 * @param tempValue       output holder used instead of a cache slot when {@code input == output}
 * @param zeroInput       not used by this method
 * @param blockRowFactor  block size in rows; used as divisor when replicating rows
 * @param blockColFactor  block size in columns; used as divisor when replicating columns
 */
@Override
public void processInstruction(Class<? extends MatrixValue> valueClass, CachedValueMap cachedValues, IndexedMatrixValue tempValue, IndexedMatrixValue zeroInput, int blockRowFactor, int blockColFactor) {
    ArrayList<IndexedMatrixValue> blocks = cachedValues.get(input);
    if (blocks == null) {
        return;
    }

    for (IndexedMatrixValue block : blocks) {
        if (block == null) {
            continue;
        }

        // holder for the original block: temp value if input and output ids coincide,
        // otherwise a fresh slot in the cache
        IndexedMatrixValue target = (input == output) ? tempValue : cachedValues.holdPlace(output, valueClass);

        MatrixIndexes srcIx = block.getIndexes();
        MatrixValue srcVal = block.getValue();

        // block size of the dimension that is being replicated
        int blockSize = _repCols ? blockColFactor : blockRowFactor;
        if (blockSize <= 1) {
            // blocksize should be 1000 or similar
            LOG.warn("Block size of input matrix is: brlen=" + blockRowFactor + ", bclen=" + blockColFactor + ".");
        }

        // number of extra copies; minus one because the original block itself is emitted below
        // (e.g., M is Nx2700, blocksize=1000 -> 2 replicas, original block goes to index 1)
        long numCopies = (long) Math.ceil((double) _lenM / blockSize) - 1;
        for (long k = 0; k < numCopies; k++) {
            IndexedMatrixValue copy = cachedValues.holdPlace(output, valueClass);
            MatrixIndexes copyIx = copy.getIndexes();
            if (_repCols) {
                // same block row, column index 2, 3, ...
                copyIx.setIndexes(srcIx.getRowIndex(), 2 + k);
            } else {
                // row index 2, 3, ..., same block column
                copyIx.setIndexes(2 + k, srcIx.getColumnIndex());
            }
            copy.set(copyIx, srcVal);
        }

        // emit the original block under its own indexes
        target.set(srcIx, srcVal);

        // the temp value is not part of the cache yet, so add it explicitly
        if (target == tempValue) {
            cachedValues.add(output, target);
        }
    }
}
Also used : IndexedMatrixValue(org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue) MatrixValue(org.apache.sysml.runtime.matrix.data.MatrixValue) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) IndexedMatrixValue(org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue)

Example 53 with MatrixIndexes

use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.

the class TernaryInstruction method processInstruction.

/**
 * Executes the ternary operator on three operands and caches the result under {@code ixoutput}.
 *
 * <p>Each operand is taken from the first cached block of its input id when the corresponding
 * {@code inputN.isMatrix()} holds, and from the field {@code m1}/{@code m2}/{@code m3} otherwise.
 * The output block indexes are copied from the first operand (in order 1, 2, 3) that is a
 * matrix; if neither the first nor the second is, the third input's cached block is used.
 *
 * @param valueClass      not used by this method
 * @param cachedValues    cache from which matrix operands are read and to which the output is added
 * @param tempValue       not used by this method
 * @param zeroInput       not used by this method
 * @param blockRowFactor  not used by this method
 * @param blockColFactor  not used by this method
 */
@Override
public void processInstruction(Class<? extends MatrixValue> valueClass, CachedValueMap cachedValues, IndexedMatrixValue tempValue, IndexedMatrixValue zeroInput, int blockRowFactor, int blockColFactor) {
    // resolve the three operands
    MatrixBlock first;
    if (input1.isMatrix()) {
        first = (MatrixBlock) cachedValues.getFirst(ixinput1).getValue();
    } else {
        first = m1;
    }

    MatrixBlock second;
    if (input2.isMatrix()) {
        second = (MatrixBlock) cachedValues.getFirst(ixinput2).getValue();
    } else {
        second = m2;
    }

    MatrixBlock third;
    if (input3.isMatrix()) {
        third = (MatrixBlock) cachedValues.getFirst(ixinput3).getValue();
    } else {
        third = m3;
    }

    // block indexes of the first matrix operand determine the output indexes
    MatrixIndexes srcIx;
    if (input1.isMatrix()) {
        srcIx = cachedValues.getFirst(ixinput1).getIndexes();
    } else if (input2.isMatrix()) {
        srcIx = cachedValues.getFirst(ixinput2).getIndexes();
    } else {
        srcIx = cachedValues.getFirst(ixinput3).getIndexes();
    }

    // fresh output holder carrying the source indexes
    IndexedMatrixValue out = new IndexedMatrixValue(new MatrixIndexes(), new MatrixBlock());
    out.getIndexes().setIndexes(srcIx);

    // run the operation, writing into the output holder's value
    TernaryOperator op = (TernaryOperator) optr;
    first.ternaryOperations(op, second, third, (MatrixBlock) out.getValue());

    // publish the result
    cachedValues.add(ixoutput, out);
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) TernaryOperator(org.apache.sysml.runtime.matrix.operators.TernaryOperator) IndexedMatrixValue(org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue)

Example 54 with MatrixIndexes

use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.

the class AggregateTernarySPInstruction method processInstruction.

/**
 * Executes an aggregate ternary operation on Spark.
 *
 * <p>The first two inputs (and the third, unless it is a literal) are joined by block index
 * and mapped to partial results. Depending on the operator's index function and the input
 * dimensions, the partials are then reduced to a scalar, to a single matrix block, or to an
 * RDD of blocks aggregated by key.
 *
 * @param ec execution context; cast to {@code SparkExecutionContext}
 */
@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;

    // inputs: characteristics of the first input and the block RDDs
    MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> in2 = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
    // third input is either a matrix or the literal 1; no RDD in the literal case
    JavaPairRDD<MatrixIndexes, MatrixBlock> in3 = null;
    if (!input3.isLiteral()) {
        in3 = sec.getBinaryBlockRDDHandleForVariable(input3.getName());
    }

    // per-block partial results of the aggregate ternary operation
    AggregateTernaryOperator aggop = (AggregateTernaryOperator) _optr;
    JavaPairRDD<MatrixIndexes, MatrixBlock> partials;
    if (in3 == null) {
        // 2 inputs (third is literal 1)
        partials = in1.join(in2).mapToPair(new RDDAggregateTernaryFunction2(aggop));
    } else {
        // 3 inputs
        partials = in1.join(in2).join(in3).mapToPair(new RDDAggregateTernaryFunction(aggop));
    }

    // case 1 (tak+*): full reduction to a scalar, no lineage because scalar
    if (aggop.indexFn instanceof ReduceAll) {
        MatrixBlock total = RDDAggregateUtils.sumStable(partials.values());
        DoubleObject scalar = new DoubleObject(total.getValue(0, 0));
        sec.setVariable(output.getName(), scalar);
        return;
    }

    // case 2 (tack+* single block): input has at most one block of columns
    if (mcIn.dimsKnown() && mcIn.getCols() <= mcIn.getColsPerBlock()) {
        // aggregate into a single block and drop the correction rows/columns
        MatrixBlock single = RDDAggregateUtils.aggStable(partials, aggop.aggOp);
        single.dropLastRowsOrColumns(aggop.aggOp.correctionLocation);
        // store the block in the symbol table (no lineage because single block);
        // this also implicitly maintains the matrix characteristics
        sec.setMatrixOutput(output.getName(), single, getExtendedOpcode());
        return;
    }

    // case 3 (tack+* multi block): aggregate by key and drop the correction per block
    JavaPairRDD<MatrixIndexes, MatrixBlock> aggregated = RDDAggregateUtils.aggByKeyStable(partials, aggop.aggOp, false);
    JavaPairRDD<MatrixIndexes, MatrixBlock> result = aggregated.mapValues(new AggregateDropCorrectionFunction(aggop.aggOp));

    // register the output RDD handle and its lineage in the symbol table
    updateUnaryAggOutputMatrixCharacteristics(sec, aggop.indexFn);
    sec.setRDDHandleForVariable(output.getName(), result);
    sec.addLineageRDD(output.getName(), input1.getName());
    sec.addLineageRDD(output.getName(), input2.getName());
    if (in3 != null) {
        sec.addLineageRDD(output.getName(), input3.getName());
    }
}
Also used : ReduceAll(org.apache.sysml.runtime.functionobjects.ReduceAll) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) DoubleObject(org.apache.sysml.runtime.instructions.cp.DoubleObject) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) AggregateDropCorrectionFunction(org.apache.sysml.runtime.instructions.spark.functions.AggregateDropCorrectionFunction) AggregateTernaryOperator(org.apache.sysml.runtime.matrix.operators.AggregateTernaryOperator) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 55 with MatrixIndexes

use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.

the class AppendGSPInstruction method processInstruction.

/**
 * Executes the general-case append (map-extend, aggregate) on Spark.
 *
 * <p>The blocks of the second input are passed through {@code ShiftMatrix}, cogrouped with the
 * blocks of the first input, and merged by {@code MergeWithShiftedBlocks}. The resulting RDD
 * is registered as the output variable with both inputs as lineage.
 *
 * @param ec execution context; cast to {@code SparkExecutionContext}
 */
@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;

    // validate the inputs before touching any RDD
    checkBinaryAppendInputCharacteristics(sec, _cbind, false, false);

    MatrixCharacteristics leftMc = sec.getMatrixCharacteristics(input1.getName());
    MatrixCharacteristics rightMc = sec.getMatrixCharacteristics(input2.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> left = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> right = sec.getBinaryBlockRDDHandleForVariable(input2.getName());

    // General case: needs shifting and merging and hence has a huge performance hit.
    JavaPairRDD<MatrixIndexes, MatrixBlock> shiftedRight = right.flatMapToPair(new ShiftMatrix(leftMc, rightMc, _cbind));
    JavaPairRDD<MatrixIndexes, MatrixBlock> appended = left.cogroup(shiftedRight).mapToPair(new MergeWithShiftedBlocks(leftMc, rightMc, _cbind));

    // register the output RDD handle and its lineage in the symbol table
    updateBinaryAppendOutputMatrixCharacteristics(sec, _cbind);
    sec.setRDDHandleForVariable(output.getName(), appended);
    sec.addLineageRDD(output.getName(), input1.getName());
    sec.addLineageRDD(output.getName(), input2.getName());
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Aggregations

MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes)165 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)142 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)70 SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)48 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)41 Path (org.apache.hadoop.fs.Path)24 SequenceFile (org.apache.hadoop.io.SequenceFile)23 JavaPairRDD (org.apache.spark.api.java.JavaPairRDD)22 ArrayList (java.util.ArrayList)21 IOException (java.io.IOException)20 FileSystem (org.apache.hadoop.fs.FileSystem)20 MatrixCell (org.apache.sysml.runtime.matrix.data.MatrixCell)19 Tuple2 (scala.Tuple2)19 IndexedMatrixValue (org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue)17 JobConf (org.apache.hadoop.mapred.JobConf)14 MatrixValue (org.apache.sysml.runtime.matrix.data.MatrixValue)11 CompressedMatrixBlock (org.apache.sysml.runtime.compress.CompressedMatrixBlock)10 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)10 File (java.io.File)9 RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject)9