Search in sources :

Example 76 with MatrixCharacteristics

use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by apache.

the class AggregateTernarySPInstruction method processInstruction.

/**
 * Executes the aggregate ternary operation on Spark: joins the block RDDs of the inputs,
 * applies the ternary function per joined block, and aggregates the partial results into
 * either a scalar, a single matrix block, or an output RDD.
 *
 * @param ec execution context; cast to {@link SparkExecutionContext}
 */
@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sparkCtx = (SparkExecutionContext) ec;

    // Fetch the inputs; the characteristics of the first input select the
    // single-block vs. multi-block aggregation path further down.
    MatrixCharacteristics firstStats = sparkCtx.getMatrixCharacteristics(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> first = sparkCtx.getBinaryBlockRDDHandleForVariable(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> second = sparkCtx.getBinaryBlockRDDHandleForVariable(input2.getName());
    // The third operand is either a matrix or the literal 1; a literal has no RDD,
    // so null marks the two-input variant.
    JavaPairRDD<MatrixIndexes, MatrixBlock> third = null;
    if (!input3.isLiteral()) {
        third = sparkCtx.getBinaryBlockRDDHandleForVariable(input3.getName());
    }

    // Compute the per-block partial results of the ternary operation.
    AggregateTernaryOperator ternaryOp = (AggregateTernaryOperator) _optr;
    JavaPairRDD<MatrixIndexes, MatrixBlock> partials;
    if (third == null) {
        // two inputs (third is literal 1)
        partials = first.join(second).mapToPair(new RDDAggregateTernaryFunction2(ternaryOp));
    } else {
        // three inputs
        partials = first.join(second).join(third).mapToPair(new RDDAggregateTernaryFunction(ternaryOp));
    }

    // Case tak+*: full reduction to a scalar (no lineage because scalar).
    if (ternaryOp.indexFn instanceof ReduceAll) {
        MatrixBlock summed = RDDAggregateUtils.sumStable(partials.values());
        DoubleObject scalar = new DoubleObject(summed.getValue(0, 0));
        sparkCtx.setVariable(output.getName(), scalar);
        return;
    }

    // Case tack+* with a single column block: aggregate into one block and drop the correction.
    if (firstStats.dimsKnown() && firstStats.getCols() <= firstStats.getColsPerBlock()) {
        MatrixBlock block = RDDAggregateUtils.aggStable(partials, ternaryOp.aggOp);
        block.dropLastRowsOrColumns(ternaryOp.aggOp.correctionLocation);
        // Storing the block in the symbol table also maintains the matrix
        // characteristics implicitly (no lineage because single block).
        sparkCtx.setMatrixOutput(output.getName(), block, getExtendedOpcode());
        return;
    }

    // Case tack+* with multiple blocks: aggregate by key and drop the correction per block.
    partials = RDDAggregateUtils.aggByKeyStable(partials, ternaryOp.aggOp, false);
    partials = partials.mapValues(new AggregateDropCorrectionFunction(ternaryOp.aggOp));

    // Register the output RDD handle and its lineage in the symbol table.
    updateUnaryAggOutputMatrixCharacteristics(sparkCtx, ternaryOp.indexFn);
    sparkCtx.setRDDHandleForVariable(output.getName(), partials);
    sparkCtx.addLineageRDD(output.getName(), input1.getName());
    sparkCtx.addLineageRDD(output.getName(), input2.getName());
    if (third != null) {
        sparkCtx.addLineageRDD(output.getName(), input3.getName());
    }
}
Also used : ReduceAll(org.apache.sysml.runtime.functionobjects.ReduceAll) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) DoubleObject(org.apache.sysml.runtime.instructions.cp.DoubleObject) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) AggregateDropCorrectionFunction(org.apache.sysml.runtime.instructions.spark.functions.AggregateDropCorrectionFunction) AggregateTernaryOperator(org.apache.sysml.runtime.matrix.operators.AggregateTernaryOperator) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 77 with MatrixCharacteristics

use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by apache.

the class AppendGSPInstruction method processInstruction.

/**
 * General-case append (map-extend, aggregate): shifts the blocks of the second input
 * and merges them with the co-grouped blocks of the first input.
 *
 * @param ec execution context; cast to {@link SparkExecutionContext}
 */
@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sparkCtx = (SparkExecutionContext) ec;

    // Validate the inputs without the single-block and alignment checks.
    checkBinaryAppendInputCharacteristics(sparkCtx, _cbind, false, false);

    MatrixCharacteristics leftStats = sparkCtx.getMatrixCharacteristics(input1.getName());
    MatrixCharacteristics rightStats = sparkCtx.getMatrixCharacteristics(input2.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> left = sparkCtx.getBinaryBlockRDDHandleForVariable(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> right = sparkCtx.getBinaryBlockRDDHandleForVariable(input2.getName());

    // General case: this one needs shifting and merging and hence has a huge performance hit.
    JavaPairRDD<MatrixIndexes, MatrixBlock> shiftedRight =
        right.flatMapToPair(new ShiftMatrix(leftStats, rightStats, _cbind));
    JavaPairRDD<MatrixIndexes, MatrixBlock> appended =
        left.cogroup(shiftedRight).mapToPair(new MergeWithShiftedBlocks(leftStats, rightStats, _cbind));

    // Register the output RDD handle and its lineage in the symbol table.
    updateBinaryAppendOutputMatrixCharacteristics(sparkCtx, _cbind);
    sparkCtx.setRDDHandleForVariable(output.getName(), appended);
    sparkCtx.addLineageRDD(output.getName(), input1.getName());
    sparkCtx.addLineageRDD(output.getName(), input2.getName());
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 78 with MatrixCharacteristics

use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by apache.

the class BinarySPInstruction method checkMatrixMatrixBinaryCharacteristics.

/**
 * Validates the two inputs of a matrix-matrix binary operation: both must have known
 * dimensions, a compatible shape combination, and identical block sizes.
 *
 * @param sec Spark execution context used to look up the input matrix characteristics
 * @throws DMLRuntimeException if dimensions are unknown, the shapes are incompatible,
 *         or the block sizes differ
 */
protected void checkMatrixMatrixBinaryCharacteristics(SparkExecutionContext sec) {
    MatrixCharacteristics left = sec.getMatrixCharacteristics(input1.getName());
    MatrixCharacteristics right = sec.getMatrixCharacteristics(input2.getName());

    // Both inputs need known dimensions before any shape comparison makes sense.
    boolean allDimsKnown = left.dimsKnown() && right.dimsKnown();
    if (!allDimsKnown) {
        throw new DMLRuntimeException("Unknown dimensions matrix-matrix binary operations: " + "[" + left.getRows() + "x" + left.getCols() + " vs " + right.getRows() + "x" + right.getCols() + "]");
    }

    // Accepted shape combinations.
    boolean sameShape = left.getRows() == right.getRows() && left.getCols() == right.getCols();
    // matrix-colvector
    boolean matrixColVector = left.getRows() == right.getRows() && right.getCols() == 1;
    // matrix-rowvector
    boolean matrixRowVector = left.getCols() == right.getCols() && right.getRows() == 1;
    // outer colvector-rowvector
    boolean outerVectors = left.getCols() == 1 && right.getRows() == 1;
    if (!(sameShape || matrixColVector || matrixRowVector || outerVectors)) {
        throw new DMLRuntimeException("Dimensions mismatch matrix-matrix binary operations: " + "[" + left.getRows() + "x" + left.getCols() + " vs " + right.getRows() + "x" + right.getCols() + "]");
    }

    // Block sizes must agree in both dimensions.
    boolean sameBlocking = left.getRowsPerBlock() == right.getRowsPerBlock()
        && left.getColsPerBlock() == right.getColsPerBlock();
    if (!sameBlocking) {
        throw new DMLRuntimeException("Blocksize mismatch matrix-matrix binary operations: " + "[" + left.getRowsPerBlock() + "x" + left.getColsPerBlock() + " vs " + right.getRowsPerBlock() + "x" + right.getColsPerBlock() + "]");
    }
}
Also used : MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 79 with MatrixCharacteristics

use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by apache.

the class BinarySPInstruction method processMatrixMatrixBinaryInstruction.

/**
 * Common processing of binary matrix-matrix instructions: validates the inputs,
 * replicates vector inputs where required, joins both block RDDs and applies the
 * binary operator to each joined pair of blocks.
 *
 * @param ec execution context
 */
protected void processMatrixMatrixBinaryInstruction(ExecutionContext ec) {
    SparkExecutionContext sparkCtx = (SparkExecutionContext) ec;

    // Sanity-check the dimensions, then update the output characteristics
    // (they are read back below as outStats).
    checkMatrixMatrixBinaryCharacteristics(sparkCtx);
    updateBinaryOutputMatrixCharacteristics(sparkCtx);

    // Input RDDs and the characteristics of inputs and output.
    JavaPairRDD<MatrixIndexes, MatrixBlock> left = sparkCtx.getBinaryBlockRDDHandleForVariable(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> right = sparkCtx.getBinaryBlockRDDHandleForVariable(input2.getName());
    MatrixCharacteristics leftStats = sparkCtx.getMatrixCharacteristics(input1.getName());
    MatrixCharacteristics rightStats = sparkCtx.getMatrixCharacteristics(input2.getName());
    MatrixCharacteristics outStats = sparkCtx.getMatrixCharacteristics(output.getName());
    BinaryOperator binaryOp = (BinaryOperator) _optr;

    // Vector replication if required (mv or outer operations).
    boolean rightIsRowVector = (rightStats.getRows() == 1 && leftStats.getRows() > 1);
    long leftReplicas = getNumReplicas(leftStats, rightStats, true);
    long rightReplicas = getNumReplicas(leftStats, rightStats, false);
    if (leftReplicas > 1) {
        left = left.flatMapToPair(new ReplicateVectorFunction(false, leftReplicas));
    }
    if (rightReplicas > 1) {
        right = right.flatMapToPair(new ReplicateVectorFunction(rightIsRowVector, rightReplicas));
    }

    // Take the partition count of a hash-partitioned input (left first, then right);
    // otherwise use the smaller of the summed input partitions and twice the
    // preferred partition count of the output.
    int joinPartitions;
    if (SparkUtils.isHashPartitioned(left)) {
        joinPartitions = left.getNumPartitions();
    } else if (SparkUtils.isHashPartitioned(right)) {
        joinPartitions = right.getNumPartitions();
    } else {
        joinPartitions = Math.min(left.getNumPartitions() + right.getNumPartitions(), 2 * SparkUtils.getNumPreferredPartitions(outStats));
    }

    // Execute the binary operation on the joined blocks.
    JavaPairRDD<MatrixIndexes, MatrixBlock> result =
        left.join(right, joinPartitions).mapValues(new MatrixMatrixBinaryOpFunction(binaryOp));

    // Register the output RDD handle and its lineage.
    sparkCtx.setRDDHandleForVariable(output.getName(), result);
    sparkCtx.addLineageRDD(output.getName(), input1.getName());
    sparkCtx.addLineageRDD(output.getName(), input2.getName());
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) ReplicateVectorFunction(org.apache.sysml.runtime.instructions.spark.functions.ReplicateVectorFunction) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) BinaryOperator(org.apache.sysml.runtime.matrix.operators.BinaryOperator) MatrixMatrixBinaryOpFunction(org.apache.sysml.runtime.instructions.spark.functions.MatrixMatrixBinaryOpFunction) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 80 with MatrixCharacteristics

use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by apache.

the class BinarySPInstruction method checkBinaryAppendInputCharacteristics.

/**
 * Validates the two inputs of an append (cbind/rbind) instruction.
 *
 * @param sec Spark execution context used to look up the input matrix characteristics
 * @param cbind true for column-wise append (row counts must match), false for
 *        row-wise append (column counts must match)
 * @param checkSingleBlk if true, additionally require that the summed column count of
 *        both inputs does not exceed the column block size of the first input
 * @param checkAligned if true, additionally require that the column count of the first
 *        input is a multiple of its column block size
 * @throws DMLRuntimeException if any of the checks fails
 */
protected void checkBinaryAppendInputCharacteristics(SparkExecutionContext sec, boolean cbind, boolean checkSingleBlk, boolean checkAligned) {
    MatrixCharacteristics left = sec.getMatrixCharacteristics(input1.getName());
    MatrixCharacteristics right = sec.getMatrixCharacteristics(input2.getName());

    // Mandatory checks; each one throws, so they are evaluated as independent guards.
    if (!(left.dimsKnown() && right.dimsKnown())) {
        throw new DMLRuntimeException("The dimensions unknown for inputs");
    }
    if (cbind && left.getRows() != right.getRows()) {
        throw new DMLRuntimeException("The number of rows of inputs should match for append-cbind instruction");
    }
    if (!cbind && left.getCols() != right.getCols()) {
        throw new DMLRuntimeException("The number of columns of inputs should match for append-rbind instruction");
    }
    boolean sameBlocking = left.getRowsPerBlock() == right.getRowsPerBlock()
        && left.getColsPerBlock() == right.getColsPerBlock();
    if (!sameBlocking) {
        throw new DMLRuntimeException("The block sizes donot match for input matrices");
    }

    // Optional check: the appended columns must fit into a single column block.
    if (checkSingleBlk && left.getCols() + right.getCols() > left.getColsPerBlock()) {
        throw new DMLRuntimeException("Output must have at most one column block");
    }

    // Optional check: the first input must end exactly on a column block boundary.
    if (checkAligned && left.getCols() % left.getColsPerBlock() != 0) {
        throw new DMLRuntimeException("Input matrices are not aligned to blocksize boundaries. Wrong append selected");
    }
}
Also used : MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Aggregations

MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)296 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)102 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)89 MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes)70 TestConfiguration (org.apache.sysml.test.integration.TestConfiguration)50 MetaDataFormat (org.apache.sysml.runtime.matrix.MetaDataFormat)47 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)45 RUNTIME_PLATFORM (org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM)42 SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)37 CellIndex (org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex)37 IOException (java.io.IOException)30 FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock)27 JavaPairRDD (org.apache.spark.api.java.JavaPairRDD)22 RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject)22 ArrayList (java.util.ArrayList)19 ValueType (org.apache.sysml.parser.Expression.ValueType)19 Path (org.apache.hadoop.fs.Path)17 LongWritable (org.apache.hadoop.io.LongWritable)16 Test (org.junit.Test)15 Text (org.apache.hadoop.io.Text)14