Search in sources :

Example 31 with SparkExecutionContext

use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project systemml by apache.

the class PmmSPInstruction method processInstruction.

@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    String rddVar = (_type == CacheType.LEFT) ? input2.getName() : input1.getName();
    String bcastVar = (_type == CacheType.LEFT) ? input1.getName() : input2.getName();
    MatrixCharacteristics mc = sec.getMatrixCharacteristics(output.getName());
    long rlen = sec.getScalarInput(_nrow.getName(), _nrow.getValueType(), _nrow.isLiteral()).getLongValue();
    // get inputs
    JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(rddVar);
    PartitionedBroadcast<MatrixBlock> in2 = sec.getBroadcastForVariable(bcastVar);
    // execute pmm instruction
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = in1.flatMapToPair(new RDDPMMFunction(_type, in2, rlen, mc.getRowsPerBlock()));
    out = RDDAggregateUtils.sumByKeyStable(out, false);
    // put output RDD handle into symbol table
    sec.setRDDHandleForVariable(output.getName(), out);
    sec.addLineageRDD(output.getName(), rddVar);
    sec.addLineageBroadcast(output.getName(), bcastVar);
    // update output statistics if not inferred
    updateBinaryMMOutputMatrixCharacteristics(sec, false);
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 32 with SparkExecutionContext

use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project systemml by apache.

the class QuantilePickSPInstruction method processInstruction.

@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    // get input rdds
    JavaPairRDD<MatrixIndexes, MatrixBlock> in = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    MatrixCharacteristics mc = sec.getMatrixCharacteristics(input1.getName());
    // (in contrast to cp instructions, w/o weights does not materializes weights of 1)
    switch(_type) {
        case VALUEPICK:
            {
                ScalarObject quantile = ec.getScalarInput(input2);
                double[] wt = getWeightedQuantileSummary(in, mc, quantile.getDoubleValue());
                ec.setScalarOutput(output.getName(), new DoubleObject(wt[3]));
                break;
            }
        case MEDIAN:
            {
                double[] wt = getWeightedQuantileSummary(in, mc, 0.5);
                ec.setScalarOutput(output.getName(), new DoubleObject(wt[3]));
                break;
            }
        case IQM:
            {
                double[] wt = getWeightedQuantileSummary(in, mc, 0.25, 0.75);
                long key25 = (long) Math.ceil(wt[1]);
                long key75 = (long) Math.ceil(wt[2]);
                JavaPairRDD<MatrixIndexes, MatrixBlock> out = in.filter(new FilterFunction(key25 + 1, key75, mc.getRowsPerBlock())).mapToPair(new ExtractAndSumFunction(key25 + 1, key75, mc.getRowsPerBlock()));
                double sum = RDDAggregateUtils.sumStable(out).getValue(0, 0);
                double val = MatrixBlock.computeIQMCorrection(sum, wt[0], wt[3], wt[5], wt[4], wt[6]);
                ec.setScalarOutput(output.getName(), new DoubleObject(val));
                break;
            }
        default:
            throw new DMLRuntimeException("Unsupported qpick operation type: " + _type);
    }
}
Also used : ScalarObject(org.apache.sysml.runtime.instructions.cp.ScalarObject) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) DoubleObject(org.apache.sysml.runtime.instructions.cp.DoubleObject) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 33 with SparkExecutionContext

use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project systemml by apache.

the class ReblockSPInstruction method processInstruction.

@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    // set the output characteristics
    CacheableData<?> obj = sec.getCacheableData(input1.getName());
    MatrixCharacteristics mc = sec.getMatrixCharacteristics(input1.getName());
    MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
    mcOut.set(mc.getRows(), mc.getCols(), brlen, bclen, mc.getNonZeros());
    // get the source format form the meta data
    MetaDataFormat iimd = (MetaDataFormat) obj.getMetaData();
    if (iimd == null)
        throw new DMLRuntimeException("Error: Metadata not found");
    InputInfo iinfo = iimd.getInputInfo();
    // check for in-memory reblock (w/ lazy spark context, potential for latency reduction)
    if (Recompiler.checkCPReblock(sec, input1.getName())) {
        if (input1.getDataType() == DataType.MATRIX)
            Recompiler.executeInMemoryMatrixReblock(sec, input1.getName(), output.getName());
        else if (input1.getDataType() == DataType.FRAME)
            Recompiler.executeInMemoryFrameReblock(sec, input1.getName(), output.getName());
        return;
    }
    // execute matrix/frame reblock
    if (input1.getDataType() == DataType.MATRIX)
        processMatrixReblockInstruction(sec, iinfo);
    else if (input1.getDataType() == DataType.FRAME)
        processFrameReblockInstruction(sec, iinfo);
}
Also used : MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 34 with SparkExecutionContext

use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project systemml by apache.

the class RmmSPInstruction method processInstruction.

@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    // get input rdds
    MatrixCharacteristics mc1 = sec.getMatrixCharacteristics(input1.getName());
    MatrixCharacteristics mc2 = sec.getMatrixCharacteristics(input2.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> in2 = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
    MatrixCharacteristics mcOut = updateBinaryMMOutputMatrixCharacteristics(sec, true);
    // execute Spark RMM instruction
    // step 1: prepare join keys (w/ shallow replication), i/j/k
    JavaPairRDD<TripleIndexes, MatrixBlock> tmp1 = in1.flatMapToPair(new RmmReplicateFunction(mc2.getCols(), mc2.getColsPerBlock(), true));
    JavaPairRDD<TripleIndexes, MatrixBlock> tmp2 = in2.flatMapToPair(new RmmReplicateFunction(mc1.getRows(), mc1.getRowsPerBlock(), false));
    // step 2: join prepared datasets, multiply, and aggregate
    int numPartJoin = Math.max(getNumJoinPartitions(mc1, mc2), SparkExecutionContext.getDefaultParallelism(true));
    int numPartOut = SparkUtils.getNumPreferredPartitions(mcOut);
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = tmp1.join(tmp2, // join by result block
    numPartJoin).mapToPair(// do matrix multiplication
    new RmmMultiplyFunction());
    out = // aggregation per result block
    RDDAggregateUtils.sumByKeyStable(// aggregation per result block
    out, numPartOut, false);
    // put output block into symbol table (no lineage because single block)
    sec.setRDDHandleForVariable(output.getName(), out);
    sec.addLineageRDD(output.getName(), input1.getName());
    sec.addLineageRDD(output.getName(), input2.getName());
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) TripleIndexes(org.apache.sysml.runtime.matrix.data.TripleIndexes) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 35 with SparkExecutionContext

use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project systemml by apache.

the class TernarySPInstruction method processInstruction.

@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = !input1.isMatrix() ? null : sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> in2 = !input2.isMatrix() ? null : sec.getBinaryBlockRDDHandleForVariable(input2.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> in3 = !input3.isMatrix() ? null : sec.getBinaryBlockRDDHandleForVariable(input3.getName());
    MatrixBlock m1 = input1.isMatrix() ? null : new MatrixBlock(ec.getScalarInput(input1).getDoubleValue());
    MatrixBlock m2 = input2.isMatrix() ? null : new MatrixBlock(ec.getScalarInput(input2).getDoubleValue());
    MatrixBlock m3 = input3.isMatrix() ? null : new MatrixBlock(ec.getScalarInput(input3).getDoubleValue());
    TernaryOperator op = (TernaryOperator) _optr;
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = null;
    if (input1.isMatrix() && !input2.isMatrix() && !input3.isMatrix())
        out = in1.mapValues(new TernaryFunctionMSS(op, m1, m2, m3));
    else if (!input1.isMatrix() && input2.isMatrix() && !input3.isMatrix())
        out = in2.mapValues(new TernaryFunctionSMS(op, m1, m2, m3));
    else if (!input1.isMatrix() && !input2.isMatrix() && input3.isMatrix())
        out = in3.mapValues(new TernaryFunctionSSM(op, m1, m2, m3));
    else if (input1.isMatrix() && input2.isMatrix() && !input3.isMatrix())
        out = in1.join(in2).mapValues(new TernaryFunctionMMS(op, m1, m2, m3));
    else if (input1.isMatrix() && !input2.isMatrix() && input3.isMatrix())
        out = in1.join(in3).mapValues(new TernaryFunctionMSM(op, m1, m2, m3));
    else if (!input1.isMatrix() && input2.isMatrix() && input3.isMatrix())
        out = in2.join(in3).mapValues(new TernaryFunctionSMM(op, m1, m2, m3));
    else
        // all matrices
        out = in1.join(in2).join(in3).mapValues(new TernaryFunctionMMM(op, m1, m2, m3));
    // set output RDD
    updateTernaryOutputMatrixCharacteristics(sec);
    sec.setRDDHandleForVariable(output.getName(), out);
    if (input1.isMatrix())
        sec.addLineageRDD(output.getName(), input1.getName());
    if (input2.isMatrix())
        sec.addLineageRDD(output.getName(), input2.getName());
    if (input3.isMatrix())
        sec.addLineageRDD(output.getName(), input3.getName());
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) TernaryOperator(org.apache.sysml.runtime.matrix.operators.TernaryOperator) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)

Aggregations

SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)112 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)92 MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes)92 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)71 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)39 JavaPairRDD (org.apache.spark.api.java.JavaPairRDD)22 FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock)14 DoubleObject (org.apache.sysml.runtime.instructions.cp.DoubleObject)12 ScalarObject (org.apache.sysml.runtime.instructions.cp.ScalarObject)9 PartitionedBroadcast (org.apache.sysml.runtime.instructions.spark.data.PartitionedBroadcast)8 FilterNonEmptyBlocksFunction (org.apache.sysml.runtime.instructions.spark.functions.FilterNonEmptyBlocksFunction)7 InputInfo (org.apache.sysml.runtime.matrix.data.InputInfo)7 ArrayList (java.util.ArrayList)6 CPOperand (org.apache.sysml.runtime.instructions.cp.CPOperand)6 RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject)6 AggregateDropCorrectionFunction (org.apache.sysml.runtime.instructions.spark.functions.AggregateDropCorrectionFunction)6 AggregateOperator (org.apache.sysml.runtime.matrix.operators.AggregateOperator)6 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)5 FrameObject (org.apache.sysml.runtime.controlprogram.caching.FrameObject)5 ValueType (org.apache.sysml.parser.Expression.ValueType)4