Search in sources :

Example 86 with SparkExecutionContext

use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project systemml by apache.

the class BinUaggChainSPInstruction method processInstruction.

/**
 * Runs the chained binary / unary-aggregate function over every block of the
 * first input matrix and registers the resulting RDD as the output variable.
 *
 * @param ec execution context; it is cast to SparkExecutionContext
 */
@Override
public void processInstruction(ExecutionContext ec) {
    final SparkExecutionContext sparkCtx = (SparkExecutionContext) ec;

    // fetch the binary-block RDD that backs the first input operand
    final JavaPairRDD<MatrixIndexes, MatrixBlock> inputBlocks =
        sparkCtx.getBinaryBlockRDDHandleForVariable(input1.getName());

    // apply the chain function to the value (block) of every pair
    final RDDBinUaggChainFunction chainFn = new RDDBinUaggChainFunction(_bOp, _uaggOp);
    final JavaPairRDD<MatrixIndexes, MatrixBlock> resultBlocks = inputBlocks.mapValues(chainFn);

    // publish the result: output characteristics first, then RDD handle and lineage
    updateUnaryOutputMatrixCharacteristics(sparkCtx);
    sparkCtx.setRDDHandleForVariable(output.getName(), resultBlocks);
    sparkCtx.addLineageRDD(output.getName(), input1.getName());
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)

Example 87 with SparkExecutionContext

use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project systemml by apache.

the class BinarySPInstruction method processMatrixMatrixBinaryInstruction.

/**
 * Common binary matrix-matrix process instruction: replicates vector inputs
 * where {@code getNumReplicas} asks for it, joins both block RDDs and applies
 * the binary operator to each joined pair of blocks.
 *
 * @param ec execution context
 */
protected void processMatrixMatrixBinaryInstruction(ExecutionContext ec) {
    final SparkExecutionContext sparkCtx = (SparkExecutionContext) ec;

    // validate the input dimensions, then derive the output characteristics
    checkMatrixMatrixBinaryCharacteristics(sparkCtx);
    updateBinaryOutputMatrixCharacteristics(sparkCtx);

    // input RDDs and the characteristics of all involved matrices
    JavaPairRDD<MatrixIndexes, MatrixBlock> lhs = sparkCtx.getBinaryBlockRDDHandleForVariable(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> rhs = sparkCtx.getBinaryBlockRDDHandleForVariable(input2.getName());
    final MatrixCharacteristics lhsMc = sparkCtx.getMatrixCharacteristics(input1.getName());
    final MatrixCharacteristics rhsMc = sparkCtx.getMatrixCharacteristics(input2.getName());
    final MatrixCharacteristics outMc = sparkCtx.getMatrixCharacteristics(output.getName());
    final BinaryOperator binaryOp = (BinaryOperator) _optr;

    // replicate vectors if required (mv or outer operations)
    final boolean rhsIsRowVector = rhsMc.getRows() == 1 && lhsMc.getRows() > 1;
    final long lhsReplicas = getNumReplicas(lhsMc, rhsMc, true);
    final long rhsReplicas = getNumReplicas(lhsMc, rhsMc, false);
    if (lhsReplicas > 1) {
        lhs = lhs.flatMapToPair(new ReplicateVectorFunction(false, lhsReplicas));
    }
    if (rhsReplicas > 1) {
        rhs = rhs.flatMapToPair(new ReplicateVectorFunction(rhsIsRowVector, rhsReplicas));
    }

    // partition count for the join: reuse the count of a hash-partitioned input,
    // otherwise take the summed input partitions capped at twice the preferred
    // number of partitions for the output
    final int joinPartitions;
    if (SparkUtils.isHashPartitioned(lhs)) {
        joinPartitions = lhs.getNumPartitions();
    } else if (SparkUtils.isHashPartitioned(rhs)) {
        joinPartitions = rhs.getNumPartitions();
    } else {
        final int summedPartitions = lhs.getNumPartitions() + rhs.getNumPartitions();
        final int partitionCap = 2 * SparkUtils.getNumPreferredPartitions(outMc);
        joinPartitions = Math.min(summedPartitions, partitionCap);
    }

    // join the inputs and apply the binary operator to each pair of blocks
    final JavaPairRDD<MatrixIndexes, MatrixBlock> result =
        lhs.join(rhs, joinPartitions).mapValues(new MatrixMatrixBinaryOpFunction(binaryOp));

    // register the output RDD and its lineage to both inputs
    sparkCtx.setRDDHandleForVariable(output.getName(), result);
    sparkCtx.addLineageRDD(output.getName(), input1.getName());
    sparkCtx.addLineageRDD(output.getName(), input2.getName());
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) ReplicateVectorFunction(org.apache.sysml.runtime.instructions.spark.functions.ReplicateVectorFunction) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) BinaryOperator(org.apache.sysml.runtime.matrix.operators.BinaryOperator) MatrixMatrixBinaryOpFunction(org.apache.sysml.runtime.instructions.spark.functions.MatrixMatrixBinaryOpFunction) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 88 with SparkExecutionContext

use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project systemml by apache.

the class BinarySPInstruction method processMatrixScalarBinaryInstruction.

/**
 * Common binary matrix-scalar process instruction: whichever operand is the
 * matrix supplies the block RDD, the other operand supplies the scalar
 * constant that is bound into the scalar operator applied to every block.
 *
 * @param ec execution context
 */
protected void processMatrixScalarBinaryInstruction(ExecutionContext ec) {
    final SparkExecutionContext sparkCtx = (SparkExecutionContext) ec;

    // decide once which operand is the matrix; the other one is the scalar
    final boolean firstIsMatrix = input1.getDataType() == DataType.MATRIX;

    // matrix side: name of the variable backing the RDD, and the RDD itself
    final String matrixVar = firstIsMatrix ? input1.getName() : input2.getName();
    final JavaPairRDD<MatrixIndexes, MatrixBlock> matrixBlocks =
        sparkCtx.getBinaryBlockRDDHandleForVariable(matrixVar);

    // scalar side: resolve the value and bind it into the operator
    final CPOperand scalarOperand = firstIsMatrix ? input2 : input1;
    final ScalarObject scalarValue = (ScalarObject) ec.getScalarInput(
        scalarOperand.getName(), scalarOperand.getValueType(), scalarOperand.isLiteral());
    final ScalarOperator boundOp = ((ScalarOperator) _optr).setConstant(scalarValue.getDoubleValue());

    // apply the scalar operation to every block
    final JavaPairRDD<MatrixIndexes, MatrixBlock> result =
        matrixBlocks.mapValues(new MatrixScalarUnaryFunction(boundOp));

    // put the output RDD handle into the symbol table and record its lineage
    updateUnaryOutputMatrixCharacteristics(sparkCtx, matrixVar, output.getName());
    sparkCtx.setRDDHandleForVariable(output.getName(), result);
    sparkCtx.addLineageRDD(output.getName(), matrixVar);
}
Also used : ScalarOperator(org.apache.sysml.runtime.matrix.operators.ScalarOperator) ScalarObject(org.apache.sysml.runtime.instructions.cp.ScalarObject) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixScalarUnaryFunction(org.apache.sysml.runtime.instructions.spark.functions.MatrixScalarUnaryFunction) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) CPOperand(org.apache.sysml.runtime.instructions.cp.CPOperand) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)

Example 89 with SparkExecutionContext

use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project systemml by apache.

the class BinarySPInstruction method processMatrixBVectorBinaryInstruction.

/**
 * Common binary process instruction for a matrix RDD (first input) combined
 * with a broadcast operand (second input).
 *
 * @param ec execution context
 * @param vtype vector type handed to the matrix-vector partition function
 */
protected void processMatrixBVectorBinaryInstruction(ExecutionContext ec, VectorType vtype) {
    final SparkExecutionContext sparkCtx = (SparkExecutionContext) ec;

    // sanity check dimensions
    checkMatrixMatrixBinaryCharacteristics(sparkCtx);

    // first input is consumed as an RDD, second input as a broadcast
    final String matrixVar = input1.getName();
    final String broadcastVar = input2.getName();
    final JavaPairRDD<MatrixIndexes, MatrixBlock> matrixBlocks =
        sparkCtx.getBinaryBlockRDDHandleForVariable(matrixVar);
    final PartitionedBroadcast<MatrixBlock> broadcast = sparkCtx.getBroadcastForVariable(broadcastVar);
    final MatrixCharacteristics matrixMc = sparkCtx.getMatrixCharacteristics(matrixVar);
    final MatrixCharacteristics broadcastMc = sparkCtx.getMatrixCharacteristics(broadcastVar);
    final BinaryOperator binaryOp = (BinaryOperator) _optr;

    // outer operation: column vector on the left, row vector on the right
    final boolean lhsIsColVector = matrixMc.getRows() > 1 && matrixMc.getCols() == 1;
    final boolean isOuter = lhsIsColVector && broadcastMc.getRows() == 1 && broadcastMc.getCols() > 1;

    // execute map binary operation
    final JavaPairRDD<MatrixIndexes, MatrixBlock> result;
    if (!isOuter) {
        // default case: mapPartitionsToPair with preservesPartitioning=true keeps the
        // partitioning information for binary mv operations, whose keys are guaranteed
        // not to change; mapValues is not an option because the function needs the
        // block key for broadcast lookups.
        // alternative: result = matrixBlocks.mapToPair(new MatrixVectorBinaryOpFunction(binaryOp, broadcast, vtype));
        result = matrixBlocks.mapPartitionsToPair(
            new MatrixVectorBinaryOpPartitionFunction(binaryOp, broadcast, vtype), true);
    } else {
        result = matrixBlocks.flatMapToPair(new OuterVectorBinaryOpFunction(binaryOp, broadcast));
    }

    // register the output RDD with lineage to the RDD input and the broadcast input
    updateBinaryOutputMatrixCharacteristics(sparkCtx);
    sparkCtx.setRDDHandleForVariable(output.getName(), result);
    sparkCtx.addLineageRDD(output.getName(), matrixVar);
    sparkCtx.addLineageBroadcast(output.getName(), broadcastVar);
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) MatrixVectorBinaryOpPartitionFunction(org.apache.sysml.runtime.instructions.spark.functions.MatrixVectorBinaryOpPartitionFunction) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) BinaryOperator(org.apache.sysml.runtime.matrix.operators.BinaryOperator) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) OuterVectorBinaryOpFunction(org.apache.sysml.runtime.instructions.spark.functions.OuterVectorBinaryOpFunction)

Example 90 with SparkExecutionContext

use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project systemml by apache.

the class RemoteDPParForSpark method runJob.

/**
 * Runs a data-partitioned parfor job on Spark: the partitioned input matrix is
 * optionally grouped by key, the parfor worker is executed over the
 * partitions, and the collected results are de-serialized into a job return.
 *
 * @param pfid parfor id (not read by this method)
 * @param itervar name of the iteration variable, passed on to the worker
 * @param matrixvar name of the partitioned input matrix variable
 * @param program serialized parfor program, passed on to the worker
 * @param clsMap class map, passed on to the worker
 * @param resultFile result file name (not read by this method)
 * @param input input matrix object (not read by this method; the matrix is
 *        looked up in the execution context via {@code matrixvar})
 * @param ec execution context; it is cast to SparkExecutionContext
 * @param dpf data partition format
 * @param oi output info used for the partitioned input and by the worker
 * @param tSparseCol flag passed on to the worker
 * @param enableCPCaching flag passed on to the worker
 * @param numReducers lower bound for the number of reducers used when grouping
 * @return job return holding the number of tasks, the number of iterations and
 *         the result variable maps
 */
public static RemoteParForJobReturn runJob(long pfid, String itervar, String matrixvar, String program, HashMap<String, byte[]> clsMap, String resultFile, MatrixObject input, ExecutionContext ec, PartitionFormat dpf, OutputInfo oi, boolean tSparseCol, boolean enableCPCaching, int numReducers) {
    String jobname = "ParFor-DPESP";
    long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    JavaSparkContext sc = sec.getSparkContext();
    // prepare input parameters
    MatrixObject mo = sec.getMatrixObject(matrixvar);
    MatrixCharacteristics mc = mo.getMatrixCharacteristics();
    // initialize accumulators for tasks/iterations, and inputs
    JavaPairRDD<MatrixIndexes, MatrixBlock> in = sec.getBinaryBlockRDDHandleForVariable(matrixvar);
    LongAccumulator aTasks = sc.sc().longAccumulator("tasks");
    LongAccumulator aIters = sc.sc().longAccumulator("iterations");
    // compute number of reducers (to avoid OOMs and reduce memory pressure);
    // clamp against numParts BEFORE narrowing to int: narrowing first (as the
    // previous "(int) dpf.getNumParts(mc)" did) would wrap a partition count
    // beyond Integer.MAX_VALUE into an arbitrary, possibly negative, value
    int numParts = SparkUtils.getNumPreferredPartitions(mc, in);
    int numReducers2 = Math.max(numReducers, (int) Math.min(numParts, dpf.getNumParts(mc)));
    // core parfor datapartition-execute (w/ or w/o shuffle, depending on data characteristics)
    RemoteDPParForSparkWorker efun = new RemoteDPParForSparkWorker(program, clsMap, matrixvar, itervar, enableCPCaching, mc, tSparseCol, dpf, oi, aTasks, aIters);
    JavaPairRDD<Long, Writable> tmp = getPartitionedInput(sec, matrixvar, oi, dpf);
    // execute parfor tasks, incl cleanup
    List<Tuple2<Long, String>> out = (requiresGrouping(dpf, mo) ? tmp.groupByKey(numReducers2) : tmp.map(new PseudoGrouping()))
        .mapPartitionsToPair(efun)
        .collect();
    // de-serialize results
    LocalVariableMap[] results = RemoteParForUtils.getResults(out, LOG);
    // read the accumulator values; collect() above has already run the job
    int numTasks = aTasks.value().intValue();
    int numIters = aIters.value().intValue();
    // create output symbol table entries
    RemoteParForJobReturn ret = new RemoteParForJobReturn(true, numTasks, numIters, results);
    // maintain statistics
    Statistics.incrementNoOfCompiledSPInst();
    Statistics.incrementNoOfExecutedSPInst();
    if (DMLScript.STATISTICS) {
        Statistics.maintainCPHeavyHitters(jobname, System.nanoTime() - t0);
    }
    return ret;
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) Writable(org.apache.hadoop.io.Writable) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) LongAccumulator(org.apache.spark.util.LongAccumulator) Tuple2(scala.Tuple2) LocalVariableMap(org.apache.sysml.runtime.controlprogram.LocalVariableMap) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext)

Aggregations

SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)112 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)92 MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes)92 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)71 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)39 JavaPairRDD (org.apache.spark.api.java.JavaPairRDD)22 FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock)14 DoubleObject (org.apache.sysml.runtime.instructions.cp.DoubleObject)12 ScalarObject (org.apache.sysml.runtime.instructions.cp.ScalarObject)9 PartitionedBroadcast (org.apache.sysml.runtime.instructions.spark.data.PartitionedBroadcast)8 FilterNonEmptyBlocksFunction (org.apache.sysml.runtime.instructions.spark.functions.FilterNonEmptyBlocksFunction)7 InputInfo (org.apache.sysml.runtime.matrix.data.InputInfo)7 ArrayList (java.util.ArrayList)6 CPOperand (org.apache.sysml.runtime.instructions.cp.CPOperand)6 RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject)6 AggregateDropCorrectionFunction (org.apache.sysml.runtime.instructions.spark.functions.AggregateDropCorrectionFunction)6 AggregateOperator (org.apache.sysml.runtime.matrix.operators.AggregateOperator)6 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)5 FrameObject (org.apache.sysml.runtime.controlprogram.caching.FrameObject)5 ValueType (org.apache.sysml.parser.Expression.ValueType)4