Search in sources:

Example 21 with SparkExecutionContext

use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project incubator-systemml by apache.

the class LocalParWorker method run.

@Override
public void run() {
    // monitoring start
    Timing time1 = (_monitor ? new Timing(true) : null);
    // setup thread-local Spark scheduler pool for this worker (only if a Spark context has already been created)
    if (OptimizerUtils.isSparkExecutionMode() && SparkExecutionContext.isSparkContextCreated()) {
        SparkExecutionContext sec = (SparkExecutionContext) _ec;
        sec.setThreadLocalSchedulerPool("parforPool" + _workerID);
    }
    // Initialize this GPUContext to this thread
    if (DMLScript.USE_ACCELERATOR) {
        try {
            _ec.getGPUContext(0).initializeThread();
        } catch (DMLRuntimeException e) {
            LOG.error("Error executing task because of failure in GPU backend: ", e);
            LOG.error("Stopping LocalParWorker.");
            return;
        }
    }
    // setup compiler config for worker thread
    ConfigurationManager.setLocalConfig(_cconf);
    // continuous execution (execute tasks until (1) stopped or (2) no more tasks)
    Task lTask = null;
    while (!_stopped) {
        // dequeue the next task (abort on NO_MORE_TASKS or error)
        try {
            lTask = _taskQueue.dequeueTask();
            // task queue closed (no more tasks): normal end of parallel worker
            if (lTask == LocalTaskQueue.NO_MORE_TASKS)
                break;
        } catch (Exception ex) {
            // abort on taskqueue error
            LOG.warn("Error reading from task queue: " + ex.getMessage());
            LOG.warn("Stopping LocalParWorker.");
            // no exception thrown to prevent blocking on join
            break;
        }
        // execute the task sequentially (re-try on error)
        boolean success = false;
        int retrys = _max_retry;
        while (!success) {
            try {
                // core execution (see ParWorker)
                executeTask(lTask);
                success = true;
            } catch (Exception ex) {
                LOG.error("Failed to execute " + lTask.toString() + ", retry:" + retrys, ex);
                if (retrys > 0)
                    retrys--; // retry on task error
                else {
                    // abort if no retries remain
                    LOG.error("Error executing task: ", ex);
                    LOG.error("Stopping LocalParWorker.");
                    // no exception thrown to prevent blocking on join
                    break;
                }
            }
        }
    }
    // cleanup fair scheduler pool for worker thread
    if (OptimizerUtils.isSparkExecutionMode() && SparkExecutionContext.isSparkContextCreated()) {
        SparkExecutionContext sec = (SparkExecutionContext) _ec;
        sec.cleanupThreadLocalSchedulerPool();
    }
    if (_monitor) {
        StatisticMonitor.putPWStat(_workerID, Stat.PARWRK_NUMTASKS, _numTasks);
        StatisticMonitor.putPWStat(_workerID, Stat.PARWRK_NUMITERS, _numIters);
        StatisticMonitor.putPWStat(_workerID, Stat.PARWRK_EXEC_T, time1.stop());
    }
}
Also used : Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)
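
The setThreadLocalSchedulerPool / cleanupThreadLocalSchedulerPool pair above isolates each parfor worker in its own Spark fair-scheduler pool. Presumably this wraps Spark's thread-local scheduler-pool property; a minimal self-contained sketch of that underlying mechanism, with an illustrative pool name (not SystemML's API):

import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

public class SchedulerPoolSketch {
    public static void main(String[] args) {
        // enable the fair scheduler so per-thread pools take effect
        SparkConf conf = new SparkConf().setAppName("pool-sketch")
            .setMaster("local[*]").set("spark.scheduler.mode", "FAIR");
        JavaSparkContext sc = new JavaSparkContext(conf);
        // thread-local property: jobs submitted from this thread go to this pool
        sc.setLocalProperty("spark.scheduler.pool", "parforPool0");
        try {
            sc.parallelize(Arrays.asList(1, 2, 3)).count();
        } finally {
            // clearing the property is the analogue of the cleanup call above
            sc.setLocalProperty("spark.scheduler.pool", null);
            sc.stop();
        }
    }
}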

Example 22 with SparkExecutionContext

use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project incubator-systemml by apache.

the class AggregateTernarySPInstruction method processInstruction.

@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    // get inputs
    MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> in2 = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
    // third input: matrix, or null if the literal 1
    JavaPairRDD<MatrixIndexes, MatrixBlock> in3 = input3.isLiteral() ? null
        : sec.getBinaryBlockRDDHandleForVariable(input3.getName());
    // execute aggregate ternary operation
    AggregateTernaryOperator aggop = (AggregateTernaryOperator) _optr;
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = null;
    if (in3 != null) {
        // 3 inputs
        out = in1.join(in2).join(in3).mapToPair(new RDDAggregateTernaryFunction(aggop));
    } else {
        // 2 inputs (third is literal 1)
        out = in1.join(in2).mapToPair(new RDDAggregateTernaryFunction2(aggop));
    }
    // aggregate partial results
    if (aggop.indexFn instanceof ReduceAll) { // tak+*
        // aggregate and create output (no lineage because scalar)
        MatrixBlock tmp = RDDAggregateUtils.sumStable(out.values());
        DoubleObject ret = new DoubleObject(tmp.getValue(0, 0));
        sec.setVariable(output.getName(), ret);
    } else if (mcIn.dimsKnown() && mcIn.getCols() <= mcIn.getColsPerBlock()) { // tack+* single block
        // single block aggregation and drop correction
        MatrixBlock ret = RDDAggregateUtils.aggStable(out, aggop.aggOp);
        ret.dropLastRowsOrColumns(aggop.aggOp.correctionLocation);
        // put output block into symbol table (no lineage because single block)
        // this also includes implicit maintenance of matrix characteristics
        sec.setMatrixOutput(output.getName(), ret, getExtendedOpcode());
    } else { // tack+* multi block
        // multi-block aggregation and drop correction
        out = RDDAggregateUtils.aggByKeyStable(out, aggop.aggOp, false);
        out = out.mapValues(new AggregateDropCorrectionFunction(aggop.aggOp));
        // put output RDD handle into symbol table
        updateUnaryAggOutputMatrixCharacteristics(sec, aggop.indexFn);
        sec.setRDDHandleForVariable(output.getName(), out);
        sec.addLineageRDD(output.getName(), input1.getName());
        sec.addLineageRDD(output.getName(), input2.getName());
        if (in3 != null)
            sec.addLineageRDD(output.getName(), input3.getName());
    }
}
Also used : ReduceAll(org.apache.sysml.runtime.functionobjects.ReduceAll) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) DoubleObject(org.apache.sysml.runtime.instructions.cp.DoubleObject) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) AggregateDropCorrectionFunction(org.apache.sysml.runtime.instructions.spark.functions.AggregateDropCorrectionFunction) AggregateTernaryOperator(org.apache.sysml.runtime.matrix.operators.AggregateTernaryOperator) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)
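
For orientation: per the comments above, tak+* is the fused operator for the full aggregation sum(A*B*C) and tack+* its column-wise variant, with a null third input standing in for the literal 1. A minimal local sketch of the scalar semantics on dense arrays (names and data are illustrative):

public class AggregateTernarySketch {
    // scalar semantics behind tak+*: sum(A * B * C), cell by cell
    static double takPlusMult(double[] a, double[] b, double[] c) {
        double sum = 0;
        for (int i = 0; i < a.length; i++)
            sum += a[i] * b[i] * (c != null ? c[i] : 1); // c == null models the literal-1 case
        return sum;
    }

    public static void main(String[] args) {
        double[] a = {1, 2, 3}, b = {4, 5, 6}, c = {1, 0, 1};
        System.out.println(takPlusMult(a, b, c));    // 1*4*1 + 2*5*0 + 3*6*1 = 22
        System.out.println(takPlusMult(a, b, null)); // sum(A*B) = 4 + 10 + 18 = 32
    }
}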

Example 23 with SparkExecutionContext

use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project incubator-systemml by apache.

the class AppendGSPInstruction method processInstruction.

@Override
public void processInstruction(ExecutionContext ec) {
    // general case append (map-extend, aggregate)
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    checkBinaryAppendInputCharacteristics(sec, _cbind, false, false);
    MatrixCharacteristics mc1 = sec.getMatrixCharacteristics(input1.getName());
    MatrixCharacteristics mc2 = sec.getMatrixCharacteristics(input2.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> in2 = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = null;
    // general case: requires shifting and merging, and hence incurs a substantial performance hit
    JavaPairRDD<MatrixIndexes, MatrixBlock> shifted_in2 = in2.flatMapToPair(new ShiftMatrix(mc1, mc2, _cbind));
    out = in1.cogroup(shifted_in2).mapToPair(new MergeWithShiftedBlocks(mc1, mc2, _cbind));
    // put output RDD handle into symbol table
    updateBinaryAppendOutputMatrixCharacteristics(sec, _cbind);
    sec.setRDDHandleForVariable(output.getName(), out);
    sec.addLineageRDD(output.getName(), input1.getName());
    sec.addLineageRDD(output.getName(), input2.getName());
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)
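
The general append shifts every block of the second input past the end of the first and merges blocks that straddle a block boundary, which is why it is the slow path. A simplified sketch of the index shift for cbind, assuming the first input's column count is a multiple of the block size (the real ShiftMatrix also splits blocks in the unaligned case):

import org.apache.sysml.runtime.matrix.data.MatrixIndexes;

public class ShiftSketch {
    // map a block index of the second cbind input to its position in the output,
    // assuming clen1 (columns of the first input) is a multiple of the block size
    static MatrixIndexes shiftForCbind(MatrixIndexes ix, long clen1, int blen) {
        long colBlockOffset = clen1 / blen; // whole block columns contributed by input 1
        return new MatrixIndexes(ix.getRowIndex(), ix.getColumnIndex() + colBlockOffset);
    }

    public static void main(String[] args) {
        // block (1,1) of the second input, appended after a 2000-column matrix with 1000-wide blocks
        MatrixIndexes shifted = shiftForCbind(new MatrixIndexes(1, 1), 2000, 1000);
        System.out.println(shifted.getRowIndex() + "," + shifted.getColumnIndex()); // 1,3
    }
}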

Example 24 with SparkExecutionContext

use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project incubator-systemml by apache.

the class BinUaggChainSPInstruction method processInstruction.

@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    // get input
    JavaPairRDD<MatrixIndexes, MatrixBlock> in = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    // execute unary builtin operation
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = in.mapValues(new RDDBinUaggChainFunction(_bOp, _uaggOp));
    // set output RDD
    updateUnaryOutputMatrixCharacteristics(sec);
    sec.setRDDHandleForVariable(output.getName(), out);
    sec.addLineageRDD(output.getName(), input1.getName());
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)
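
Because RDDBinUaggChainFunction rewrites only the block value and never its index, the instruction can use mapValues, which keeps the existing partitioner and avoids a shuffle. A generic sketch of that pattern (the keys, data, and summing function are illustrative, not the SystemML operator):

import java.util.Arrays;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

public class MapValuesSketch {
    public static void main(String[] args) {
        JavaSparkContext sc = new JavaSparkContext("local[*]", "mapvalues-sketch");
        JavaPairRDD<Long, double[]> blocks = sc.parallelizePairs(Arrays.asList(
            new Tuple2<>(1L, new double[]{1, 2}), new Tuple2<>(2L, new double[]{3, 4})));
        // value-only transformation: keys (block indexes) are untouched,
        // so any existing partitioner is preserved and no shuffle is needed
        JavaPairRDD<Long, Double> sums = blocks.mapValues(v -> Arrays.stream(v).sum());
        sums.collect().forEach(t -> System.out.println(t._1() + " -> " + t._2()));
        sc.stop();
    }
}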

Example 25 with SparkExecutionContext

use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project incubator-systemml by apache.

the class BinarySPInstruction method processMatrixMatrixBinaryInstruction.

/**
 * Common binary matrix-matrix process instruction
 *
 * @param ec execution context
 */
protected void processMatrixMatrixBinaryInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    // sanity check dimensions
    checkMatrixMatrixBinaryCharacteristics(sec);
    updateBinaryOutputMatrixCharacteristics(sec);
    // Get input RDDs
    JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> in2 = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
    MatrixCharacteristics mc1 = sec.getMatrixCharacteristics(input1.getName());
    MatrixCharacteristics mc2 = sec.getMatrixCharacteristics(input2.getName());
    MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
    BinaryOperator bop = (BinaryOperator) _optr;
    // vector replication if required (mv or outer operations)
    boolean rowvector = (mc2.getRows() == 1 && mc1.getRows() > 1);
    long numRepLeft = getNumReplicas(mc1, mc2, true);
    long numRepRight = getNumReplicas(mc1, mc2, false);
    if (numRepLeft > 1)
        in1 = in1.flatMapToPair(new ReplicateVectorFunction(false, numRepLeft));
    if (numRepRight > 1)
        in2 = in2.flatMapToPair(new ReplicateVectorFunction(rowvector, numRepRight));
    int numPrefPart = SparkUtils.isHashPartitioned(in1) ? in1.getNumPartitions() :
        SparkUtils.isHashPartitioned(in2) ? in2.getNumPartitions() :
        Math.min(in1.getNumPartitions() + in2.getNumPartitions(),
            2 * SparkUtils.getNumPreferredPartitions(mcOut));
    // execute binary operation
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = in1.join(in2, numPrefPart).mapValues(new MatrixMatrixBinaryOpFunction(bop));
    // set output RDD
    sec.setRDDHandleForVariable(output.getName(), out);
    sec.addLineageRDD(output.getName(), input1.getName());
    sec.addLineageRDD(output.getName(), input2.getName());
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) ReplicateVectorFunction(org.apache.sysml.runtime.instructions.spark.functions.ReplicateVectorFunction) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) BinaryOperator(org.apache.sysml.runtime.matrix.operators.BinaryOperator) MatrixMatrixBinaryOpFunction(org.apache.sysml.runtime.instructions.spark.functions.MatrixMatrixBinaryOpFunction) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)
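
The replication step exists because a vector has only one block along its short dimension while the matrix has one per block column (or row), so before the join each vector block is copied once for every matrix block it must meet. A minimal sketch of that one-to-many expansion, assuming Spark 2.x (where flatMapToPair returns an Iterator) and illustrative pair keys rather than the real ReplicateVectorFunction:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

public class ReplicateSketch {
    public static void main(String[] args) {
        JavaSparkContext sc = new JavaSparkContext("local[*]", "replicate-sketch");
        // a column vector has a single block column: key = (rowBlock, colBlock = 1)
        JavaPairRDD<Tuple2<Long, Long>, double[]> vec = sc.parallelizePairs(Arrays.asList(
            new Tuple2<>(new Tuple2<>(1L, 1L), new double[]{7, 8})));
        final long numRep = 3; // number of block columns on the matrix side
        // one-to-many expansion: emit one copy of the block per target block column
        JavaPairRDD<Tuple2<Long, Long>, double[]> rep = vec.flatMapToPair(t -> {
            List<Tuple2<Tuple2<Long, Long>, double[]>> out = new ArrayList<>();
            for (long j = 1; j <= numRep; j++)
                out.add(new Tuple2<>(new Tuple2<>(t._1()._1(), j), t._2()));
            return out.iterator();
        });
        System.out.println(rep.count()); // 3 copies, one per block column
        sc.stop();
    }
}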

Aggregations

SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext): 112 usages
MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock): 92 usages
MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes): 92 usages
MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics): 71 usages
DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 39 usages
JavaPairRDD (org.apache.spark.api.java.JavaPairRDD): 22 usages
FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock): 14 usages
DoubleObject (org.apache.sysml.runtime.instructions.cp.DoubleObject): 12 usages
ScalarObject (org.apache.sysml.runtime.instructions.cp.ScalarObject): 9 usages
PartitionedBroadcast (org.apache.sysml.runtime.instructions.spark.data.PartitionedBroadcast): 8 usages
FilterNonEmptyBlocksFunction (org.apache.sysml.runtime.instructions.spark.functions.FilterNonEmptyBlocksFunction): 7 usages
InputInfo (org.apache.sysml.runtime.matrix.data.InputInfo): 7 usages
ArrayList (java.util.ArrayList): 6 usages
CPOperand (org.apache.sysml.runtime.instructions.cp.CPOperand): 6 usages
RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject): 6 usages
AggregateDropCorrectionFunction (org.apache.sysml.runtime.instructions.spark.functions.AggregateDropCorrectionFunction): 6 usages
AggregateOperator (org.apache.sysml.runtime.matrix.operators.AggregateOperator): 6 usages
JavaSparkContext (org.apache.spark.api.java.JavaSparkContext): 5 usages
FrameObject (org.apache.sysml.runtime.controlprogram.caching.FrameObject): 5 usages
ValueType (org.apache.sysml.parser.Expression.ValueType): 4 usages