Search in sources:

Example 51 with SparkExecutionContext

use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project systemml by apache.

the class RemoteParForSpark method runJob.

public static RemoteParForJobReturn runJob(long pfid, String prog, HashMap<String, byte[]> clsMap, List<Task> tasks, ExecutionContext ec, boolean cpCaching, int numMappers) {
    String jobname = "ParFor-ESP";
    long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    JavaSparkContext sc = sec.getSparkContext();
    // initialize accumulators for tasks/iterations
    LongAccumulator aTasks = sc.sc().longAccumulator("tasks");
    LongAccumulator aIters = sc.sc().longAccumulator("iterations");
    // reset cached shared inputs for correctness in local mode
    long jobid = _jobID.getNextID();
    if (InfrastructureAnalyzer.isLocalMode())
        RemoteParForSparkWorker.cleanupCachedVariables(jobid);
    // run remote_spark parfor job
    // (w/o lazy evaluation to fit existing parfor framework, e.g., result merge)
    // create rdd of parfor tasks (one partition per task), execute the workers, and collect the results
    List<Tuple2<Long, String>> out = sc.parallelize(tasks, tasks.size())
        .flatMapToPair(new RemoteParForSparkWorker(jobid, prog, clsMap, cpCaching, aTasks, aIters))
        .collect();
    // de-serialize results
    LocalVariableMap[] results = RemoteParForUtils.getResults(out, LOG);
    // get accumulator values
    int numTasks = aTasks.value().intValue();
    int numIters = aIters.value().intValue();
    // create output symbol table entries
    RemoteParForJobReturn ret = new RemoteParForJobReturn(true, numTasks, numIters, results);
    // maintain statistics
    Statistics.incrementNoOfCompiledSPInst();
    Statistics.incrementNoOfExecutedSPInst();
    if (DMLScript.STATISTICS)
        Statistics.maintainCPHeavyHitters(jobname, System.nanoTime() - t0);
    return ret;
}
Also used : LongAccumulator(org.apache.spark.util.LongAccumulator) Tuple2(scala.Tuple2) LocalVariableMap(org.apache.sysml.runtime.controlprogram.LocalVariableMap) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext)
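
A note on the idiom above: the two LongAccumulators are created on the driver, incremented inside the worker closure, and read back only after collect() forces execution; reading them before the action completes would return 0. A minimal self-contained sketch of that accumulator pattern, assuming nothing from SystemML (class and accumulator names are illustrative):

import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.util.LongAccumulator;

public class AccumulatorSketch {
    public static void main(String[] args) {
        JavaSparkContext sc = new JavaSparkContext(
            new SparkConf().setAppName("acc-sketch").setMaster("local[2]"));
        // driver side: create a named accumulator (also visible in the Spark UI)
        LongAccumulator aTasks = sc.sc().longAccumulator("tasks");
        // worker side: increment inside the closure
        sc.parallelize(Arrays.asList(1, 2, 3, 4)).foreach(x -> aTasks.add(1));
        // driver side: the value is reliable only after the action has completed
        System.out.println("tasks = " + aTasks.value());
        sc.close();
    }
}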

Example 52 with SparkExecutionContext

use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project systemml by apache.

the class DMLScript method execute.

// /////////////////////////////
// private internal interface
// (core compilation and execute)
// /////////////////////////////
/**
 * The running body of DMLScript execution. This method should be called after execution
 * properties have been correctly set, and customized parameters have been put into argVals.
 *
 * @param dmlScriptStr DML script string
 * @param fnameOptConfig configuration file
 * @param argVals map of argument name-value pairs
 * @param allArgs all command-line arguments
 * @param scriptType type of script (DML or PyDML)
 * @throws IOException if an IOException occurs
 */
private static void execute(String dmlScriptStr, String fnameOptConfig, Map<String, String> argVals, String[] allArgs, ScriptType scriptType) throws IOException {
    SCRIPT_TYPE = scriptType;
    // print basic time and environment info
    printStartExecInfo(dmlScriptStr);
    // Step 1: parse configuration files & write any configuration specific global variables
    DMLConfig dmlconf = DMLConfig.readConfigurationFile(fnameOptConfig);
    ConfigurationManager.setGlobalConfig(dmlconf);
    CompilerConfig cconf = OptimizerUtils.constructCompilerConfig(dmlconf);
    ConfigurationManager.setGlobalConfig(cconf);
    LOG.debug("\nDML config: \n" + dmlconf.getConfigInfo());
    // Sets the GPUs to use for this process (a range, all GPUs, comma separated list or a specific GPU)
    GPUContextPool.AVAILABLE_GPUS = dmlconf.getTextValue(DMLConfig.AVAILABLE_GPUS);
    String evictionPolicy = dmlconf.getTextValue(DMLConfig.GPU_EVICTION_POLICY).toUpperCase();
    try {
        DMLScript.GPU_EVICTION_POLICY = EvictionPolicy.valueOf(evictionPolicy);
    } catch (IllegalArgumentException e) {
        throw new RuntimeException("Unsupported eviction policy: " + evictionPolicy);
    }
    // Step 2: set local/remote memory if requested (for compile in AM context)
    if (dmlconf.getBooleanValue(DMLConfig.YARN_APPMASTER)) {
        DMLAppMasterUtils.setupConfigRemoteMaxMemory(dmlconf);
    }
    // Step 3: parse dml script
    Statistics.startCompileTimer();
    ParserWrapper parser = ParserFactory.createParser(scriptType);
    DMLProgram prog = parser.parse(DML_FILE_PATH_ANTLR_PARSER, dmlScriptStr, argVals);
    // Step 4: construct HOP DAGs (incl LVA, validate, and setup)
    DMLTranslator dmlt = new DMLTranslator(prog);
    dmlt.liveVariableAnalysis(prog);
    dmlt.validateParseTree(prog);
    dmlt.constructHops(prog);
    // init working directories (before usage by following compilation steps)
    initHadoopExecution(dmlconf);
    // Step 5: rewrite HOP DAGs (incl IPA and memory estimates)
    dmlt.rewriteHopsDAG(prog);
    // Step 6: construct lops (incl exec type and op selection)
    dmlt.constructLops(prog);
    if (LOG.isDebugEnabled()) {
        LOG.debug("\n********************** LOPS DAG *******************");
        dmlt.printLops(prog);
        dmlt.resetLopsDAGVisitStatus(prog);
    }
    // Step 7: generate runtime program, incl codegen
    Program rtprog = dmlt.getRuntimeProgram(prog, dmlconf);
    // Step 8: launch SystemML appmaster (if requested and not already in launched AM)
    if (dmlconf.getBooleanValue(DMLConfig.YARN_APPMASTER)) {
        // on successful AM launch the client is done; on launch failure, fall through to normal execute
        if (!isActiveAM() && DMLYarnClientProxy.launchDMLYarnAppmaster(dmlScriptStr, dmlconf, allArgs, rtprog))
            return;
        // in AM context (not failed AM launch)
        if (isActiveAM())
            DMLAppMasterUtils.setupProgramMappingRemoteMaxMemory(rtprog);
    }
    // Step 9: prepare statistics [and optional explain output]
    // count number of compiled MR jobs / SP instructions
    ExplainCounts counts = Explain.countDistributedOperations(rtprog);
    Statistics.resetNoOfCompiledJobs(counts.numJobs);
    // explain plan of program (hops or runtime)
    if (EXPLAIN != ExplainType.NONE)
        LOG.info(Explain.display(prog, rtprog, EXPLAIN, counts));
    Statistics.stopCompileTimer();
    // double costs = CostEstimationWrapper.getTimeEstimate(rtprog, ExecutionContextFactory.createContext());
    // System.out.println("Estimated costs: "+costs);
    // Step 10: execute runtime program
    ExecutionContext ec = null;
    try {
        ec = ExecutionContextFactory.createContext(rtprog);
        ScriptExecutorUtils.executeRuntimeProgram(rtprog, ec, dmlconf, STATISTICS ? STATISTICS_COUNT : 0);
    } finally {
        // instanceof is null-safe, so no explicit null check is needed
        if (ec instanceof SparkExecutionContext)
            ((SparkExecutionContext) ec).close();
        LOG.info("END DML run " + getDateTime());
        // cleanup scratch_space and all working dirs
        cleanupHadoopExecution(dmlconf);
    }
}
Also used : DMLConfig(org.apache.sysml.conf.DMLConfig) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) DMLProgram(org.apache.sysml.parser.DMLProgram) Program(org.apache.sysml.runtime.controlprogram.Program) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) ExecutionContext(org.apache.sysml.runtime.controlprogram.context.ExecutionContext) ExplainCounts(org.apache.sysml.utils.Explain.ExplainCounts) ParserWrapper(org.apache.sysml.parser.ParserWrapper) CompilerConfig(org.apache.sysml.conf.CompilerConfig) DMLTranslator(org.apache.sysml.parser.DMLTranslator)
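
One detail worth noting in Step 1 above: Enum.valueOf is case-sensitive and throws an unchecked IllegalArgumentException, which is why the eviction policy is upper-cased first and the exception is rethrown with a message naming the bad value. A minimal generic sketch of that config-parsing pattern, using a hypothetical Mode enum rather than SystemML's EvictionPolicy:

import java.util.Arrays;

public class EnumConfigSketch {
    // hypothetical enum standing in for DMLScript.EvictionPolicy
    enum Mode { LRU, MRU, MIN_EVICT }

    static Mode parseMode(String raw) {
        String normalized = raw.trim().toUpperCase();
        try {
            return Mode.valueOf(normalized);
        } catch (IllegalArgumentException e) {
            // surface the offending value and the legal options
            throw new RuntimeException("Unsupported mode: " + raw
                + " (expected one of " + Arrays.toString(Mode.values()) + ")");
        }
    }

    public static void main(String[] args) {
        System.out.println(parseMode("lru")); // LRU
        System.out.println(parseMode("oops")); // throws RuntimeException listing the options
    }
}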

Example 53 with SparkExecutionContext

use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project systemml by apache.

the class DataPartitionerRemoteSpark method partitionMatrix.

@Override
@SuppressWarnings("unchecked")
protected void partitionMatrix(MatrixObject in, String fnameNew, InputInfo ii, OutputInfo oi, long rlen, long clen, int brlen, int bclen) {
    String jobname = "ParFor-DPSP";
    long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
    SparkExecutionContext sec = (SparkExecutionContext) _ec;
    try {
        // cleanup existing output files
        MapReduceTool.deleteFileIfExistOnHDFS(fnameNew);
        // get input rdd
        JavaPairRDD<MatrixIndexes, MatrixBlock> inRdd = (JavaPairRDD<MatrixIndexes, MatrixBlock>) sec.getRDDHandleForMatrixObject(in, InputInfo.BinaryBlockInputInfo);
        // determine degree of parallelism
        MatrixCharacteristics mc = in.getMatrixCharacteristics();
        int numRed = (int) determineNumReducers(inRdd, mc, _numRed);
        // run spark remote data partition job
        DataPartitionerRemoteSparkMapper dpfun = new DataPartitionerRemoteSparkMapper(mc, ii, oi, _format, _n);
        DataPartitionerRemoteSparkReducer wfun = new DataPartitionerRemoteSparkReducer(fnameNew, oi, _replication);
        // partition the input blocks
        inRdd.flatMapToPair(dpfun)
            .groupByKey(numRed)  // group partition blocks
            .foreach(wfun);      // write partitions to hdfs
    } catch (Exception ex) {
        throw new DMLRuntimeException(ex);
    }
    // maintain statistics
    Statistics.incrementNoOfCompiledSPInst();
    Statistics.incrementNoOfExecutedSPInst();
    if (DMLScript.STATISTICS) {
        Statistics.maintainCPHeavyHitters(jobname, System.nanoTime() - t0);
    }
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)
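
The job itself is a three-stage Spark pipeline: flatMapToPair re-keys every input block by its target partition, groupByKey(numRed) gathers each partition's blocks on one of numRed reducers, and foreach writes each group out as a side effect. A minimal sketch of the same shape on plain strings (all names illustrative, not SystemML code):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

public class PartitionPipelineSketch {
    public static void main(String[] args) {
        JavaSparkContext sc = new JavaSparkContext(
            new SparkConf().setAppName("dp-sketch").setMaster("local[2]"));
        int numRed = 2; // degree of parallelism for the grouping stage
        // re-key: emit one (key, value) pair per token
        JavaPairRDD<String, Integer> pairs =
            sc.parallelize(Arrays.asList("a b", "b c", "c a")).flatMapToPair(line -> {
                List<Tuple2<String, Integer>> out = new ArrayList<>();
                for (String tok : line.split(" "))
                    out.add(new Tuple2<>(tok, 1));
                return out.iterator();
            });
        // gather all values of a key on one of numRed reducers,
        // then perform a side-effecting write per group (here: just print)
        pairs.groupByKey(numRed)
            .foreach(kv -> System.out.println(kv._1() + " -> " + kv._2()));
        sc.close();
    }
}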

Example 54 with SparkExecutionContext

use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project systemml by apache.

the class LocalParWorker method run.

@Override
public void run() {
    // monitoring start
    Timing time1 = (_monitor ? new Timing(true) : null);
    // set thread-local scheduler pool (only if a spark context has already been created)
    if (OptimizerUtils.isSparkExecutionMode() && SparkExecutionContext.isSparkContextCreated()) {
        SparkExecutionContext sec = (SparkExecutionContext) _ec;
        sec.setThreadLocalSchedulerPool("parforPool" + _workerID);
    }
    // initialize the GPU context for this thread
    if (DMLScript.USE_ACCELERATOR) {
        try {
            _ec.getGPUContext(0).initializeThread();
        } catch (DMLRuntimeException e) {
            LOG.error("Error executing task because of failure in GPU backend: ", e);
            LOG.error("Stopping LocalParWorker.");
            return;
        }
    }
    // setup compiler config for worker thread
    ConfigurationManager.setLocalConfig(_cconf);
    // continuous execution (execute tasks until (1) stopped or (2) no more tasks)
    Task lTask = null;
    while (!_stopped) {
        // dequeue the next task (abort on NO_MORE_TASKS or error)
        try {
            lTask = _taskQueue.dequeueTask();
            // task queue closed (no more tasks): normal end of parallel worker
            if (lTask == LocalTaskQueue.NO_MORE_TASKS)
                break;
        } catch (Exception ex) {
            // abort on taskqueue error
            LOG.warn("Error reading from task queue: " + ex.getMessage());
            LOG.warn("Stopping LocalParWorker.");
            // no exception thrown to prevent blocking on join
            break;
        }
        // execute the task sequentially (re-try on error)
        boolean success = false;
        int retrys = _max_retry;
        while (!success) {
            try {
                // /////
                // core execution (see ParWorker)
                executeTask(lTask);
                success = true;
            } catch (Exception ex) {
                LOG.error("Failed to execute " + lTask.toString() + ", retry:" + retrys, ex);
                if (retrys > 0)
                    // retry on task error
                    retrys--;
                else {
                    // abort on no remaining retrys
                    LOG.error("Error executing task: ", ex);
                    LOG.error("Stopping LocalParWorker.");
                    // no exception thrown to prevent blocking on join
                    break;
                }
            }
        }
    }
    // cleanup fair scheduler pool for worker thread
    if (OptimizerUtils.isSparkExecutionMode() && SparkExecutionContext.isSparkContextCreated()) {
        SparkExecutionContext sec = (SparkExecutionContext) _ec;
        sec.cleanupThreadLocalSchedulerPool();
    }
    if (_monitor) {
        StatisticMonitor.putPWStat(_workerID, Stat.PARWRK_NUMTASKS, _numTasks);
        StatisticMonitor.putPWStat(_workerID, Stat.PARWRK_NUMITERS, _numIters);
        StatisticMonitor.putPWStat(_workerID, Stat.PARWRK_EXEC_T, time1.stop());
    }
}
Also used : Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)
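
setThreadLocalSchedulerPool and cleanupThreadLocalSchedulerPool wrap Spark's thread-local scheduler-pool property: with the fair scheduler enabled, the pool a job lands in is chosen per submitting thread, so each parfor worker can isolate its jobs. A minimal sketch of the underlying Spark API (pool name illustrative; assumes fair scheduling, which is what the pool assignment relies on):

import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

public class SchedulerPoolSketch {
    public static void main(String[] args) {
        // fair scheduling must be enabled for per-thread pools to take effect
        JavaSparkContext sc = new JavaSparkContext(new SparkConf()
            .setAppName("pool-sketch").setMaster("local[2]")
            .set("spark.scheduler.mode", "FAIR"));
        // route all jobs submitted from this thread into the named pool
        sc.setLocalProperty("spark.scheduler.pool", "parforPool1");
        try {
            sc.parallelize(Arrays.asList(1, 2, 3)).count();
        } finally {
            // clear the thread-local assignment, mirroring the cleanup above
            sc.setLocalProperty("spark.scheduler.pool", null);
        }
        sc.close();
    }
}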

Example 55 with SparkExecutionContext

use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project incubator-systemml by apache.

the class MatrixBuiltinSPInstruction method processInstruction.

@Override
public void processInstruction(ExecutionContext ec) throws DMLRuntimeException {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    // get input
    JavaPairRDD<MatrixIndexes, MatrixBlock> in = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    // execute unary builtin operation
    UnaryOperator uop = (UnaryOperator) _optr;
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = in.mapValues(new RDDMatrixBuiltinUnaryOp(uop));
    // set output RDD
    updateUnaryOutputMatrixCharacteristics(sec);
    sec.setRDDHandleForVariable(output.getName(), out);
    sec.addLineageRDD(output.getName(), input1.getName());
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixIndexes(org.apache.sysml.runtime.matrix.data.MatrixIndexes) SparkExecutionContext(org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext) UnaryOperator(org.apache.sysml.runtime.matrix.operators.UnaryOperator)
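
This is the simplest instruction shape in the set: a purely element-wise operation applied block by block with mapValues, which leaves the keys (block indexes) untouched and therefore preserves the RDD's partitioner. A minimal sketch of the same shape on plain key-value data (names illustrative):

import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

public class MapValuesSketch {
    public static void main(String[] args) {
        JavaSparkContext sc = new JavaSparkContext(
            new SparkConf().setAppName("mv-sketch").setMaster("local[2]"));
        JavaPairRDD<Long, Double> in = sc.parallelizePairs(Arrays.asList(
            new Tuple2<>(1L, -2.0), new Tuple2<>(2L, 3.5)));
        // apply a unary op to every value; keys and partitioning are preserved
        JavaPairRDD<Long, Double> out = in.mapValues(Math::abs);
        out.collect().forEach(kv -> System.out.println(kv._1() + ": " + kv._2()));
        sc.close();
    }
}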

Aggregations

SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext): 112
MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock): 92
MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes): 92
MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics): 71
DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 39
JavaPairRDD (org.apache.spark.api.java.JavaPairRDD): 22
FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock): 14
DoubleObject (org.apache.sysml.runtime.instructions.cp.DoubleObject): 12
ScalarObject (org.apache.sysml.runtime.instructions.cp.ScalarObject): 9
PartitionedBroadcast (org.apache.sysml.runtime.instructions.spark.data.PartitionedBroadcast): 8
FilterNonEmptyBlocksFunction (org.apache.sysml.runtime.instructions.spark.functions.FilterNonEmptyBlocksFunction): 7
InputInfo (org.apache.sysml.runtime.matrix.data.InputInfo): 7
ArrayList (java.util.ArrayList): 6
CPOperand (org.apache.sysml.runtime.instructions.cp.CPOperand): 6
RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject): 6
AggregateDropCorrectionFunction (org.apache.sysml.runtime.instructions.spark.functions.AggregateDropCorrectionFunction): 6
AggregateOperator (org.apache.sysml.runtime.matrix.operators.AggregateOperator): 6
JavaSparkContext (org.apache.spark.api.java.JavaSparkContext): 5
FrameObject (org.apache.sysml.runtime.controlprogram.caching.FrameObject): 5
ValueType (org.apache.sysml.parser.Expression.ValueType): 4