use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project systemml by apache.
the class RemoteParForSpark method runJob.
/**
 * Executes the given parfor tasks as a single Spark job and collects the
 * serialized worker results.
 *
 * The job is run eagerly (via {@code collect()}) so that the results are
 * available to the existing parfor framework right after this call.
 *
 * @param pfid       parfor id (not used by this method)
 * @param prog       serialized parfor program handed to every worker
 * @param clsMap     class-name to byte-code map handed to every worker
 * @param tasks      tasks to execute; the task rdd is created with one slice per task
 * @param ec         execution context, must be a {@link SparkExecutionContext}
 * @param cpCaching  caching flag forwarded to the workers
 * @param numMappers (not used by this method)
 * @return job return object holding the task/iteration counts and result variables
 */
public static RemoteParForJobReturn runJob(long pfid, String prog, HashMap<String, byte[]> clsMap, List<Task> tasks, ExecutionContext ec, boolean cpCaching, int numMappers) {
    final String jobLabel = "ParFor-ESP";
    final long startNanos = DMLScript.STATISTICS ? System.nanoTime() : 0;

    JavaSparkContext jsc = ((SparkExecutionContext) ec).getSparkContext();

    // counters that the workers update for executed tasks and iterations
    LongAccumulator taskCounter = jsc.sc().longAccumulator("tasks");
    LongAccumulator iterCounter = jsc.sc().longAccumulator("iterations");

    // in local mode, drop cached shared inputs of this job id for correctness
    long jobid = _jobID.getNextID();
    if (InfrastructureAnalyzer.isLocalMode()) {
        RemoteParForSparkWorker.cleanupCachedVariables(jobid);
    }

    // run the remote_spark parfor job: one slice per task, no lazy evaluation
    // (fits the existing parfor framework, e.g., result merge)
    List<Tuple2<Long, String>> workerOutput = jsc
        .parallelize(tasks, tasks.size())
        .flatMapToPair(new RemoteParForSparkWorker(jobid, prog, clsMap, cpCaching, taskCounter, iterCounter))
        .collect();

    // de-serialize the worker results into symbol table entries
    LocalVariableMap[] resultVars = RemoteParForUtils.getResults(workerOutput, LOG);

    // package results together with the accumulated task/iteration counts
    RemoteParForJobReturn jobReturn = new RemoteParForJobReturn(
        true,
        taskCounter.value().intValue(),
        iterCounter.value().intValue(),
        resultVars);

    // maintain statistics
    Statistics.incrementNoOfCompiledSPInst();
    Statistics.incrementNoOfExecutedSPInst();
    if (DMLScript.STATISTICS) {
        Statistics.maintainCPHeavyHitters(jobLabel, System.nanoTime() - startNanos);
    }

    return jobReturn;
}
use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project systemml by apache.
the class DMLScript method execute.
// /////////////////////////////
// private internal interface
// (core compilation and execute)
// //////
/**
 * The running body of DMLScript execution. This method should be called after execution properties have been correctly set,
 * and customized parameters have been put into _argVals.
 *
 * The method reads the configuration, parses and compiles the script into a runtime program,
 * optionally hands off to a YARN application master, and finally executes the runtime program.
 * The end-of-run log message and the working-directory cleanup always run once execution
 * has been attempted, even if closing the Spark execution context fails.
 *
 * @param dmlScriptStr DML script string
 * @param fnameOptConfig configuration file
 * @param argVals map of argument values
 * @param allArgs arguments
 * @param scriptType type of script (DML or PyDML)
 * @throws IOException if IOException occurs
 * @throws RuntimeException if the configured GPU eviction policy is not a known {@code EvictionPolicy}
 */
private static void execute(String dmlScriptStr, String fnameOptConfig, Map<String, String> argVals, String[] allArgs, ScriptType scriptType) throws IOException {
    SCRIPT_TYPE = scriptType;
    // print basic time and environment info
    printStartExecInfo(dmlScriptStr);
    // Step 1: parse configuration files & write any configuration specific global variables
    DMLConfig dmlconf = DMLConfig.readConfigurationFile(fnameOptConfig);
    ConfigurationManager.setGlobalConfig(dmlconf);
    CompilerConfig cconf = OptimizerUtils.constructCompilerConfig(dmlconf);
    ConfigurationManager.setGlobalConfig(cconf);
    LOG.debug("\nDML config: \n" + dmlconf.getConfigInfo());
    // Sets the GPUs to use for this process (a range, all GPUs, comma separated list or a specific GPU)
    GPUContextPool.AVAILABLE_GPUS = dmlconf.getTextValue(DMLConfig.AVAILABLE_GPUS);
    // upper-case with a fixed locale so that the enum lookup does not depend on
    // the JVM default locale (e.g., the Turkish dotted/dotless 'i' mapping)
    String evictionPolicy = dmlconf.getTextValue(DMLConfig.GPU_EVICTION_POLICY).toUpperCase(java.util.Locale.ROOT);
    try {
        DMLScript.GPU_EVICTION_POLICY = EvictionPolicy.valueOf(evictionPolicy);
    } catch (IllegalArgumentException e) {
        // keep the original exception as cause for diagnosis
        throw new RuntimeException("Unsupported eviction policy:" + evictionPolicy, e);
    }
    // Step 2: set local/remote memory if requested (for compile in AM context)
    if (dmlconf.getBooleanValue(DMLConfig.YARN_APPMASTER)) {
        DMLAppMasterUtils.setupConfigRemoteMaxMemory(dmlconf);
    }
    // Step 3: parse dml script
    Statistics.startCompileTimer();
    ParserWrapper parser = ParserFactory.createParser(scriptType);
    DMLProgram prog = parser.parse(DML_FILE_PATH_ANTLR_PARSER, dmlScriptStr, argVals);
    // Step 4: construct HOP DAGs (incl LVA, validate, and setup)
    DMLTranslator dmlt = new DMLTranslator(prog);
    dmlt.liveVariableAnalysis(prog);
    dmlt.validateParseTree(prog);
    dmlt.constructHops(prog);
    // init working directories (before usage by following compilation steps)
    initHadoopExecution(dmlconf);
    // Step 5: rewrite HOP DAGs (incl IPA and memory estimates)
    dmlt.rewriteHopsDAG(prog);
    // Step 6: construct lops (incl exec type and op selection)
    dmlt.constructLops(prog);
    if (LOG.isDebugEnabled()) {
        LOG.debug("\n********************** LOPS DAG *******************");
        dmlt.printLops(prog);
        dmlt.resetLopsDAGVisitStatus(prog);
    }
    // Step 7: generate runtime program, incl codegen
    Program rtprog = dmlt.getRuntimeProgram(prog, dmlconf);
    // launch SystemML appmaster (if requested and not already in launched AM)
    if (dmlconf.getBooleanValue(DMLConfig.YARN_APPMASTER)) {
        // a true return from the launch ends this method here; otherwise
        // (AM launch unsuccessful) we fall back to normal execute below
        if (!isActiveAM() && DMLYarnClientProxy.launchDMLYarnAppmaster(dmlScriptStr, dmlconf, allArgs, rtprog))
            return;
        // in AM context (not failed AM launch)
        if (isActiveAM())
            DMLAppMasterUtils.setupProgramMappingRemoteMaxMemory(rtprog);
    }
    // Step 9: prepare statistics [and optional explain output]
    // count number compiled MR jobs / SP instructions
    ExplainCounts counts = Explain.countDistributedOperations(rtprog);
    Statistics.resetNoOfCompiledJobs(counts.numJobs);
    // explain plan of program (hops or runtime)
    if (EXPLAIN != ExplainType.NONE)
        LOG.info(Explain.display(prog, rtprog, EXPLAIN, counts));
    Statistics.stopCompileTimer();
    // Step 10: execute runtime program
    ExecutionContext ec = null;
    try {
        ec = ExecutionContextFactory.createContext(rtprog);
        ScriptExecutorUtils.executeRuntimeProgram(rtprog, ec, dmlconf, STATISTICS ? STATISTICS_COUNT : 0);
    } finally {
        try {
            // instanceof is false for null, so no separate null check is needed
            if (ec instanceof SparkExecutionContext)
                ((SparkExecutionContext) ec).close();
        } finally {
            // must run even if closing the spark context failed
            LOG.info("END DML run " + getDateTime());
            // cleanup scratch_space and all working dirs
            cleanupHadoopExecution(dmlconf);
        }
    }
}
use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project systemml by apache.
the class DataPartitionerRemoteSpark method partitionMatrix.
/**
 * Partitions the given matrix with a Spark job and writes the partitions
 * to {@code fnameNew}.
 *
 * Any existing file at {@code fnameNew} is deleted first. Every exception
 * raised while setting up or running the job is rethrown wrapped in a
 * {@link DMLRuntimeException}; statistics are only maintained on success.
 * The parameters {@code rlen}, {@code clen}, {@code brlen} and {@code bclen}
 * are not read by this method.
 *
 * @param in       matrix object to partition
 * @param fnameNew output file name of the partitioned matrix
 * @param ii       input info passed to the partition mapper
 * @param oi       output info passed to the mapper and the writer
 */
@Override
@SuppressWarnings("unchecked")
protected void partitionMatrix(MatrixObject in, String fnameNew, InputInfo ii, OutputInfo oi, long rlen, long clen, int brlen, int bclen) {
    final String jobLabel = "ParFor-DPSP";
    final long startNanos = DMLScript.STATISTICS ? System.nanoTime() : 0;
    final SparkExecutionContext sparkCtx = (SparkExecutionContext) _ec;

    try {
        // remove output left over from a previous run
        MapReduceTool.deleteFileIfExistOnHDFS(fnameNew);

        // obtain the binary-block rdd of the input matrix
        JavaPairRDD<MatrixIndexes, MatrixBlock> blocks =
            (JavaPairRDD<MatrixIndexes, MatrixBlock>) sparkCtx.getRDDHandleForMatrixObject(in, InputInfo.BinaryBlockInputInfo);

        // determine degree of parallelism for the grouping step
        MatrixCharacteristics inputMeta = in.getMatrixCharacteristics();
        int reducers = (int) determineNumReducers(blocks, inputMeta, _numRed);

        // functions of the remote data partition job
        DataPartitionerRemoteSparkMapper splitFn = new DataPartitionerRemoteSparkMapper(inputMeta, ii, oi, _format, _n);
        DataPartitionerRemoteSparkReducer writeFn = new DataPartitionerRemoteSparkReducer(fnameNew, oi, _replication);

        // split blocks into partition pieces, group them per partition, write to hdfs
        blocks.flatMapToPair(splitFn)
            .groupByKey(reducers)
            .foreach(writeFn);
    } catch (Exception ex) {
        throw new DMLRuntimeException(ex);
    }

    // maintain statistics
    Statistics.incrementNoOfCompiledSPInst();
    Statistics.incrementNoOfExecutedSPInst();
    if (DMLScript.STATISTICS) {
        Statistics.maintainCPHeavyHitters(jobLabel, System.nanoTime() - startNanos);
    }
}
use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project systemml by apache.
the class LocalParWorker method run.
/**
 * Worker loop of a local parfor worker thread.
 *
 * Dequeues tasks from the task queue and executes them one at a time until
 * the worker is stopped, the queue signals {@code NO_MORE_TASKS}, or reading
 * from the queue fails. A failing task is re-executed up to {@code _max_retry}
 * additional times. No exception is thrown from the task handling so that
 * joining on this worker does not block.
 *
 * The thread-local scheduler pool cleanup runs on every exit path, including
 * the early return taken when the GPU context cannot be initialized.
 */
@Override
public void run() {
    // monitoring start
    Timing time1 = (_monitor ? new Timing(true) : null);
    try {
        // setup fair scheduler pool for worker thread
        // (only if the spark context has already been created)
        if (OptimizerUtils.isSparkExecutionMode() && SparkExecutionContext.isSparkContextCreated()) {
            SparkExecutionContext sec = (SparkExecutionContext) _ec;
            sec.setThreadLocalSchedulerPool("parforPool" + _workerID);
        }
        // Initialize this GPUContext to this thread
        if (DMLScript.USE_ACCELERATOR) {
            try {
                _ec.getGPUContext(0).initializeThread();
            } catch (DMLRuntimeException e) {
                LOG.error("Error executing task because of failure in GPU backend: ", e);
                LOG.error("Stopping LocalParWorker.");
                // the finally block below still cleans up the scheduler pool
                return;
            }
        }
        // setup compiler config for worker thread
        ConfigurationManager.setLocalConfig(_cconf);
        // continuous execution (execute tasks until (1) stopped or (2) no more tasks)
        Task lTask = null;
        while (!_stopped) {
            // dequeue the next task (abort on NO_MORE_TASKS or error)
            try {
                lTask = _taskQueue.dequeueTask();
                // task queue closed (no more tasks): normal end of parallel worker
                if (lTask == LocalTaskQueue.NO_MORE_TASKS)
                    break;
            } catch (Exception ex) {
                // abort on taskqueue error
                LOG.warn("Error reading from task queue: " + ex.getMessage());
                LOG.warn("Stopping LocalParWorker.");
                // no exception thrown to prevent blocking on join
                break;
            }
            // execute the task sequentially (re-try on error)
            boolean success = false;
            int retrys = _max_retry;
            while (!success) {
                try {
                    // core execution (see ParWorker)
                    executeTask(lTask);
                    success = true;
                } catch (Exception ex) {
                    LOG.error("Failed to execute " + lTask.toString() + ", retry:" + retrys, ex);
                    if (retrys > 0) {
                        // retry on task error
                        retrys--;
                    } else {
                        // abort on no remaining retrys
                        LOG.error("Error executing task: ", ex);
                        LOG.error("Stopping LocalParWorker.");
                        // no exception thrown to prevent blocking on join
                        // NOTE(review): this break only leaves the retry loop; the
                        // outer loop goes on to dequeue the next task although the
                        // log says the worker is stopping - confirm intended behavior
                        break;
                    }
                }
            }
        }
    } finally {
        // cleanup fair scheduler pool of worker thread; the condition is
        // re-evaluated because the spark context may have been created meanwhile
        if (OptimizerUtils.isSparkExecutionMode() && SparkExecutionContext.isSparkContextCreated()) {
            SparkExecutionContext sec = (SparkExecutionContext) _ec;
            sec.cleanupThreadLocalSchedulerPool();
        }
    }
    // monitoring end (skipped on the GPU initialization failure path)
    if (_monitor) {
        StatisticMonitor.putPWStat(_workerID, Stat.PARWRK_NUMTASKS, _numTasks);
        StatisticMonitor.putPWStat(_workerID, Stat.PARWRK_NUMITERS, _numIters);
        StatisticMonitor.putPWStat(_workerID, Stat.PARWRK_EXEC_T, time1.stop());
    }
}
use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project incubator-systemml by apache.
the class MatrixBuiltinSPInstruction method processInstruction.
/**
 * Applies this instruction's unary builtin operator to every block of the
 * input matrix rdd and registers the resulting rdd under the output variable.
 *
 * @param ec execution context, must be a {@link SparkExecutionContext}
 * @throws DMLRuntimeException if the instruction cannot be processed
 */
@Override
public void processInstruction(ExecutionContext ec) throws DMLRuntimeException {
    final SparkExecutionContext sparkCtx = (SparkExecutionContext) ec;
    final String inputName = input1.getName();

    // fetch the binary-block rdd of the input variable
    JavaPairRDD<MatrixIndexes, MatrixBlock> source = sparkCtx.getBinaryBlockRDDHandleForVariable(inputName);

    // apply the unary builtin operation block-wise (matrix indexes stay unchanged)
    UnaryOperator unaryOp = (UnaryOperator) _optr;
    RDDMatrixBuiltinUnaryOp blockFn = new RDDMatrixBuiltinUnaryOp(unaryOp);
    JavaPairRDD<MatrixIndexes, MatrixBlock> result = source.mapValues(blockFn);

    // publish the output: matrix characteristics, rdd handle, and lineage to the input
    updateUnaryOutputMatrixCharacteristics(sparkCtx);
    sparkCtx.setRDDHandleForVariable(output.getName(), result);
    sparkCtx.addLineageRDD(output.getName(), inputName);
}
Aggregations