Use of org.apache.spark.util.LongAccumulator in the Apache project incubator-systemml.
Class RemoteDPParForSpark, method runJob.
/**
 * Runs the data-partitioned parfor job ("ParFor-DPESP") on Spark and collects
 * the results on the driver.
 *
 * <p>The input matrix is partitioned via {@code getPartitionedInput}, optionally
 * grouped by key (when {@code requiresGrouping} says so), and each partition is
 * handed to a {@code RemoteDPParForSparkWorker}. The job is executed eagerly via
 * {@code collect()}.
 *
 * @param pfid            parfor ID (not referenced by this method)
 * @param itervar         name of the iteration variable, passed to the worker
 * @param matrixvar       name of the partitioned input matrix variable
 * @param program         serialized parfor program, passed to the worker
 * @param clsMap          class-name to byte-code map, passed to the worker
 * @param resultFile      result file name (not referenced by this method)
 * @param input           input matrix object (not referenced by this method; the
 *                        matrix is looked up through {@code matrixvar} instead)
 * @param ec              execution context; must be a {@code SparkExecutionContext}
 * @param dpf             data partition format
 * @param oi              output info used for partitioning and by the worker
 * @param tSparseCol      flag passed through to the worker
 * @param enableCPCaching flag passed through to the worker
 * @param numReducers     lower bound for the number of reducers used for grouping
 * @return job return object holding the number of executed tasks and iterations
 *         (read from the accumulators) and the de-serialized result variables
 */
public static RemoteParForJobReturn runJob(long pfid, String itervar, String matrixvar, String program, HashMap<String, byte[]> clsMap, String resultFile, MatrixObject input, ExecutionContext ec, PartitionFormat dpf, OutputInfo oi, boolean tSparseCol, boolean enableCPCaching, int numReducers) {
    String jobname = "ParFor-DPESP";
    long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;

    SparkExecutionContext sec = (SparkExecutionContext) ec;
    JavaSparkContext sc = sec.getSparkContext();

    // prepare input parameters
    MatrixObject mo = sec.getMatrixObject(matrixvar);
    MatrixCharacteristics mc = mo.getMatrixCharacteristics();

    // obtain the input RDD and initialize accumulators for tasks/iterations
    JavaPairRDD<MatrixIndexes, MatrixBlock> in = sec.getBinaryBlockRDDHandleForVariable(matrixvar);
    LongAccumulator aTasks = sc.sc().longAccumulator("tasks");
    LongAccumulator aIters = sc.sc().longAccumulator("iterations");

    // compute number of reducers (to avoid OOMs and reduce memory pressure);
    // the minimum is taken BEFORE narrowing to int so that a very large number
    // of data partitions cannot wrap around in the cast (result <= numParts)
    int numParts = SparkUtils.getNumPreferredPartitions(mc, in);
    int boundedParts = (int) Math.min(numParts, dpf.getNumParts(mc));
    int numReducers2 = Math.max(numReducers, boundedParts);

    // core parfor datapartition-execute (w/ or w/o shuffle, depending on data characteristics)
    RemoteDPParForSparkWorker efun = new RemoteDPParForSparkWorker(program, clsMap, matrixvar, itervar,
        enableCPCaching, mc, tSparseCol, dpf, oi, aTasks, aIters);
    JavaPairRDD<Long, Writable> tmp = getPartitionedInput(sec, matrixvar, oi, dpf);
    List<Tuple2<Long, String>> out = (requiresGrouping(dpf, mo)
            ? tmp.groupByKey(numReducers2)
            : tmp.map(new PseudoGrouping()))
        .mapPartitionsToPair(efun) // execute parfor tasks, incl cleanup
        .collect();

    // de-serialize results
    LocalVariableMap[] results = RemoteParForUtils.getResults(out, LOG);

    // read accumulator values (after collect, i.e., after the job has run)
    int numTasks = aTasks.value().intValue();
    int numIters = aIters.value().intValue();

    // create output symbol table entries
    RemoteParForJobReturn ret = new RemoteParForJobReturn(true, numTasks, numIters, results);

    // maintain statistics
    Statistics.incrementNoOfCompiledSPInst();
    Statistics.incrementNoOfExecutedSPInst();
    if (DMLScript.STATISTICS) {
        Statistics.maintainCPHeavyHitters(jobname, System.nanoTime() - t0);
    }

    return ret;
}
Use of org.apache.spark.util.LongAccumulator in the Apache project incubator-systemml.
Class RemoteParForSpark, method runJob.
/**
 * Runs the remote parfor job ("ParFor-ESP") on Spark and collects the results
 * on the driver.
 *
 * <p>The given tasks are parallelized into an RDD with one slice per task and
 * each task is executed by a {@code RemoteParForSparkWorker}. The job is
 * executed eagerly via {@code collect()}.
 *
 * @param pfid       parfor ID (not referenced by this method)
 * @param prog       serialized parfor program, passed to the worker
 * @param clsMap     class-name to byte-code map, passed to the worker
 * @param tasks      parfor tasks to execute
 * @param ec         execution context; must be a {@code SparkExecutionContext}
 * @param cpCaching  flag passed through to the worker
 * @param numMappers number of mappers (not referenced by this method)
 * @return job return object holding the number of executed tasks and iterations
 *         (read from the accumulators) and the de-serialized result variables
 */
public static RemoteParForJobReturn runJob(long pfid, String prog, HashMap<String, byte[]> clsMap, List<Task> tasks, ExecutionContext ec, boolean cpCaching, int numMappers) {
    String jobname = "ParFor-ESP";
    long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;

    SparkExecutionContext sec = (SparkExecutionContext) ec;
    JavaSparkContext sc = sec.getSparkContext();

    // initialize accumulators for tasks/iterations
    LongAccumulator aTasks = sc.sc().longAccumulator("tasks");
    LongAccumulator aIters = sc.sc().longAccumulator("iterations");

    // reset cached shared inputs for correctness in local mode
    long jobid = _jobID.getNextID();
    if (InfrastructureAnalyzer.isLocalMode()) {
        RemoteParForSparkWorker.cleanupCachedVariables(jobid);
    }

    // one slice per task; clamped to 1 because Spark requires a positive
    // number of slices, which an empty task list would otherwise violate
    int numSlices = Math.max(tasks.size(), 1);

    // run remote_spark parfor job
    // (w/o lazy evaluation to fit existing parfor framework, e.g., result merge)
    List<Tuple2<Long, String>> out = sc
        .parallelize(tasks, numSlices) // create rdd of parfor tasks
        .flatMapToPair(new RemoteParForSparkWorker(jobid, prog, clsMap, cpCaching, aTasks, aIters))
        .collect();

    // de-serialize results
    LocalVariableMap[] results = RemoteParForUtils.getResults(out, LOG);

    // read accumulator values (after collect, i.e., after the job has run)
    int numTasks = aTasks.value().intValue();
    int numIters = aIters.value().intValue();

    // create output symbol table entries
    RemoteParForJobReturn ret = new RemoteParForJobReturn(true, numTasks, numIters, results);

    // maintain statistics
    Statistics.incrementNoOfCompiledSPInst();
    Statistics.incrementNoOfExecutedSPInst();
    if (DMLScript.STATISTICS) {
        Statistics.maintainCPHeavyHitters(jobname, System.nanoTime() - t0);
    }

    return ret;
}
Aggregations