use of org.apache.sysml.runtime.controlprogram.LocalVariableMap in project incubator-systemml by apache.
the class ScriptExecutor method restoreInputsInSymbolTable.
/**
* Restore the input variables in the symbol table after script execution.
*/
protected void restoreInputsInSymbolTable() {
    Map<String, Object> inputs = script.getInputs();
    Map<String, Metadata> inputMetadata = script.getInputMetadata();
    LocalVariableMap symbolTable = script.getSymbolTable();
    Set<String> inputVariables = script.getInputVariables();
    for (String inputVariable : inputVariables) {
        if (symbolTable.get(inputVariable) == null) {
            // retrieve optional metadata if it exists
            Metadata m = inputMetadata.get(inputVariable);
            script.in(inputVariable, inputs.get(inputVariable), m);
        }
    }
}
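For context, a rough sketch of why this restoration matters at the MLContext API level: re-binding the original inputs lets the same Script object be executed more than once. The snippet below is illustrative only; the DML expression, the input value, and the existing Spark session 'spark' are assumptions, not taken from the class above.

import org.apache.sysml.api.mlcontext.MLContext;
import org.apache.sysml.api.mlcontext.MLResults;
import org.apache.sysml.api.mlcontext.Script;
import org.apache.sysml.api.mlcontext.ScriptFactory;

// Hypothetical re-execution of a Script: because ScriptExecutor restores the
// inputs in the symbol table after a run, "X" does not need to be re-bound here.
Script script = ScriptFactory.dml("Y = X + 1").in("X", 5.0).out("Y");
MLContext ml = new MLContext(spark);   // 'spark' is an existing SparkSession (assumed)
MLResults first  = ml.execute(script); // first execution
MLResults second = ml.execute(script); // second execution reuses the restored input "X"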
use of org.apache.sysml.runtime.controlprogram.LocalVariableMap in project incubator-systemml by apache.
the class RemoteDPParForMR method readResultFile.
/**
* Result file contains hierarchy of workerID-resultvar(incl filename). We deduplicate
* on the workerID. Without JVM reuse each task refers to a unique workerID, so we
* will not find any duplicates. With JVM reuse, however, each slot refers to a workerID,
* and there are duplicate filenames due to partial aggregation and overwrite of fname
* (the RemoteParWorkerMapper ensures uniqueness of those files independent of the
* runtime implementation).
*
* @param job job configuration
* @param fname file name
* @return array of local variable maps
* @throws IOException if IOException occurs
*/
@SuppressWarnings("deprecation")
public static LocalVariableMap[] readResultFile(JobConf job, String fname) throws IOException {
    HashMap<Long, LocalVariableMap> tmp = new HashMap<>();
    Path path = new Path(fname);
    FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
    LongWritable key = new LongWritable(); // workerID
    Text value = new Text();               // serialized var header (incl filename)
    int countAll = 0;
    for (Path lpath : IOUtilFunctions.getSequenceFilePaths(fs, path)) {
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);
        try {
            while (reader.next(key, value)) {
                if (!tmp.containsKey(key.get()))
                    tmp.put(key.get(), new LocalVariableMap());
                Object[] dat = ProgramConverter.parseDataObject(value.toString());
                tmp.get(key.get()).put((String) dat[0], (Data) dat[1]);
                countAll++;
            }
        } finally {
            IOUtilFunctions.closeSilently(reader);
        }
    }
    LOG.debug("Num remote worker results (before deduplication): " + countAll);
    LOG.debug("Num remote worker results: " + tmp.size());
    // create return array
    return tmp.values().toArray(new LocalVariableMap[0]);
}
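A hedged usage sketch of the method above: the job configuration, the result path, and the variable name "R" are placeholders, not values from the SystemML code; only readResultFile and LocalVariableMap.get, both visible above, are relied on.

// Hypothetical caller that consumes the deduplicated per-worker variable maps.
JobConf job = new JobConf();
LocalVariableMap[] workerResults =
    RemoteDPParForMR.readResultFile(job, "hdfs:/tmp/parfor/results"); // throws IOException
for (LocalVariableMap vars : workerResults) {
    Data d = vars.get("R"); // null if this worker produced no entry for that variable
    if (d != null) {
        // aggregate the partial result for "R" here
    }
}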
use of org.apache.sysml.runtime.controlprogram.LocalVariableMap in project incubator-systemml by apache.
the class RemoteParForMR method readResultFile.
/**
* Result file contains hierarchy of workerID-resultvar(incl filename). We deduplicate
* on the workerID. Without JVM reuse each task refers to a unique workerID, so we
* will not find any duplicates. With JVM reuse, however, each slot refers to a workerID,
* and there are duplicate filenames due to partial aggregation and overwrite of fname
* (the RemoteParWorkerMapper ensures uniqueness of those files independent of the
* runtime implementation).
*
* @param job job configuration
* @param fname file name
* @return array of local variable maps
* @throws IOException if IOException occurs
*/
@SuppressWarnings("deprecation")
public static LocalVariableMap[] readResultFile(JobConf job, String fname) throws IOException {
    HashMap<Long, LocalVariableMap> tmp = new HashMap<>();
    Path path = new Path(fname);
    FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
    LongWritable key = new LongWritable(); // workerID
    Text value = new Text();               // serialized var header (incl filename)
    int countAll = 0;
    for (Path lpath : IOUtilFunctions.getSequenceFilePaths(fs, path)) {
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);
        try {
            while (reader.next(key, value)) {
                if (!tmp.containsKey(key.get()))
                    tmp.put(key.get(), new LocalVariableMap());
                Object[] dat = ProgramConverter.parseDataObject(value.toString());
                tmp.get(key.get()).put((String) dat[0], (Data) dat[1]);
                countAll++;
            }
        } finally {
            IOUtilFunctions.closeSilently(reader);
        }
    }
    LOG.debug("Num remote worker results (before deduplication): " + countAll);
    LOG.debug("Num remote worker results: " + tmp.size());
    // create return array
    return tmp.values().toArray(new LocalVariableMap[0]);
}
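To make the deduplication behavior described in the javadoc concrete, here is a toy illustration (not SystemML code); the worker ID and the Data values partialResult1/partialResult2 are hypothetical.

// With JVM reuse, two records can carry the same workerID; both land in the
// same LocalVariableMap, so the later entry for a variable name overwrites the
// earlier one and tmp ends up with exactly one map per worker.
HashMap<Long, LocalVariableMap> tmp = new HashMap<>();
long workerID = 7L; // same slot reused by two tasks (hypothetical)
if (!tmp.containsKey(workerID))
    tmp.put(workerID, new LocalVariableMap());
tmp.get(workerID).put("R", partialResult1); // first task's entry for "R" (hypothetical Data)
tmp.get(workerID).put("R", partialResult2); // second task overwrites "R"
// tmp.size() == 1, regardless of how many tasks ran on this worker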
use of org.apache.sysml.runtime.controlprogram.LocalVariableMap in project incubator-systemml by apache.
the class RemoteParForMR method runJob.
public static RemoteParForJobReturn runJob(
        // inputs
        long pfid, String program, String taskFile, String resultFile,
        // opt params
        MatrixObject colocatedDPMatrixObj, boolean enableCPCaching, int numMappers,
        int replication, int max_retry, long minMem, boolean jvmReuse) {
    RemoteParForJobReturn ret = null;
    String jobname = "ParFor-EMR";
    long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
    JobConf job = new JobConf(RemoteParForMR.class);
    job.setJobName(jobname + pfid);
    // maintain dml script counters
    Statistics.incrementNoOfCompiledMRJobs();
    try {
        /////
        // configure the MR job

        // set arbitrary CP program blocks that will be executed in the mapper
        MRJobConfiguration.setProgramBlocks(job, program);
        // enable/disable caching
        MRJobConfiguration.setParforCachingConfig(job, enableCPCaching);
        // set mappers, reducers, combiners (map-only job)
        job.setMapperClass(RemoteParWorkerMapper.class);
        // set input format (one split per row, NLineInputFormat default N=1)
        if (ParForProgramBlock.ALLOW_DATA_COLOCATION && colocatedDPMatrixObj != null) {
            job.setInputFormat(RemoteParForColocatedNLineInputFormat.class);
            MRJobConfiguration.setPartitioningFormat(job, colocatedDPMatrixObj.getPartitionFormat());
            MatrixCharacteristics mc = colocatedDPMatrixObj.getMatrixCharacteristics();
            MRJobConfiguration.setPartitioningBlockNumRows(job, mc.getRowsPerBlock());
            MRJobConfiguration.setPartitioningBlockNumCols(job, mc.getColsPerBlock());
            MRJobConfiguration.setPartitioningFilename(job, colocatedDPMatrixObj.getFileName());
        } else { // default case
            job.setInputFormat(NLineInputFormat.class);
        }
        // set the input path and output path
        FileInputFormat.setInputPaths(job, new Path(taskFile));
        // set output format
        job.setOutputFormat(SequenceFileOutputFormat.class);
        // set output path
        MapReduceTool.deleteFileIfExistOnHDFS(resultFile);
        FileOutputFormat.setOutputPath(job, new Path(resultFile));
        // set the output key, value schema
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        //////
        // set optimization parameters

        // set the number of mappers and reducers
        job.setNumMapTasks(numMappers);
        job.setNumReduceTasks(0);
        // job.setInt("mapred.map.tasks.maximum", 1);           // system property
        // job.setInt("mapred.tasktracker.tasks.maximum", 1);   // system property
        // job.setInt("mapred.jobtracker.maxtasks.per.job", 1); // system property

        // set jvm memory size (if required)
        String memKey = MRConfigurationNames.MR_CHILD_JAVA_OPTS;
        if (minMem > 0 && minMem > InfrastructureAnalyzer.extractMaxMemoryOpt(job.get(memKey))) {
            InfrastructureAnalyzer.setMaxMemoryOpt(job, memKey, minMem);
            LOG.warn("Forcing '" + memKey + "' to -Xmx" + minMem / (1024 * 1024) + "M.");
        }
        // disable automatic task timeouts and speculative task execution
        job.setInt(MRConfigurationNames.MR_TASK_TIMEOUT, 0);
        job.setMapSpeculativeExecution(false);
        // set up map/reduce memory configurations (if in AM context)
        DMLConfig config = ConfigurationManager.getDMLConfig();
        DMLAppMasterUtils.setupMRJobRemoteMaxMemory(job, config);
        // set up custom map/reduce configurations
        MRJobConfiguration.setupCustomMRConfigurations(job, config);
        // enable the reuse of JVMs (multiple tasks per JVM)
        if (jvmReuse)
            job.setNumTasksToExecutePerJvm(-1); // unlimited
        // set sort io buffer (avoid unnecessarily large io buffer, guarantee memory consumption)
        job.setInt(MRConfigurationNames.MR_TASK_IO_SORT_MB, 8); // 8MB
        // set the replication factor for the results
        job.setInt(MRConfigurationNames.DFS_REPLICATION, replication);
        // set the max number of retries per map task
        // disabled job-level configuration to respect cluster configuration
        // note: this refers to hadoop2, hence it never had effect on mr1
        // job.setInt(MRConfigurationNames.MR_MAP_MAXATTEMPTS, max_retry);
        // set unique working dir
        MRJobConfiguration.setUniqueWorkingDir(job);
        /////
        // execute the MR job
        RunningJob runjob = JobClient.runJob(job);

        // process different counters
        Statistics.incrementNoOfExecutedMRJobs();
        Group pgroup = runjob.getCounters().getGroup(ParForProgramBlock.PARFOR_COUNTER_GROUP_NAME);
        int numTasks = (int) pgroup.getCounter(Stat.PARFOR_NUMTASKS.toString());
        int numIters = (int) pgroup.getCounter(Stat.PARFOR_NUMITERS.toString());
        if (DMLScript.STATISTICS && !InfrastructureAnalyzer.isLocalMode()) {
            Statistics.incrementJITCompileTime(pgroup.getCounter(Stat.PARFOR_JITCOMPILE.toString()));
            Statistics.incrementJVMgcCount(pgroup.getCounter(Stat.PARFOR_JVMGC_COUNT.toString()));
            Statistics.incrementJVMgcTime(pgroup.getCounter(Stat.PARFOR_JVMGC_TIME.toString()));
            Group cgroup = runjob.getCounters().getGroup(CacheableData.CACHING_COUNTER_GROUP_NAME.toString());
            CacheStatistics.incrementMemHits((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_HITS_MEM.toString()));
            CacheStatistics.incrementFSBuffHits((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_HITS_FSBUFF.toString()));
            CacheStatistics.incrementFSHits((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_HITS_FS.toString()));
            CacheStatistics.incrementHDFSHits((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_HITS_HDFS.toString()));
            CacheStatistics.incrementFSBuffWrites((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_WRITES_FSBUFF.toString()));
            CacheStatistics.incrementFSWrites((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_WRITES_FS.toString()));
            CacheStatistics.incrementHDFSWrites((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_WRITES_HDFS.toString()));
            CacheStatistics.incrementAcquireRTime(cgroup.getCounter(CacheStatistics.Stat.CACHE_TIME_ACQR.toString()));
            CacheStatistics.incrementAcquireMTime(cgroup.getCounter(CacheStatistics.Stat.CACHE_TIME_ACQM.toString()));
            CacheStatistics.incrementReleaseTime(cgroup.getCounter(CacheStatistics.Stat.CACHE_TIME_RLS.toString()));
            CacheStatistics.incrementExportTime(cgroup.getCounter(CacheStatistics.Stat.CACHE_TIME_EXP.toString()));
        }
        // read all files of result variables and prepare for return
        LocalVariableMap[] results = readResultFile(job, resultFile);
        ret = new RemoteParForJobReturn(runjob.isSuccessful(), numTasks, numIters, results);
    } catch (Exception ex) {
        throw new DMLRuntimeException(ex);
    } finally {
        // remove created files
        try {
            MapReduceTool.deleteFileIfExistOnHDFS(new Path(taskFile), job);
            MapReduceTool.deleteFileIfExistOnHDFS(new Path(resultFile), job);
        } catch (IOException ex) {
            throw new DMLRuntimeException(ex);
        }
    }
    if (DMLScript.STATISTICS) {
        long t1 = System.nanoTime();
        Statistics.maintainCPHeavyHitters("MR-Job_" + jobname, t1 - t0);
    }
    return ret;
}
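For orientation, a hedged sketch of a call site for runJob; every argument value below is a hypothetical placeholder chosen to match the parameter order of the signature above, not a configuration taken from SystemML itself.

// Hypothetical invocation; 'serializedProgram' is assumed to hold the CP program
// blocks in the serialized form expected by MRJobConfiguration.setProgramBlocks.
RemoteParForJobReturn ret = RemoteParForMR.runJob(
    1L,                          // pfid
    serializedProgram,           // program
    "hdfs:/tmp/parfor/tasks",    // taskFile
    "hdfs:/tmp/parfor/results",  // resultFile
    null,                        // colocatedDPMatrixObj: no co-located partitioned matrix
    true,                        // enableCPCaching
    10,                          // numMappers
    1,                           // replication
    3,                           // max_retry (job-level override is disabled above)
    -1,                          // minMem: do not force a larger -Xmx
    false);                      // jvmReuse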
use of org.apache.sysml.runtime.controlprogram.LocalVariableMap in project incubator-systemml by apache.
the class CostEstimationWrapper method getTimeEstimate.
public static double getTimeEstimate(ProgramBlock pb, ExecutionContext ec, boolean recursive) {
    Timing time = new Timing(true);
    HashMap<String, VarStats> stats = new HashMap<>();
    LocalVariableMap vars = (ec != null) ? ec.getVariables() : new LocalVariableMap();
    double costs = _costEstim.getTimeEstimate(pb, vars, stats, recursive);
    LOG.debug("Finished estimation in " + time.stop() + "ms.");
    return costs;
}