Use of org.apache.sysml.runtime.DMLRuntimeException in project incubator-systemml by apache.
The class RemoteDPParForSparkWorker, method collectBinaryCellInput.
/**
* Collects a matrix block partition from a given input iterator over
* binary cells.
*
* Note: it reuses the instance attribute _partition, so multiple calls
* will overwrite the result.
*
* @param valueList iterable writables
* @return matrix block
* @throws IOException if IOException occurs
*/
private MatrixBlock collectBinaryCellInput(Iterable<Writable> valueList) throws IOException {
MatrixBlock partition = null;
// reset reuse block, keep configured representation
if (_tSparseCol)
partition = new MatrixBlock(_clen, _rlen, true);
else
partition = new MatrixBlock(_rlen, _clen, false);
switch(_dpf) {
case ROW_WISE:
for (Writable val : valueList) {
PairWritableCell pairValue = (PairWritableCell) val;
if (pairValue.indexes.getColumnIndex() < 0)
continue; // cells used only to ensure empty partitions
partition.quickSetValue(0, (int) pairValue.indexes.getColumnIndex() - 1, pairValue.cell.getValue());
}
break;
case COLUMN_WISE:
for (Writable val : valueList) {
PairWritableCell pairValue = (PairWritableCell) val;
if (pairValue.indexes.getRowIndex() < 0)
continue; // cells used only to ensure empty partitions
if (_tSparseCol)
partition.appendValue(0, (int) pairValue.indexes.getRowIndex() - 1, pairValue.cell.getValue());
else
partition.quickSetValue((int) pairValue.indexes.getRowIndex() - 1, 0, pairValue.cell.getValue());
}
break;
default:
throw new IOException("Partition format not yet supported in fused partition-execute: " + _dpf);
}
// post-processing: cleanups if required
try {
if (partition.isInSparseFormat() && _tSparseCol)
partition.sortSparseRows();
partition.recomputeNonZeros();
partition.examSparsity();
} catch (DMLRuntimeException ex) {
throw new IOException(ex);
}
return partition;
}
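For illustration only, the following self-contained sketch (a hypothetical class, not part of SystemML) captures the index arithmetic the switch above performs: binary cells carry 1-based matrix indexes, and the offset within the partition block is the column index for ROW_WISE partitions and the row index for COLUMN_WISE partitions, shifted to 0-based.
public class PartitionIndexSketch {
    enum PDataPartitionFormat { ROW_WISE, COLUMN_WISE }

    /** Returns the 0-based offset of a cell within its partition block. */
    static int partitionOffset(PDataPartitionFormat dpf, long rowIndex, long colIndex) {
        switch (dpf) {
            case ROW_WISE:    return (int) colIndex - 1; // row partition: offset by column index
            case COLUMN_WISE: return (int) rowIndex - 1; // column partition: offset by row index
            default: throw new IllegalArgumentException("Unsupported format: " + dpf);
        }
    }

    public static void main(String[] args) {
        // cell (1,7) in a row-wise partition lands at offset 6
        System.out.println(partitionOffset(PDataPartitionFormat.ROW_WISE, 1, 7));
        // cell (5,1) in a column-wise partition lands at offset 4
        System.out.println(partitionOffset(PDataPartitionFormat.COLUMN_WISE, 5, 1));
    }
}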
Use of org.apache.sysml.runtime.DMLRuntimeException in project incubator-systemml by apache.
The class RemoteParForMR, method runJob.
public static RemoteParForJobReturn runJob(
long pfid, String program, String taskFile, String resultFile, // inputs
MatrixObject colocatedDPMatrixObj, // inputs
boolean enableCPCaching, int numMappers, int replication, // opt params
int max_retry, long minMem, boolean jvmReuse) { // opt params
RemoteParForJobReturn ret = null;
String jobname = "ParFor-EMR";
long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
JobConf job = new JobConf(RemoteParForMR.class);
job.setJobName(jobname + pfid);
// maintain dml script counters
Statistics.incrementNoOfCompiledMRJobs();
try {
// ///
// configure the MR job
// set arbitrary CP program blocks that will be executed in the mapper
MRJobConfiguration.setProgramBlocks(job, program);
// enable/disable caching
MRJobConfiguration.setParforCachingConfig(job, enableCPCaching);
// set mappers, reducers, combiners
// map-only
job.setMapperClass(RemoteParWorkerMapper.class);
// set input format (one split per row, NLineInputFormat default N=1)
if (ParForProgramBlock.ALLOW_DATA_COLOCATION && colocatedDPMatrixObj != null) {
job.setInputFormat(RemoteParForColocatedNLineInputFormat.class);
MRJobConfiguration.setPartitioningFormat(job, colocatedDPMatrixObj.getPartitionFormat());
MatrixCharacteristics mc = colocatedDPMatrixObj.getMatrixCharacteristics();
MRJobConfiguration.setPartitioningBlockNumRows(job, mc.getRowsPerBlock());
MRJobConfiguration.setPartitioningBlockNumCols(job, mc.getColsPerBlock());
MRJobConfiguration.setPartitioningFilename(job, colocatedDPMatrixObj.getFileName());
} else { // default case
job.setInputFormat(NLineInputFormat.class);
}
// set the input path and output path
FileInputFormat.setInputPaths(job, new Path(taskFile));
// set output format
job.setOutputFormat(SequenceFileOutputFormat.class);
// set output path
MapReduceTool.deleteFileIfExistOnHDFS(resultFile);
FileOutputFormat.setOutputPath(job, new Path(resultFile));
// set the output key, value schema
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(LongWritable.class);
job.setOutputValueClass(Text.class);
// ////
// set optimization parameters
// set the number of mappers and reducers
// numMappers
job.setNumMapTasks(numMappers);
job.setNumReduceTasks(0);
// job.setInt("mapred.map.tasks.maximum", 1); //system property
// job.setInt("mapred.tasktracker.tasks.maximum",1); //system property
// job.setInt("mapred.jobtracker.maxtasks.per.job",1); //system property
// set jvm memory size (if required)
String memKey = MRConfigurationNames.MR_CHILD_JAVA_OPTS;
if (minMem > 0 && minMem > InfrastructureAnalyzer.extractMaxMemoryOpt(job.get(memKey))) {
InfrastructureAnalyzer.setMaxMemoryOpt(job, memKey, minMem);
LOG.warn("Forcing '" + memKey + "' to -Xmx" + minMem / (1024 * 1024) + "M.");
}
// disable automatic task timeouts and speculative task execution
job.setInt(MRConfigurationNames.MR_TASK_TIMEOUT, 0);
job.setMapSpeculativeExecution(false);
// set up map/reduce memory configurations (if in AM context)
DMLConfig config = ConfigurationManager.getDMLConfig();
DMLAppMasterUtils.setupMRJobRemoteMaxMemory(job, config);
// set up custom map/reduce configurations
MRJobConfiguration.setupCustomMRConfigurations(job, config);
// enable the reuse of JVMs (multiple map tasks per JVM)
if (jvmReuse)
job.setNumTasksToExecutePerJvm(-1); // unlimited
// set sort io buffer to 8MB (avoid an unnecessarily large io buffer, ensure bounded memory consumption)
job.setInt(MRConfigurationNames.MR_TASK_IO_SORT_MB, 8);
// set the replication factor for the results
job.setInt(MRConfigurationNames.DFS_REPLICATION, replication);
// set the max number of retries per map task
// (job-level configuration disabled to respect the cluster configuration;
// note: this parameter refers to hadoop2, hence it never had an effect on mr1)
// job.setInt(MRConfigurationNames.MR_MAP_MAXATTEMPTS, max_retry);
// set unique working dir
MRJobConfiguration.setUniqueWorkingDir(job);
// ///
// execute the MR job
RunningJob runjob = JobClient.runJob(job);
// Process different counters
Statistics.incrementNoOfExecutedMRJobs();
Group pgroup = runjob.getCounters().getGroup(ParForProgramBlock.PARFOR_COUNTER_GROUP_NAME);
int numTasks = (int) pgroup.getCounter(Stat.PARFOR_NUMTASKS.toString());
int numIters = (int) pgroup.getCounter(Stat.PARFOR_NUMITERS.toString());
if (DMLScript.STATISTICS && !InfrastructureAnalyzer.isLocalMode()) {
Statistics.incrementJITCompileTime(pgroup.getCounter(Stat.PARFOR_JITCOMPILE.toString()));
Statistics.incrementJVMgcCount(pgroup.getCounter(Stat.PARFOR_JVMGC_COUNT.toString()));
Statistics.incrementJVMgcTime(pgroup.getCounter(Stat.PARFOR_JVMGC_TIME.toString()));
Group cgroup = runjob.getCounters().getGroup(CacheableData.CACHING_COUNTER_GROUP_NAME.toString());
CacheStatistics.incrementMemHits((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_HITS_MEM.toString()));
CacheStatistics.incrementFSBuffHits((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_HITS_FSBUFF.toString()));
CacheStatistics.incrementFSHits((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_HITS_FS.toString()));
CacheStatistics.incrementHDFSHits((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_HITS_HDFS.toString()));
CacheStatistics.incrementFSBuffWrites((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_WRITES_FSBUFF.toString()));
CacheStatistics.incrementFSWrites((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_WRITES_FS.toString()));
CacheStatistics.incrementHDFSWrites((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_WRITES_HDFS.toString()));
CacheStatistics.incrementAcquireRTime(cgroup.getCounter(CacheStatistics.Stat.CACHE_TIME_ACQR.toString()));
CacheStatistics.incrementAcquireMTime(cgroup.getCounter(CacheStatistics.Stat.CACHE_TIME_ACQM.toString()));
CacheStatistics.incrementReleaseTime(cgroup.getCounter(CacheStatistics.Stat.CACHE_TIME_RLS.toString()));
CacheStatistics.incrementExportTime(cgroup.getCounter(CacheStatistics.Stat.CACHE_TIME_EXP.toString()));
}
// read all files of result variables and prepare for return
LocalVariableMap[] results = readResultFile(job, resultFile);
ret = new RemoteParForJobReturn(runjob.isSuccessful(), numTasks, numIters, results);
} catch (Exception ex) {
throw new DMLRuntimeException(ex);
} finally {
// remove created files
try {
MapReduceTool.deleteFileIfExistOnHDFS(new Path(taskFile), job);
MapReduceTool.deleteFileIfExistOnHDFS(new Path(resultFile), job);
} catch (IOException ex) {
throw new DMLRuntimeException(ex);
}
}
if (DMLScript.STATISTICS) {
long t1 = System.nanoTime();
Statistics.maintainCPHeavyHitters("MR-Job_" + jobname, t1 - t0);
}
return ret;
}
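As a hedged, minimal sketch of the overall pattern shown above (classic org.apache.hadoop.mapred API: configure a map-only job, submit it synchronously, then read back custom counters), the class, input/output paths, group name, and counter name below are placeholders, not SystemML's.
import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.Counters;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;

public class MapOnlyJobSketch {
    public static long runAndReadCounter(String inPath, String outPath) throws IOException {
        JobConf job = new JobConf(MapOnlyJobSketch.class);
        job.setJobName("map-only-sketch");
        job.setNumReduceTasks(0); // map-only, like the ParFor job above
        FileInputFormat.setInputPaths(job, new Path(inPath));
        FileOutputFormat.setOutputPath(job, new Path(outPath));

        RunningJob runjob = JobClient.runJob(job); // blocks until job completion
        Counters.Group grp = runjob.getCounters().getGroup("MyCounterGroup"); // placeholder group
        return grp.getCounter("MyCounter"); // placeholder counter name
    }
}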
Use of org.apache.sysml.runtime.DMLRuntimeException in project incubator-systemml by apache.
The class ResultMergeLocalFile, method executeSerialMerge.
@Override
public MatrixObject executeSerialMerge() {
// always create new matrix object (required for nested parallelism)
MatrixObject moNew = null;
if (LOG.isTraceEnabled())
LOG.trace("ResultMerge (local, file): Execute serial merge for output " + _output.hashCode() + " (fname=" + _output.getFileName() + ")");
try {
// collect all relevant inputs
ArrayList<MatrixObject> inMO = new ArrayList<>();
for (MatrixObject in : _inputs) {
// check for empty inputs (no iterations executed)
if (in != null && in != _output) {
// ensure that input file resides on disk
in.exportData();
// add to merge list
inMO.add(in);
}
}
if (!inMO.isEmpty()) {
// ensure that the output file (for comparison) resides on disk
_output.exportData();
// actual merge
merge(_outputFName, _output, inMO);
// create new output matrix (e.g., to prevent a potential export<->read file access conflict)
moNew = createNewMatrixObject(_output, inMO);
} else {
// return old matrix, to prevent copy
moNew = _output;
}
} catch (Exception ex) {
throw new DMLRuntimeException(ex);
}
return moNew;
}
Use of org.apache.sysml.runtime.DMLRuntimeException in project incubator-systemml by apache.
The class ResultMergeLocalFile, method createTextCellResultFile.
private void createTextCellResultFile(String fnameStaging, String fnameStagingCompare, String fnameNew, MetaDataFormat metadata, boolean withCompare) throws IOException, DMLRuntimeException {
JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
Path path = new Path(fnameNew);
FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
MatrixCharacteristics mc = metadata.getMatrixCharacteristics();
long rlen = mc.getRows();
long clen = mc.getCols();
int brlen = mc.getRowsPerBlock();
int bclen = mc.getColsPerBlock();
try (BufferedWriter out = new BufferedWriter(new OutputStreamWriter(fs.create(path, true)))) {
// for obj reuse and preventing repeated buffer re-allocations
StringBuilder sb = new StringBuilder();
boolean written = false;
for (long brow = 1; brow <= (long) Math.ceil(rlen / (double) brlen); brow++)
for (long bcol = 1; bcol <= (long) Math.ceil(clen / (double) bclen); bcol++) {
File dir = new File(fnameStaging + "/" + brow + "_" + bcol);
File dir2 = new File(fnameStagingCompare + "/" + brow + "_" + bcol);
MatrixBlock mb = null;
long row_offset = (brow - 1) * brlen + 1;
long col_offset = (bcol - 1) * bclen + 1;
if (dir.exists()) {
if (withCompare && dir2.exists()) { // WITH COMPARE BLOCK
// copy only values that are different from the original
String[] lnames2 = dir2.list();
if (lnames2.length != 1) // there should be exactly 1 compare block
throw new DMLRuntimeException("Unable to merge results because multiple compare blocks found.");
mb = StagingFileUtils.readCellList2BlockFromLocal(dir2 + "/" + lnames2[0], brlen, bclen);
boolean appendOnly = mb.isInSparseFormat();
DenseBlock compare = DataConverter.convertToDenseBlock(mb, false);
for (String lname : dir.list()) {
MatrixBlock tmp = StagingFileUtils.readCellList2BlockFromLocal(dir + "/" + lname, brlen, bclen);
mergeWithComp(mb, tmp, compare);
}
// sort sparse due to append-only
if (appendOnly && !_isAccum)
mb.sortSparseRows();
// change sparsity if required after
mb.examSparsity();
} else { // WITHOUT COMPARE BLOCK
// copy all non-zeros from all workers
boolean appendOnly = false;
for (String lname : dir.list()) {
if (mb == null) {
mb = StagingFileUtils.readCellList2BlockFromLocal(dir + "/" + lname, brlen, bclen);
appendOnly = mb.isInSparseFormat();
} else {
MatrixBlock tmp = StagingFileUtils.readCellList2BlockFromLocal(dir + "/" + lname, brlen, bclen);
mergeWithoutComp(mb, tmp, appendOnly);
}
}
// sort sparse due to append-only
if (appendOnly && !_isAccum)
mb.sortSparseRows();
// change sparsity if required after
mb.examSparsity();
}
}
// write the block to text cell
if (mb != null) {
if (mb.isInSparseFormat()) {
Iterator<IJV> iter = mb.getSparseBlockIterator();
while (iter.hasNext()) {
IJV lcell = iter.next();
sb.append(row_offset + lcell.getI());
sb.append(' ');
sb.append(col_offset + lcell.getJ());
sb.append(' ');
sb.append(lcell.getV());
sb.append('\n');
out.write(sb.toString());
sb.setLength(0);
written = true;
}
} else {
for (int i = 0; i < brlen; i++)
for (int j = 0; j < bclen; j++) {
double lvalue = mb.getValueDenseUnsafe(i, j);
if (lvalue != 0) { // for nnz
sb.append(row_offset + i);
sb.append(' ');
sb.append(col_offset + j);
sb.append(' ');
sb.append(lvalue);
sb.append('\n');
out.write(sb.toString());
sb.setLength(0);
written = true;
}
}
}
}
}
if (!written)
out.write(IOUtilFunctions.EMPTY_TEXT_LINE);
}
}
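The writer above emits the text cell format: one line per non-zero, "<row> <col> <value>", with 1-based global coordinates computed as block offset plus local index, and a single reused StringBuilder. A small illustrative sketch of that format (plain Java, not SystemML code; the class and method are hypothetical):
import java.io.IOException;
import java.io.Writer;

public class TextCellWriterSketch {
    /** Writes all non-zeros of a dense block, given the 1-based row/column offsets of the block. */
    static boolean writeDenseBlock(Writer out, double[][] block, long rowOffset, long colOffset)
        throws IOException {
        StringBuilder sb = new StringBuilder();
        boolean written = false;
        for (int i = 0; i < block.length; i++)
            for (int j = 0; j < block[i].length; j++) {
                double v = block[i][j];
                if (v != 0) { // text cell stores non-zeros only
                    sb.append(rowOffset + i).append(' ')
                      .append(colOffset + j).append(' ')
                      .append(v).append('\n');
                    out.write(sb.toString());
                    sb.setLength(0); // reuse the buffer for the next cell
                    written = true;
                }
            }
        return written; // caller writes a dummy line if nothing was written (all-zero matrix)
    }
}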
Use of org.apache.sysml.runtime.DMLRuntimeException in project incubator-systemml by apache.
The class ResultMergeLocalFile, method mergeBinaryBlockWithComp.
private void mergeBinaryBlockWithComp(String fnameNew, MatrixObject outMo, ArrayList<MatrixObject> inMO) {
String fnameStaging = LocalFileUtils.getUniqueWorkingDir(LocalFileUtils.CATEGORY_RESULTMERGE);
String fnameStagingCompare = LocalFileUtils.getUniqueWorkingDir(LocalFileUtils.CATEGORY_RESULTMERGE);
try {
// delete target file if already exists
MapReduceTool.deleteFileIfExistOnHDFS(fnameNew);
// Step 0) write compare blocks to staging area (if necessary)
if (LOG.isTraceEnabled())
LOG.trace("ResultMerge (local, file): Create merge compare matrix for output " + outMo.hashCode() + " (fname=" + outMo.getFileName() + ")");
createBinaryBlockStagingFile(fnameStagingCompare, outMo);
// Step 1) read and write blocks to staging area
for (MatrixObject in : inMO) {
if (LOG.isTraceEnabled())
LOG.trace("ResultMerge (local, file): Merge input " + in.hashCode() + " (fname=" + in.getFileName() + ")");
createBinaryBlockStagingFile(fnameStaging, in);
}
// Step 2) read blocks, consolidate, and write to HDFS
createBinaryBlockResultFile(fnameStaging, fnameStagingCompare, fnameNew, (MetaDataFormat) outMo.getMetaData(), true);
} catch (Exception ex) {
throw new DMLRuntimeException("Unable to merge binary block results.", ex);
}
LocalFileUtils.cleanupWorkingDirectory(fnameStaging);
LocalFileUtils.cleanupWorkingDirectory(fnameStagingCompare);
}
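A generic sketch of the staging-directory lifecycle used by these merge methods, written with plain java.nio.file instead of SystemML's LocalFileUtils and StagingFileUtils: create a unique working directory, stage per-block files under directories named "<brow>_<bcol>", and clean everything up afterwards. The paths and contents are hypothetical, and cleanup is placed in a finally block here as a design choice for robustness.
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Comparator;
import java.util.stream.Stream;

public class StagingDirSketch {
    public static void main(String[] args) throws IOException {
        Path staging = Files.createTempDirectory("resultmerge-staging-");
        try {
            // stage a cell list for block-row 1, block-column 2 (hypothetical content)
            Path blockDir = Files.createDirectories(staging.resolve("1_2"));
            Files.write(blockDir.resolve("worker-0"), "1 1 3.14\n".getBytes());
            // ... read back the staged blocks, consolidate, and write the merged result here ...
        } finally {
            // cleanup of the working directory, analogous to cleanupWorkingDirectory above
            try (Stream<Path> walk = Files.walk(staging)) {
                walk.sorted(Comparator.reverseOrder()).forEach(p -> p.toFile().delete());
            }
        }
    }
}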