Use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by apache.
The class CostEstimator, method maintainMRJobInstVariableStatistics.
private void maintainMRJobInstVariableStatistics(Instruction inst, HashMap<String, VarStats> stats) throws DMLRuntimeException {
MRJobInstruction jobinst = (MRJobInstruction) inst;
//input sizes (varname, index mapping)
String[] inVars = jobinst.getInputVars();
int index = -1;
for (String varname : inVars) {
VarStats vs = stats.get(varname);
if (vs == null)
vs = _unknownStats;
stats.put(String.valueOf(++index), vs);
}
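//note: MR instructions reference their operands by position, so each input
//varname is additionally bound under its stringified byte index ("0", "1", ...)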
//rand output
String rdInst = jobinst.getIv_randInstructions();
if (rdInst != null && rdInst.length() > 0) {
StringTokenizer st = new StringTokenizer(rdInst, Lop.INSTRUCTION_DELIMITOR);
//foreach rand instruction
while (st.hasMoreTokens()) {
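//rand instruction layout (as parsed below): parts[2]=output index, parts[3]/parts[4]=rows/cols
//(-1 if only known at runtime), parts[5]/parts[6]=block sizes, parts[9]=sparsity for the nnz estimate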
String[] parts = InstructionUtils.getInstructionParts(st.nextToken());
byte outIndex = Byte.parseByte(parts[2]);
long rlen = parts[3].contains(Lop.VARIABLE_NAME_PLACEHOLDER) ? -1 : UtilFunctions.parseToLong(parts[3]);
long clen = parts[4].contains(Lop.VARIABLE_NAME_PLACEHOLDER) ? -1 : UtilFunctions.parseToLong(parts[4]);
long brlen = Long.parseLong(parts[5]);
long bclen = Long.parseLong(parts[6]);
long nnz = (long) (Double.parseDouble(parts[9]) * rlen * clen);
VarStats vs = new VarStats(rlen, clen, brlen, bclen, nnz, false);
stats.put(String.valueOf(outIndex), vs);
}
}
//compute intermediate result indices
HashMap<Byte, MatrixCharacteristics> dims = new HashMap<Byte, MatrixCharacteristics>();
//populate input indices
for (Entry<String, VarStats> e : stats.entrySet()) {
if (UtilFunctions.isIntegerNumber(e.getKey())) {
byte ix = Byte.parseByte(e.getKey());
VarStats vs = e.getValue();
if (vs != null) {
MatrixCharacteristics mc = new MatrixCharacteristics(vs._rlen, vs._clen, (int) vs._brlen, (int) vs._bclen, (long) vs._nnz);
dims.put(ix, mc);
}
}
}
//compute dims for all instructions
String[] instCat = new String[] { jobinst.getIv_randInstructions(), jobinst.getIv_recordReaderInstructions(), jobinst.getIv_instructionsInMapper(), jobinst.getIv_shuffleInstructions(), jobinst.getIv_aggInstructions(), jobinst.getIv_otherInstructions() };
for (String linstCat : instCat)
if (linstCat != null && linstCat.length() > 0) {
String[] linst = linstCat.split(Instruction.INSTRUCTION_DELIM);
for (String instStr : linst) {
String instStr2 = replaceInstructionPatch(instStr);
MRInstruction mrinst = MRInstructionParser.parseSingleInstruction(instStr2);
MatrixCharacteristics.computeDimension(dims, mrinst);
}
}
//create varstats if necessary
for (Entry<Byte, MatrixCharacteristics> e : dims.entrySet()) {
byte ix = e.getKey();
if (!stats.containsKey(String.valueOf(ix))) {
MatrixCharacteristics mc = e.getValue();
VarStats vs = new VarStats(mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock(), mc.getNonZeros(), false);
stats.put(String.valueOf(ix), vs);
}
}
//map result indexes
String[] outLabels = jobinst.getOutputVars();
byte[] resultIndexes = jobinst.getIv_resultIndices();
for (int i = 0; i < resultIndexes.length; i++) {
String varname = outLabels[i];
VarStats varvs = stats.get(String.valueOf(resultIndexes[i]));
if (varvs == null) {
varvs = stats.get(outLabels[i]);
}
varvs._inmem = false;
stats.put(varname, varvs);
}
}
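For orientation, a minimal sketch of the MatrixCharacteristics construction used to populate the dims map above; the concrete dimensions, block sizes, and class name are illustrative assumptions, not values from the original code.
import org.apache.sysml.runtime.matrix.MatrixCharacteristics;

public class MatrixCharacteristicsSketch {

    public static void main(String[] args) {
        //hypothetical operand: 10000 x 500 dense matrix in 1000 x 1000 blocks
        long rlen = 10000, clen = 500;
        int brlen = 1000, bclen = 1000;
        //dense, so the nnz estimate equals the number of cells
        long nnz = rlen * clen;
        //same constructor shape as used for the dims map above
        MatrixCharacteristics mc = new MatrixCharacteristics(rlen, clen, brlen, bclen, nnz);
        System.out.println(mc.getRows() + " x " + mc.getCols() + ", nnz = " + mc.getNonZeros());
    }
}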
Use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by apache.
The class RemoteParForMR, method runJob.
//inputs: pfid, program, taskFile, resultFile, colocatedDPMatrixObj
//opt params: enableCPCaching, numMappers, replication, max_retry, minMem, jvmReuse
public static RemoteParForJobReturn runJob(long pfid, String program, String taskFile, String resultFile, MatrixObject colocatedDPMatrixObj, boolean enableCPCaching, int numMappers, int replication, int max_retry, long minMem, boolean jvmReuse) throws DMLRuntimeException {
RemoteParForJobReturn ret = null;
String jobname = "ParFor-EMR";
long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
JobConf job = new JobConf(RemoteParForMR.class);
job.setJobName(jobname + pfid);
//maintain dml script counters
Statistics.incrementNoOfCompiledMRJobs();
try {
/////
//configure the MR job
//set arbitrary CP program blocks that will perform in the mapper
MRJobConfiguration.setProgramBlocks(job, program);
//enable/disable caching
MRJobConfiguration.setParforCachingConfig(job, enableCPCaching);
//set mappers, reducers, combiners
//map-only
job.setMapperClass(RemoteParWorkerMapper.class);
//set input format (one split per row, NLineInputFormat default N=1)
if (ParForProgramBlock.ALLOW_DATA_COLOCATION && colocatedDPMatrixObj != null) {
job.setInputFormat(RemoteParForColocatedNLineInputFormat.class);
MRJobConfiguration.setPartitioningFormat(job, colocatedDPMatrixObj.getPartitionFormat());
MatrixCharacteristics mc = colocatedDPMatrixObj.getMatrixCharacteristics();
MRJobConfiguration.setPartitioningBlockNumRows(job, mc.getRowsPerBlock());
MRJobConfiguration.setPartitioningBlockNumCols(job, mc.getColsPerBlock());
MRJobConfiguration.setPartitioningFilename(job, colocatedDPMatrixObj.getFileName());
} else { //default case
job.setInputFormat(NLineInputFormat.class);
}
//set the input path and output path
FileInputFormat.setInputPaths(job, new Path(taskFile));
//set output format
job.setOutputFormat(SequenceFileOutputFormat.class);
//set output path
MapReduceTool.deleteFileIfExistOnHDFS(resultFile);
FileOutputFormat.setOutputPath(job, new Path(resultFile));
//set the output key, value schema
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(LongWritable.class);
job.setOutputValueClass(Text.class);
//////
//set optimization parameters
//set the number of mappers and reducers
//numMappers
job.setNumMapTasks(numMappers);
job.setNumReduceTasks(0);
//job.setInt("mapred.map.tasks.maximum", 1); //system property
//job.setInt("mapred.tasktracker.tasks.maximum",1); //system property
//job.setInt("mapred.jobtracker.maxtasks.per.job",1); //system property
//set jvm memory size (if required)
String memKey = MRConfigurationNames.MR_CHILD_JAVA_OPTS;
if (minMem > 0 && minMem > InfrastructureAnalyzer.extractMaxMemoryOpt(job.get(memKey))) {
InfrastructureAnalyzer.setMaxMemoryOpt(job, memKey, minMem);
LOG.warn("Forcing '" + memKey + "' to -Xmx" + minMem / (1024 * 1024) + "M.");
}
//disable automatic tasks timeouts and speculative task exec
job.setInt(MRConfigurationNames.MR_TASK_TIMEOUT, 0);
job.setMapSpeculativeExecution(false);
//set up map/reduce memory configurations (if in AM context)
DMLConfig config = ConfigurationManager.getDMLConfig();
DMLAppMasterUtils.setupMRJobRemoteMaxMemory(job, config);
//set up custom map/reduce configurations
MRJobConfiguration.setupCustomMRConfigurations(job, config);
//enables the reuse of JVMs (multiple tasks per MR task)
if (jvmReuse)
job.setNumTasksToExecutePerJvm(-1); //unlimited
//set sort io buffer to 8MB (reduce unnecessarily large io buffer, guarantee memory consumption)
job.setInt(MRConfigurationNames.MR_TASK_IO_SORT_MB, 8);
//set the replication factor for the results
job.setInt(MRConfigurationNames.DFS_REPLICATION, replication);
//set the max number of retries per map task
// disabled job-level configuration to respect cluster configuration
// note: this refers to hadoop2, hence it never had effect on mr1
//job.setInt(MRConfigurationNames.MR_MAP_MAXATTEMPTS, max_retry);
//set unique working dir
MRJobConfiguration.setUniqueWorkingDir(job);
/////
// execute the MR job
RunningJob runjob = JobClient.runJob(job);
// Process different counters
Statistics.incrementNoOfExecutedMRJobs();
Group pgroup = runjob.getCounters().getGroup(ParForProgramBlock.PARFOR_COUNTER_GROUP_NAME);
int numTasks = (int) pgroup.getCounter(Stat.PARFOR_NUMTASKS.toString());
int numIters = (int) pgroup.getCounter(Stat.PARFOR_NUMITERS.toString());
if (DMLScript.STATISTICS && !InfrastructureAnalyzer.isLocalMode()) {
Statistics.incrementJITCompileTime(pgroup.getCounter(Stat.PARFOR_JITCOMPILE.toString()));
Statistics.incrementJVMgcCount(pgroup.getCounter(Stat.PARFOR_JVMGC_COUNT.toString()));
Statistics.incrementJVMgcTime(pgroup.getCounter(Stat.PARFOR_JVMGC_TIME.toString()));
Group cgroup = runjob.getCounters().getGroup(CacheableData.CACHING_COUNTER_GROUP_NAME.toString());
CacheStatistics.incrementMemHits((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_HITS_MEM.toString()));
CacheStatistics.incrementFSBuffHits((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_HITS_FSBUFF.toString()));
CacheStatistics.incrementFSHits((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_HITS_FS.toString()));
CacheStatistics.incrementHDFSHits((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_HITS_HDFS.toString()));
CacheStatistics.incrementFSBuffWrites((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_WRITES_FSBUFF.toString()));
CacheStatistics.incrementFSWrites((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_WRITES_FS.toString()));
CacheStatistics.incrementHDFSWrites((int) cgroup.getCounter(CacheStatistics.Stat.CACHE_WRITES_HDFS.toString()));
CacheStatistics.incrementAcquireRTime(cgroup.getCounter(CacheStatistics.Stat.CACHE_TIME_ACQR.toString()));
CacheStatistics.incrementAcquireMTime(cgroup.getCounter(CacheStatistics.Stat.CACHE_TIME_ACQM.toString()));
CacheStatistics.incrementReleaseTime(cgroup.getCounter(CacheStatistics.Stat.CACHE_TIME_RLS.toString()));
CacheStatistics.incrementExportTime(cgroup.getCounter(CacheStatistics.Stat.CACHE_TIME_EXP.toString()));
}
// read all files of result variables and prepare for return
LocalVariableMap[] results = readResultFile(job, resultFile);
ret = new RemoteParForJobReturn(runjob.isSuccessful(), numTasks, numIters, results);
} catch (Exception ex) {
throw new DMLRuntimeException(ex);
} finally {
// remove created files
try {
MapReduceTool.deleteFileIfExistOnHDFS(new Path(taskFile), job);
MapReduceTool.deleteFileIfExistOnHDFS(new Path(resultFile), job);
} catch (IOException ex) {
throw new DMLRuntimeException(ex);
}
}
if (DMLScript.STATISTICS) {
long t1 = System.nanoTime();
Statistics.maintainCPHeavyHitters("MR-Job_" + jobname, t1 - t0);
}
return ret;
}
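A hedged usage sketch of runJob above; every argument value (paths, degree of parallelism, the serializedProgram variable) is an illustrative assumption, not a default from the original code.
//illustrative invocation; 'serializedProgram' is a hypothetical String holding
//the CP program blocks serialized for the mappers
RemoteParForJobReturn ret = RemoteParForMR.runJob(
    1L,                          //pfid: id of the parfor program block
    serializedProgram,           //program executed by the worker mappers
    "hdfs:/tmp/parfor/tasks",    //taskFile: one task per line (NLineInputFormat, default N=1)
    "hdfs:/tmp/parfor/results",  //resultFile: sequence file of serialized result variables
    null,                        //colocatedDPMatrixObj: no data colocation
    true,                        //enableCPCaching
    16,                          //numMappers
    1,                           //replication
    3,                           //max_retry (note: the job-level override is disabled above)
    -1,                          //minMem: do not force a larger -Xmx
    false);                      //jvmReuse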
Use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by apache.
The class ProgramConverter, method serializeDataObject.
public static String serializeDataObject(String key, Data dat) throws DMLRuntimeException {
// SCHEMA: <name>|<datatype>|<valuetype>|value
// (scalars are serialized by value, matrices by filename; matrices append 9 metadata fields)
StringBuilder sb = new StringBuilder();
//prepare data for serialization
String name = key;
DataType datatype = dat.getDataType();
ValueType valuetype = dat.getValueType();
String value = null;
String[] matrixMetaData = null;
switch(datatype) {
case SCALAR:
ScalarObject so = (ScalarObject) dat;
//name = so.getName();
value = so.getStringValue();
break;
case MATRIX:
MatrixObject mo = (MatrixObject) dat;
MatrixFormatMetaData md = (MatrixFormatMetaData) dat.getMetaData();
MatrixCharacteristics mc = md.getMatrixCharacteristics();
value = mo.getFileName();
PartitionFormat partFormat = (mo.getPartitionFormat() != null) ? new PartitionFormat(mo.getPartitionFormat(), mo.getPartitionSize()) : PartitionFormat.NONE;
matrixMetaData = new String[9];
matrixMetaData[0] = String.valueOf(mc.getRows());
matrixMetaData[1] = String.valueOf(mc.getCols());
matrixMetaData[2] = String.valueOf(mc.getRowsPerBlock());
matrixMetaData[3] = String.valueOf(mc.getColsPerBlock());
matrixMetaData[4] = String.valueOf(mc.getNonZeros());
matrixMetaData[5] = InputInfo.inputInfoToString(md.getInputInfo());
matrixMetaData[6] = OutputInfo.outputInfoToString(md.getOutputInfo());
matrixMetaData[7] = String.valueOf(partFormat);
matrixMetaData[8] = String.valueOf(mo.getUpdateType());
break;
default:
throw new DMLRuntimeException("Unable to serialize datatype " + datatype);
}
//serialize data
sb.append(name);
sb.append(DATA_FIELD_DELIM);
sb.append(datatype);
sb.append(DATA_FIELD_DELIM);
sb.append(valuetype);
sb.append(DATA_FIELD_DELIM);
sb.append(value);
if (matrixMetaData != null)
for (int i = 0; i < matrixMetaData.length; i++) {
sb.append(DATA_FIELD_DELIM);
sb.append(matrixMetaData[i]);
}
return sb.toString();
}
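As a worked example of the schema above: with the '|' delimiter shown in the SCHEMA comment, a 1000 x 500 binary-block matrix named X (sparsity 0.5, hence nnz 250000) would serialize to the 4 header fields followed by the 9 metadata fields, roughly as below. The file path and the exact tokens produced by inputInfoToString, outputInfoToString, String.valueOf(partFormat), and getUpdateType are illustrative assumptions.
X|MATRIX|DOUBLE|hdfs:/tmp/X|1000|500|1000|1000|250000|binaryblock|binaryblock|NONE|COPY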
Use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by apache.
The class ResultMergeLocalFile, method createBinaryBlockResultFile.
@SuppressWarnings("deprecation")
private void createBinaryBlockResultFile(String fnameStaging, String fnameStagingCompare, String fnameNew, MatrixFormatMetaData metadata, boolean withCompare) throws IOException, DMLRuntimeException {
JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
Path path = new Path(fnameNew);
FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
MatrixCharacteristics mc = metadata.getMatrixCharacteristics();
long rlen = mc.getRows();
long clen = mc.getCols();
int brlen = mc.getRowsPerBlock();
int bclen = mc.getColsPerBlock();
//beware: writer creation costs ca. 50ms
SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class);
try {
MatrixIndexes indexes = new MatrixIndexes();
for (long brow = 1; brow <= (long) Math.ceil(rlen / (double) brlen); brow++)
for (long bcol = 1; bcol <= (long) Math.ceil(clen / (double) bclen); bcol++) {
File dir = new File(fnameStaging + "/" + brow + "_" + bcol);
File dir2 = new File(fnameStagingCompare + "/" + brow + "_" + bcol);
MatrixBlock mb = null;
if (dir.exists()) {
//WITH COMPARE BLOCK
if (withCompare && dir2.exists()) {
//copy only values that are different from the original
String[] lnames2 = dir2.list();
//there should be exactly 1 compare block
if (lnames2.length != 1)
throw new DMLRuntimeException("Unable to merge results because multiple compare blocks found.");
mb = LocalFileUtils.readMatrixBlockFromLocal(dir2 + "/" + lnames2[0]);
boolean appendOnly = mb.isInSparseFormat();
double[][] compare = DataConverter.convertToDoubleMatrix(mb);
String[] lnames = dir.list();
for (String lname : lnames) {
MatrixBlock tmp = LocalFileUtils.readMatrixBlockFromLocal(dir + "/" + lname);
mergeWithComp(mb, tmp, compare);
}
//sort sparse due to append-only
if (appendOnly)
mb.sortSparseRows();
//change sparsity if required after
mb.examSparsity();
} else { //WITHOUT COMPARE BLOCK
//copy all non-zeros from all workers
String[] lnames = dir.list();
boolean appendOnly = false;
for (String lname : lnames) {
if (mb == null) {
mb = LocalFileUtils.readMatrixBlockFromLocal(dir + "/" + lname);
appendOnly = mb.isInSparseFormat();
} else {
MatrixBlock tmp = LocalFileUtils.readMatrixBlockFromLocal(dir + "/" + lname);
mergeWithoutComp(mb, tmp, appendOnly);
}
}
//sort sparse due to append-only
if (appendOnly)
mb.sortSparseRows();
//change sparsity if required after
mb.examSparsity();
}
} else {
//NOTE: whenever runtime does not need all blocks anymore, this can be removed
int maxRow = (int) (((brow - 1) * brlen + brlen < rlen) ? brlen : rlen - (brow - 1) * brlen);
int maxCol = (int) (((bcol - 1) * bclen + bclen < clen) ? bclen : clen - (bcol - 1) * bclen);
mb = new MatrixBlock(maxRow, maxCol, true);
}
//mb.examSparsity(); //done on write anyway and mb not reused
indexes.setIndexes(brow, bcol);
writer.append(indexes, mb);
}
} finally {
IOUtilFunctions.closeSilently(writer);
}
}
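A minimal sketch of the block-grid arithmetic used above (the ceil-based loop bounds and the boundary-block sizes at the matrix fringe); the class and helper names are assumptions for illustration.
//illustrative helper mirroring the boundary-block computation above;
//the number of block rows is (long) Math.ceil(rlen / (double) brlen), as in the loop
final class BlockGridSketch {

    //size of block row 'brow' (1-based): full brlen unless it is the last, possibly partial, block
    static int blockSize(long brow, int brlen, long rlen) {
        return (int) (((brow - 1) * brlen + brlen < rlen) ? brlen : rlen - (brow - 1) * brlen);
    }
    //e.g. rlen = 2500, brlen = 1000 -> block rows of 1000, 1000, and 500 rows
}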
Use of org.apache.sysml.runtime.matrix.MatrixCharacteristics in project incubator-systemml by apache.
The class ResultMergeLocalFile, method createBinaryCellStagingFile.
@SuppressWarnings("deprecation")
private void createBinaryCellStagingFile(String fnameStaging, MatrixObject mo, long ID) throws IOException, DMLRuntimeException {
JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
Path path = new Path(mo.getFileName());
FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
LinkedList<Cell> buffer = new LinkedList<Cell>();
MatrixIndexes key = new MatrixIndexes();
MatrixCell value = new MatrixCell();
MatrixCharacteristics mc = mo.getMatrixCharacteristics();
int brlen = mc.getRowsPerBlock();
int bclen = mc.getColsPerBlock();
for (Path lpath : MatrixReader.getSequenceFilePaths(fs, path)) {
SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);
try {
while (reader.next(key, value)) {
Cell tmp = new Cell(key.getRowIndex(), key.getColumnIndex(), value.getValue());
buffer.addLast(tmp);
//periodic flush
if (buffer.size() > StagingFileUtils.CELL_BUFFER_SIZE) {
appendCellBufferToStagingArea(fnameStaging, ID, buffer, brlen, bclen);
buffer.clear();
}
}
//final flush
if (!buffer.isEmpty()) {
appendCellBufferToStagingArea(fnameStaging, ID, buffer, brlen, bclen);
buffer.clear();
}
} finally {
IOUtilFunctions.closeSilently(reader);
}
}
}
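For context, a minimal sketch of how 1-based cell coordinates map to the 1-based block indices behind the <brow>_<bcol> staging directories consumed by createBinaryBlockResultFile above; the class and helper names are assumptions.
//illustrative: block index of a 1-based cell index for block size blen
final class CellToBlockSketch {

    static long blockIndex(long cellIndex, int blen) {
        return (cellIndex - 1) / blen + 1;
    }
    //e.g. cell (2500, 1) with brlen = bclen = 1000 is staged under directory "3_1"
}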