Use of org.apache.sysml.yarn.ropt.MRJobResourceInstruction in project incubator-systemml by apache.
The example below is the method getMRJobInstTimeEstimate of the class CostEstimatorStaticRuntime. It estimates the execution time of an MR job as the sum of export, job/task latency, HDFS read, map compute, distributed-cache read, shuffle, reduce compute, and HDFS write costs, each normalized by an effective degree of parallelism.
@Override
protected double getMRJobInstTimeEstimate(Instruction inst, VarStats[] vs, String[] args)
    throws DMLRuntimeException
{
    MRJobInstruction jinst = (MRJobInstruction) inst;

    //infrastructure properties
    boolean localJob = InfrastructureAnalyzer.isLocalMode();
    int maxPMap = InfrastructureAnalyzer.getRemoteParallelMapTasks();
    int maxPRed = Math.min(InfrastructureAnalyzer.getRemoteParallelReduceTasks(),
        ConfigurationManager.getNumReducers());
    double blocksize = ((double) InfrastructureAnalyzer.getHDFSBlockSize()) / (1024 * 1024);

    //correction of the max number of mappers/reducers on yarn clusters
    if (InfrastructureAnalyzer.isYarnEnabled()) {
        maxPMap = (int) Math.max(maxPMap, YarnClusterAnalyzer.getNumCores());
        //artificially reduced by factor 2, in order to prefer map-side processing
        //even with a smaller degree of parallelism
        maxPRed = (int) Math.max(maxPRed, YarnClusterAnalyzer.getNumCores() / 2 / 2);
    }

    //yarn-specific: take degree of parallelism into account
    if (jinst instanceof MRJobResourceInstruction) {
        int maxTasks = (int) ((MRJobResourceInstruction) jinst).getMaxMRTasks();
        maxPMap = Math.min(maxPMap, maxTasks);
        maxPRed = Math.min(maxPRed, maxTasks);
    }
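    // Illustration (assumed numbers): if the resource instruction granted this job
    // maxTasks = 10 while the cluster would otherwise allow maxPMap = 40 and
    // maxPRed = 20, both parallelism bounds are capped at 10 for the rest of the estimate.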
    //job properties
    boolean mapOnly = jinst.isMapOnly();
    String rdInst = jinst.getIv_randInstructions();
    String rrInst = jinst.getIv_recordReaderInstructions();
    String mapInst = jinst.getIv_instructionsInMapper();
    String shfInst = jinst.getIv_shuffleInstructions();
    String aggInst = jinst.getIv_aggInstructions();
    String otherInst = jinst.getIv_otherInstructions();
    byte[] inIx = getInputIndexes(jinst.getInputVars());
    byte[] retIx = jinst.getIv_resultIndices();
    byte[] mapOutIx = getMapOutputIndexes(inIx, retIx, rdInst, mapInst, shfInst, aggInst, otherInst);
    int numMap = computeNumMapTasks(vs, inIx, blocksize, maxPMap, jinst.getJobType());
    int numPMap = Math.min(numMap, maxPMap);
    int numEPMap = Math.max(Math.min(numMap, maxPMap / 2), 1); //effective map dop
    int numRed = computeNumReduceTasks(vs, mapOutIx, jinst.getJobType());
    int numPRed = Math.min(numRed, maxPRed);
    int numEPRed = Math.max(Math.min(numRed, maxPRed / 2), 1); //effective reduce dop
    LOG.debug("Meta nmap = " + numMap + ", nred = " + numRed + "; npmap = " + numPMap
        + ", npred = " + numPRed + "; nepmap = " + numEPMap + ", nepred = " + numEPRed);
    //step 0: export if inputs in mem
    double exportCosts = 0;
    for (int i = 0; i < jinst.getInputVars().length; i++)
        if (vs[i]._inmem)
            exportCosts += getHDFSWriteTime(vs[i]._rlen, vs[i]._clen, vs[i].getSparsity());

    //step 1: MR job / task latency (normalization by effective dop)
    double jobLatencyCosts = localJob ? DEFAULT_MR_JOB_LATENCY_LOCAL : DEFAULT_MR_JOB_LATENCY_REMOTE;
    double taskLatencyCost = (numMap / numEPMap + numEPRed)
        * (localJob ? DEFAULT_MR_TASK_LATENCY_LOCAL : DEFAULT_MR_TASK_LATENCY_REMOTE);
    double latencyCosts = jobLatencyCosts + taskLatencyCost;
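    // Example latency term (assumed numbers, remote job): with numMap = 100, numEPMap = 10
    // and numEPRed = 5, taskLatencyCost = (100/10 + 5) * DEFAULT_MR_TASK_LATENCY_REMOTE,
    // i.e., 15 task-latency units added on top of the one-time job latency.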
    //step 2: parallel read of inputs (normalization by effective dop)
    double hdfsReadCosts = 0;
    for (int i = 0; i < jinst.getInputVars().length; i++)
        hdfsReadCosts += getHDFSReadTime(vs[i]._rlen, vs[i]._clen, vs[i].getSparsity());
    hdfsReadCosts /= numEPMap;

    //step 3: parallel MR instructions
    String[] mapperInst = new String[] { rdInst, rrInst, mapInst };
    String[] reducerInst = new String[] { shfInst, aggInst, otherInst };

    //map instructions compute/distcache read (normalization by effective dop)
    double mapDCReadCost = 0; //read through distributed cache
    double mapCosts = 0;      //map compute cost
    double shuffleCosts = 0;
    double reduceCosts = 0;   //reduce compute costs
    for (String instCat : mapperInst)
        if (instCat != null && instCat.length() > 0) {
            String[] linst = instCat.split(Lop.INSTRUCTION_DELIMITOR);
            for (String tmp : linst) {
                //map compute costs
                Object[] o = extractMRInstStatistics(tmp, vs);
                String opcode = InstructionUtils.getOpCode(tmp);
                mapCosts += getInstTimeEstimate(opcode, (VarStats[]) o[0], (String[]) o[1], ExecType.MR);
                //dist cache read costs
                int dcIndex = getDistcacheIndex(tmp);
                if (dcIndex >= 0) {
                    mapDCReadCost += Math.min(
                        getFSReadTime(vs[dcIndex]._rlen, vs[dcIndex]._clen, vs[dcIndex].getSparsity()),
                        getFSReadTime(DistributedCacheInput.PARTITION_SIZE, 1, 1.0)) //32MB partitions
                        * numMap; //read in each task
                }
            }
        }
    mapCosts /= numEPMap;
    mapDCReadCost /= numEPMap;
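    // Note on the distributed-cache term above: each of the numMap tasks is charged
    // the smaller of a full read of the broadcast input and a read of one
    // DistributedCacheInput.PARTITION_SIZE partition; the accumulated cost is then
    // divided by the effective map dop, like the map compute costs.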
    if (!mapOnly) {
        //shuffle costs (normalization by effective map/reduce dop)
        for (int i = 0; i < mapOutIx.length; i++) {
            shuffleCosts += (getFSWriteTime(vs[mapOutIx[i]]._rlen, vs[mapOutIx[i]]._clen, vs[mapOutIx[i]].getSparsity()) / numEPMap
                + getFSWriteTime(vs[mapOutIx[i]]._rlen, vs[mapOutIx[i]]._clen, vs[mapOutIx[i]].getSparsity()) * 4 / numEPRed
                + getFSReadTime(vs[mapOutIx[i]]._rlen, vs[mapOutIx[i]]._clen, vs[mapOutIx[i]].getSparsity()) / numEPRed);

            //TODO this is a workaround - we need to address the number of map output blocks in a more systematic way
            for (String instCat : reducerInst)
                if (instCat != null && instCat.length() > 0) {
                    String[] linst = instCat.split(Lop.INSTRUCTION_DELIMITOR);
                    for (String tmp : linst) {
                        if (InstructionUtils.getMRType(tmp) == MRINSTRUCTION_TYPE.Aggregate)
                            shuffleCosts += numMap * getFSWriteTime(vs[mapOutIx[i]]._rlen, vs[mapOutIx[i]]._clen, vs[mapOutIx[i]].getSparsity()) / numEPMap
                                + numPMap * getFSWriteTime(vs[mapOutIx[i]]._rlen, vs[mapOutIx[i]]._clen, vs[mapOutIx[i]].getSparsity()) / numEPMap
                                + numPMap * getFSReadTime(vs[mapOutIx[i]]._rlen, vs[mapOutIx[i]]._clen, vs[mapOutIx[i]].getSparsity()) / numEPRed;
                    }
                }
        }

        //reduce instructions compute (normalization by effective dop)
        for (String instCat : reducerInst)
            if (instCat != null && instCat.length() > 0) {
                String[] linst = instCat.split(Lop.INSTRUCTION_DELIMITOR);
                for (String tmp : linst) {
                    Object[] o = extractMRInstStatistics(tmp, vs);
                    if (InstructionUtils.getMRType(tmp) == MRINSTRUCTION_TYPE.Aggregate)
                        o[1] = new String[] { String.valueOf(numMap) };
                    String opcode = InstructionUtils.getOpCode(tmp);
                    reduceCosts += getInstTimeEstimate(opcode, (VarStats[]) o[0], (String[]) o[1], ExecType.MR);
                }
            }
        reduceCosts /= numEPRed;
    }

    //step 4: parallel write of outputs (normalization by effective dop)
    double hdfsWriteCosts = 0;
    for (int i = 0; i < jinst.getOutputVars().length; i++) {
        hdfsWriteCosts += getHDFSWriteTime(vs[retIx[i]]._rlen, vs[retIx[i]]._clen, vs[retIx[i]].getSparsity());
    }
    hdfsWriteCosts /= (mapOnly ? numEPMap : numEPRed);

    //debug output
    if (LOG.isDebugEnabled()) {
        LOG.debug("Costs Export = " + exportCosts);
        LOG.debug("Costs Latency = " + latencyCosts);
        LOG.debug("Costs HDFS Read = " + hdfsReadCosts);
        LOG.debug("Costs Distcache Read = " + mapDCReadCost);
        LOG.debug("Costs Map Exec = " + mapCosts);
        LOG.debug("Costs Shuffle = " + shuffleCosts);
        LOG.debug("Costs Reduce Exec = " + reduceCosts);
        LOG.debug("Costs HDFS Write = " + hdfsWriteCosts);
    }

    //aggregate individual cost factors
    return exportCosts + latencyCosts + hdfsReadCosts + mapCosts
        + mapDCReadCost + shuffleCosts + reduceCosts + hdfsWriteCosts;
}
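The return value is a purely additive model: per-phase sequential costs, each normalized by the effective map or reduce degree of parallelism where applicable. The standalone sketch below (a hypothetical class with placeholder constants, not part of SystemML) illustrates that aggregation pattern with made-up numbers.

// Hypothetical illustration of the additive cost model above; all names and
// constants are placeholders chosen for this sketch, not SystemML code.
public class MRJobCostSketch {
    // mirrors numEP* = max(min(numTasks, maxParallelism/2), 1) from the method above
    static int effectiveDop(int numTasks, int maxParallelism) {
        return Math.max(Math.min(numTasks, maxParallelism / 2), 1);
    }

    public static void main(String[] args) {
        int numMap = 100, maxPMap = 20;   // assumed task counts and cluster limits
        int numRed = 8,   maxPRed = 10;
        int epMap = effectiveDop(numMap, maxPMap); // = 10
        int epRed = effectiveDop(numRed, maxPRed); // = 5

        // assumed sequential per-phase costs in seconds (placeholders)
        double exportCosts  = 1.0;
        double latencyCosts = 20.0 + (numMap / epMap + epRed) * 3.0; // job + task latency
        double hdfsRead = 40.0, mapExec = 120.0, shuffle = 15.0, reduceExec = 30.0, hdfsWrite = 25.0;

        // additive aggregation, normalized by the effective dop (non map-only case;
        // the shuffle term is assumed to be normalized already, as in the original method)
        double total = exportCosts + latencyCosts
            + hdfsRead / epMap + mapExec / epMap
            + shuffle + reduceExec / epRed + hdfsWrite / epRed;
        System.out.println("estimated MR job time [s] = " + total);
    }
}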