Search in sources :

Example 1 with MRJobResourceInstruction

Use of org.apache.sysml.yarn.ropt.MRJobResourceInstruction in the project incubator-systemml by apache.

The method getMRJobInstTimeEstimate of the class CostEstimatorStaticRuntime.

/**
 * Computes a cost estimate for one MR job instruction as the sum of eight
 * individual cost factors: export of in-memory inputs, job/task latency,
 * HDFS read, map compute, distributed cache read, shuffle, reduce compute,
 * and HDFS write.
 *
 * NOTE(review): this listing appears to have lost lines during extraction.
 * Closing braces are missing throughout, the variable jobLatencyCosts is used
 * but never declared in the visible text, and one expression ends in a
 * dangling '*'. Block nesting therefore cannot be read off this text with
 * certainty; confirm against the original source before relying on it.
 *
 * @param inst the instruction to cost; it is cast to MRJobInstruction without
 *             a prior type check
 * @param vs   variable statistics, indexed by input position, by map output
 *             index, by result index and by distributed cache index
 * @param args not referenced anywhere in the visible portion of this method
 * @return the sum of the individual cost factors listed above
 * @throws DMLRuntimeException declared by the signature; no explicit throw is
 *             visible here, so it presumably originates in a called helper
 */
protected double getMRJobInstTimeEstimate(Instruction inst, VarStats[] vs, String[] args) throws DMLRuntimeException {
    MRJobInstruction jinst = (MRJobInstruction) inst;
    //infrastructure properties
    boolean localJob = InfrastructureAnalyzer.isLocalMode();
    int maxPMap = InfrastructureAnalyzer.getRemoteParallelMapTasks();
    // the reducer limit is capped by the configured number of reducers
    int maxPRed = Math.min(InfrastructureAnalyzer.getRemoteParallelReduceTasks(), ConfigurationManager.getNumReducers());
    // scaled down by 1024*1024 (presumably bytes to MB; confirm the unit of getHDFSBlockSize)
    double blocksize = ((double) InfrastructureAnalyzer.getHDFSBlockSize()) / (1024 * 1024);
    //correction max number of mappers/reducers on yarn clusters
    if (InfrastructureAnalyzer.isYarnEnabled()) {
        maxPMap = (int) Math.max(maxPMap, YarnClusterAnalyzer.getNumCores());
        //artificially reduced by factor 2, in order to prefer map-side processing even if smaller degree of parallelism
        // NOTE(review): the expression below divides the core count by 2 twice,
        // while the comment above speaks of a factor of 2 - confirm which is intended
        maxPRed = (int) Math.max(maxPRed, YarnClusterAnalyzer.getNumCores() / 2 / 2);
    // NOTE(review): the closing brace of the yarn-enabled block seems to be missing here
    //yarn-specific: take degree of parallelism into account
    if (jinst instanceof MRJobResourceInstruction) {
        // both limits are capped by the per-job maximum number of MR tasks
        int maxTasks = (int) ((MRJobResourceInstruction) jinst).getMaxMRTasks();
        maxPMap = Math.min(maxPMap, maxTasks);
        maxPRed = Math.min(maxPRed, maxTasks);
    // NOTE(review): the closing brace of the instanceof block seems to be missing here
    //job properties
    boolean mapOnly = jinst.isMapOnly();
    String rdInst = jinst.getIv_randInstructions();
    String rrInst = jinst.getIv_recordReaderInstructions();
    String mapInst = jinst.getIv_instructionsInMapper();
    String shfInst = jinst.getIv_shuffleInstructions();
    String aggInst = jinst.getIv_aggInstructions();
    String otherInst = jinst.getIv_otherInstructions();
    byte[] inIx = getInputIndexes(jinst.getInputVars());
    byte[] retIx = jinst.getIv_resultIndices();
    byte[] mapOutIx = getMapOutputIndexes(inIx, retIx, rdInst, mapInst, shfInst, aggInst, otherInst);
    // total number of map tasks, and that number capped by the parallel map limit
    int numMap = computeNumMapTasks(vs, inIx, blocksize, maxPMap, jinst.getJobType());
    int numPMap = Math.min(numMap, maxPMap);
    //effective map dop
    // half of the parallel map limit, capped by the task count, but at least 1
    int numEPMap = Math.max(Math.min(numMap, maxPMap / 2), 1);
    // total number of reduce tasks, and that number capped by the parallel reduce limit
    int numRed = computeNumReduceTasks(vs, mapOutIx, jinst.getJobType());
    int numPRed = Math.min(numRed, maxPRed);
    //effective reduce dop
    // half of the parallel reduce limit, capped by the task count, but at least 1
    int numEPRed = Math.max(Math.min(numRed, maxPRed / 2), 1);
    LOG.debug("Meta nmap = " + numMap + ", nred = " + numRed + "; npmap = " + numPMap + ", npred = " + numPRed + "; nepmap = " + numEPMap + ", nepred = " + numEPRed);
    //step 0: export if inputs in mem
    double exportCosts = 0;
    // vs is indexed by input position here; only inputs flagged _inmem add an HDFS write
    for (int i = 0; i < jinst.getInputVars().length; i++) if (vs[i]._inmem)
        exportCosts += getHDFSWriteTime(vs[i]._rlen, vs[i]._clen, vs[i].getSparsity());
    //step 1: MR job / task latency (normalization by effective dop)
    // NOTE(review): numMap and numEPMap are both int, so numMap / numEPMap is an
    // integer division that truncates - confirm this is intended
    double taskLatencyCost = (numMap / numEPMap + numEPRed) * (localJob ? DEFAULT_MR_TASK_LATENCY_LOCAL : DEFAULT_MR_TASK_LATENCY_REMOTE);
    // NOTE(review): jobLatencyCosts is not declared in the visible text; its
    // declaration was presumably lost in extraction
    double latencyCosts = jobLatencyCosts + taskLatencyCost;
    //step 2: parallel read of inputs (normalization by effective dop)
    double hdfsReadCosts = 0;
    for (int i = 0; i < jinst.getInputVars().length; i++) hdfsReadCosts += getHDFSReadTime(vs[i]._rlen, vs[i]._clen, vs[i].getSparsity());
    hdfsReadCosts /= numEPMap;
    //step 3: parallel MR instructions
    // instruction strings grouped by the phase they are costed under
    String[] mapperInst = new String[] { rdInst, rrInst, mapInst };
    String[] reducerInst = new String[] { shfInst, aggInst, otherInst };
    //map instructions compute/distcache read (normalization by effective dop)
    //read through distributed cache
    double mapDCReadCost = 0;
    //map compute cost
    double mapCosts = 0;
    double shuffleCosts = 0;
    //reduce compute costs
    double reduceCosts = 0;
    // null or empty instruction categories are skipped; each remaining category
    // is split into single instructions on Lop.INSTRUCTION_DELIMITOR
    for (String instCat : mapperInst) if (instCat != null && instCat.length() > 0) {
        String[] linst = instCat.split(Lop.INSTRUCTION_DELIMITOR);
        for (String tmp : linst) {
            //map compute costs
            // o[0] is used as the instruction's VarStats[], o[1] as its String[] arguments
            Object[] o = extractMRInstStatistics(tmp, vs);
            String opcode = InstructionUtils.getOpCode(tmp);
            mapCosts += getInstTimeEstimate(opcode, (VarStats[]) o[0], (String[]) o[1], ExecType.MR);
            //dist cache read costs
            // a negative index is treated as no distributed cache input
            int dcIndex = getDistcacheIndex(tmp);
            if (dcIndex >= 0) {
                // smaller of the full read time and the read time of one partition;
                // NOTE(review): the expression ends in a dangling '*', its right-hand
                // operand and the closing braces seem to have been lost in extraction
                mapDCReadCost += Math.min(getFSReadTime(vs[dcIndex]._rlen, vs[dcIndex]._clen, vs[dcIndex].getSparsity()), //32MB partitions
                getFSReadTime(DistributedCacheInput.PARTITION_SIZE, 1, 1.0)) * //read in each task
    // normalize the accumulated map-side costs by the effective map dop
    mapCosts /= numEPMap;
    mapDCReadCost /= numEPMap;
    if (!mapOnly) {
        //shuffle costs (normalization by effective map/reduce dop)
        for (int i = 0; i < mapOutIx.length; i++) {
            // per map output: one write normalized by map dop, four times the write
            // time normalized by reduce dop, and one read normalized by reduce dop
            shuffleCosts += (getFSWriteTime(vs[mapOutIx[i]]._rlen, vs[mapOutIx[i]]._clen, vs[mapOutIx[i]].getSparsity()) / numEPMap + getFSWriteTime(vs[mapOutIx[i]]._rlen, vs[mapOutIx[i]]._clen, vs[mapOutIx[i]].getSparsity()) * 4 / numEPRed + getFSReadTime(vs[mapOutIx[i]]._rlen, vs[mapOutIx[i]]._clen, vs[mapOutIx[i]].getSparsity()) / numEPRed);
            //TODO this is a workaround - we need to address the number of map output blocks in a more systematic way
            for (String instCat : reducerInst) if (instCat != null && instCat.length() > 0) {
                String[] linst = instCat.split(Lop.INSTRUCTION_DELIMITOR);
                for (String tmp : linst) {
                    // Aggregate instructions add further write/read terms scaled by
                    // numMap and numPMap for the current map output
                    if (InstructionUtils.getMRType(tmp) == MRINSTRUCTION_TYPE.Aggregate)
                        shuffleCosts += numMap * getFSWriteTime(vs[mapOutIx[i]]._rlen, vs[mapOutIx[i]]._clen, vs[mapOutIx[i]].getSparsity()) / numEPMap + numPMap * getFSWriteTime(vs[mapOutIx[i]]._rlen, vs[mapOutIx[i]]._clen, vs[mapOutIx[i]].getSparsity()) / numEPMap + numPMap * getFSReadTime(vs[mapOutIx[i]]._rlen, vs[mapOutIx[i]]._clen, vs[mapOutIx[i]].getSparsity()) / numEPRed;
        // NOTE(review): the closing braces of the shuffle loops seem to be missing here
        //reduce instructions compute (normalization by effective dop)
        for (String instCat : reducerInst) if (instCat != null && instCat.length() > 0) {
            String[] linst = instCat.split(Lop.INSTRUCTION_DELIMITOR);
            for (String tmp : linst) {
                Object[] o = extractMRInstStatistics(tmp, vs);
                // for Aggregate instructions the extracted arguments are replaced by
                // the number of map tasks, passed as a single string
                if (InstructionUtils.getMRType(tmp) == MRINSTRUCTION_TYPE.Aggregate)
                    o[1] = new String[] { String.valueOf(numMap) };
                String opcode = InstructionUtils.getOpCode(tmp);
                reduceCosts += getInstTimeEstimate(opcode, (VarStats[]) o[0], (String[]) o[1], ExecType.MR);
        reduceCosts /= numEPRed;
    //step 4: parallel write of outputs (normalization by effective dop)
    double hdfsWriteCosts = 0;
    // the i-th output variable is looked up in vs through retIx[i]
    for (int i = 0; i < jinst.getOutputVars().length; i++) {
        hdfsWriteCosts += getHDFSWriteTime(vs[retIx[i]]._rlen, vs[retIx[i]]._clen, vs[retIx[i]].getSparsity());
    // the write cost is normalized by the map dop for map-only jobs, else by the reduce dop
    hdfsWriteCosts /= ((mapOnly) ? numEPMap : numEPRed);
    //debug output
    if (LOG.isDebugEnabled()) {
        LOG.debug("Costs Export = " + exportCosts);
        LOG.debug("Costs Latency = " + latencyCosts);
        LOG.debug("Costs HDFS Read = " + hdfsReadCosts);
        LOG.debug("Costs Distcache Read = " + mapDCReadCost);
        LOG.debug("Costs Map Exec = " + mapCosts);
        LOG.debug("Costs Shuffle = " + shuffleCosts);
        LOG.debug("Costs Reduce Exec = " + reduceCosts);
        LOG.debug("Costs HDFS Write = " + hdfsWriteCosts);
    //aggregate individual cost factors
    return exportCosts + latencyCosts + hdfsReadCosts + mapCosts + mapDCReadCost + shuffleCosts + reduceCosts + hdfsWriteCosts;
Also used : MRJobResourceInstruction(org.apache.sysml.yarn.ropt.MRJobResourceInstruction) MRJobInstruction(org.apache.sysml.runtime.instructions.MRJobInstruction)


MRJobInstruction (org.apache.sysml.runtime.instructions.MRJobInstruction) 1, MRJobResourceInstruction (org.apache.sysml.yarn.ropt.MRJobResourceInstruction) 1