Search in sources :

Example 11 with MRJobInstruction

use of org.apache.sysml.runtime.instructions.MRJobInstruction in project incubator-systemml by apache.

the class CostEstimator method cleanupMRJobVariableStatistics.

/**
 * Removes the variable statistics of an MR job from the given stats map.
 * All entries keyed by the string form of 0 through the job's largest
 * result index are removed, and each removed entry is flagged as not in memory.
 *
 * @param inst instruction to clean up after; cast to MRJobInstruction
 * @param stats map from variable name to statistics, modified in place
 */
private static void cleanupMRJobVariableStatistics(Instruction inst, HashMap<String, VarStats> stats) {
    MRJobInstruction mrJob = (MRJobInstruction) inst;
    // find the largest result index of the job (-1 if there are none)
    byte highest = -1;
    for (byte resultIx : mrJob.getIv_resultIndices()) {
        if (resultIx > highest) {
            highest = resultIx;
        }
    }
    // drop the statistics of every index from 0 through that maximum
    for (int ix = 0; ix <= highest; ix++) {
        VarStats removed = stats.remove(String.valueOf(ix));
        if (removed == null) {
            continue;
        }
        // all MR job outputs are on HDFS
        removed._inmem = false;
    }
}
Also used : MRJobInstruction(org.apache.sysml.runtime.instructions.MRJobInstruction)

Example 12 with MRJobInstruction

use of org.apache.sysml.runtime.instructions.MRJobInstruction in project incubator-systemml by apache.

the class CostEstimatorStaticRuntime method getMRJobInstTimeEstimate.

/**
 * Estimates the time of a single MR job instruction as the sum of eight cost
 * components: export of in-memory inputs, job/task latency, HDFS read,
 * map compute, distributed cache read, shuffle, reduce compute and HDFS write.
 * Most components are normalized by an "effective" degree of parallelism,
 * computed below as half of the maximum parallelism (but at least 1).
 *
 * @param inst instruction to cost; cast to MRJobInstruction without a prior type check
 * @param vs variable statistics, indexed both by input position and by the job's byte indexes
 * @param args not referenced in this method
 * @return sum of all cost components listed above
 */
@Override
protected double getMRJobInstTimeEstimate(Instruction inst, VarStats[] vs, String[] args) {
    MRJobInstruction jinst = (MRJobInstruction) inst;
    // infrastructure properties
    boolean localJob = InfrastructureAnalyzer.isLocalMode();
    int maxPMap = InfrastructureAnalyzer.getRemoteParallelMapTasks();
    int maxPRed = Math.min(InfrastructureAnalyzer.getRemoteParallelReduceTasks(), ConfigurationManager.getNumReducers());
    // HDFS block size divided by 1024*1024 (presumably bytes to MB - confirm against getHDFSBlockSize)
    double blocksize = ((double) InfrastructureAnalyzer.getHDFSBlockSize()) / (1024 * 1024);
    // correction max number of mappers/reducers on yarn clusters
    if (InfrastructureAnalyzer.isYarnEnabled()) {
        maxPMap = (int) Math.max(maxPMap, YarnClusterAnalyzer.getNumCores());
        // artificially reduced by factor 2, in order to prefer map-side processing even if smaller degree of parallelism
        maxPRed = (int) Math.max(maxPRed, YarnClusterAnalyzer.getNumCores() / 2 / 2);
    }
    // yarn-specific: take degree of parallelism into account
    if (jinst instanceof MRJobResourceInstruction) {
        // caps both map and reduce parallelism at the instruction's max number of MR tasks
        int maxTasks = (int) ((MRJobResourceInstruction) jinst).getMaxMRTasks();
        maxPMap = Math.min(maxPMap, maxTasks);
        maxPRed = Math.min(maxPRed, maxTasks);
    }
    // job properties
    boolean mapOnly = jinst.isMapOnly();
    String rdInst = jinst.getIv_randInstructions();
    String rrInst = jinst.getIv_recordReaderInstructions();
    String mapInst = jinst.getIv_instructionsInMapper();
    String shfInst = jinst.getIv_shuffleInstructions();
    String aggInst = jinst.getIv_aggInstructions();
    String otherInst = jinst.getIv_otherInstructions();
    byte[] inIx = getInputIndexes(jinst.getInputVars());
    byte[] retIx = jinst.getIv_resultIndices();
    byte[] mapOutIx = getMapOutputIndexes(inIx, retIx, rdInst, mapInst, shfInst, aggInst, otherInst);
    // number of tasks (numMap/numRed), parallel tasks (numPMap/numPRed) and effective parallel tasks (numEPMap/numEPRed)
    int numMap = computeNumMapTasks(vs, inIx, blocksize, maxPMap, jinst.getJobType());
    int numPMap = Math.min(numMap, maxPMap);
    // effective map dop
    int numEPMap = Math.max(Math.min(numMap, maxPMap / 2), 1);
    int numRed = computeNumReduceTasks(vs, mapOutIx, jinst.getJobType());
    int numPRed = Math.min(numRed, maxPRed);
    // effective reduce dop
    int numEPRed = Math.max(Math.min(numRed, maxPRed / 2), 1);
    // NOTE(review): unlike the debug output at the end of this method, this message is
    // concatenated even when debug logging is disabled (no isDebugEnabled guard)
    LOG.debug("Meta nmap = " + numMap + ", nred = " + numRed + "; npmap = " + numPMap + ", npred = " + numPRed + "; nepmap = " + numEPMap + ", nepred = " + numEPRed);
    // step 0: export if inputs in mem
    // assumes vs[0..#inputs-1] are the statistics of the job inputs, in input order - TODO confirm
    double exportCosts = 0;
    for (int i = 0; i < jinst.getInputVars().length; i++) if (vs[i]._inmem)
        exportCosts += getHDFSWriteTime(vs[i]._rlen, vs[i]._clen, vs[i].getSparsity());
    // step 1: MR job / task latency (normalization by effective dop)
    double jobLatencyCosts = localJob ? DEFAULT_MR_JOB_LATENCY_LOCAL : DEFAULT_MR_JOB_LATENCY_REMOTE;
    // NOTE(review): numMap / numEPMap is an int/int division and therefore truncates;
    // confirm whether a fractional number of map waves was intended
    double taskLatencyCost = (numMap / numEPMap + numEPRed) * (localJob ? DEFAULT_MR_TASK_LATENCY_LOCAL : DEFAULT_MR_TASK_LATENCY_REMOTE);
    double latencyCosts = jobLatencyCosts + taskLatencyCost;
    // step 2: parallel read of inputs (normalization by effective dop)
    double hdfsReadCosts = 0;
    for (int i = 0; i < jinst.getInputVars().length; i++) hdfsReadCosts += getHDFSReadTime(vs[i]._rlen, vs[i]._clen, vs[i].getSparsity());
    hdfsReadCosts /= numEPMap;
    // step 3: parallel MR instructions
    // instruction strings grouped by where they are costed: map side vs reduce side
    String[] mapperInst = new String[] { rdInst, rrInst, mapInst };
    String[] reducerInst = new String[] { shfInst, aggInst, otherInst };
    // map instructions compute/distcache read (normalization by effective dop)
    // read through distributed cache
    double mapDCReadCost = 0;
    // map compute cost
    double mapCosts = 0;
    // shuffle costs, remain 0 for map-only jobs
    double shuffleCosts = 0;
    // reduce compute costs
    double reduceCosts = 0;
    // null or empty instruction categories are skipped
    for (String instCat : mapperInst) if (instCat != null && instCat.length() > 0) {
        String[] linst = instCat.split(Lop.INSTRUCTION_DELIMITOR);
        for (String tmp : linst) {
            // map compute costs
            // o[0] is used as the instruction's VarStats[] and o[1] as its String[] arguments
            Object[] o = extractMRInstStatistics(tmp, vs);
            String opcode = InstructionUtils.getOpCode(tmp);
            mapCosts += getInstTimeEstimate(opcode, (VarStats[]) o[0], (String[]) o[1], ExecType.MR);
            // dist cache read costs
            // a negative index is treated as "instruction has no distributed cache input"
            int dcIndex = getDistcacheIndex(tmp);
            if (dcIndex >= 0) {
                // per task: the smaller of reading the full input and reading one partition
                mapDCReadCost += Math.min(getFSReadTime(vs[dcIndex]._rlen, vs[dcIndex]._clen, vs[dcIndex].getSparsity()), // 32MB partitions
                getFSReadTime(DistributedCacheInput.PARTITION_SIZE, 1, 1.0)) * // read in each task
                numMap;
            }
        }
    }
    mapCosts /= numEPMap;
    mapDCReadCost /= numEPMap;
    if (!mapOnly) {
        // shuffle costs (normalization by effective map/reduce dop)
        for (int i = 0; i < mapOutIx.length; i++) {
            // per map output: write normalized by map dop, plus 4x write and one read normalized by reduce dop
            shuffleCosts += (getFSWriteTime(vs[mapOutIx[i]]._rlen, vs[mapOutIx[i]]._clen, vs[mapOutIx[i]].getSparsity()) / numEPMap + getFSWriteTime(vs[mapOutIx[i]]._rlen, vs[mapOutIx[i]]._clen, vs[mapOutIx[i]].getSparsity()) * 4 / numEPRed + getFSReadTime(vs[mapOutIx[i]]._rlen, vs[mapOutIx[i]]._clen, vs[mapOutIx[i]].getSparsity()) / numEPRed);
            // TODO this is a workaround - we need to address the number of map output blocks in a more systematic way
            // adds extra shuffle costs for this map output once per aggregate instruction on the reduce side
            for (String instCat : reducerInst) if (instCat != null && instCat.length() > 0) {
                String[] linst = instCat.split(Lop.INSTRUCTION_DELIMITOR);
                for (String tmp : linst) {
                    if (InstructionUtils.getMRType(tmp) == MRType.Aggregate)
                        shuffleCosts += numMap * getFSWriteTime(vs[mapOutIx[i]]._rlen, vs[mapOutIx[i]]._clen, vs[mapOutIx[i]].getSparsity()) / numEPMap + numPMap * getFSWriteTime(vs[mapOutIx[i]]._rlen, vs[mapOutIx[i]]._clen, vs[mapOutIx[i]].getSparsity()) / numEPMap + numPMap * getFSReadTime(vs[mapOutIx[i]]._rlen, vs[mapOutIx[i]]._clen, vs[mapOutIx[i]].getSparsity()) / numEPRed;
                }
            }
        }
        // reduce instructions compute (normalization by effective dop)
        for (String instCat : reducerInst) if (instCat != null && instCat.length() > 0) {
            String[] linst = instCat.split(Lop.INSTRUCTION_DELIMITOR);
            for (String tmp : linst) {
                Object[] o = extractMRInstStatistics(tmp, vs);
                // aggregate instructions get the number of map tasks as their only argument
                if (InstructionUtils.getMRType(tmp) == MRType.Aggregate)
                    o[1] = new String[] { String.valueOf(numMap) };
                String opcode = InstructionUtils.getOpCode(tmp);
                reduceCosts += getInstTimeEstimate(opcode, (VarStats[]) o[0], (String[]) o[1], ExecType.MR);
            }
        }
        reduceCosts /= numEPRed;
    }
    // step 4: parallel write of outputs (normalization by effective dop)
    double hdfsWriteCosts = 0;
    // NOTE(review): loops over the number of output vars but indexes retIx;
    // assumes retIx has at least as many entries as getOutputVars() - confirm
    for (int i = 0; i < jinst.getOutputVars().length; i++) {
        hdfsWriteCosts += getHDFSWriteTime(vs[retIx[i]]._rlen, vs[retIx[i]]._clen, vs[retIx[i]].getSparsity());
    }
    // outputs are written by the mappers in map-only jobs, otherwise by the reducers
    hdfsWriteCosts /= ((mapOnly) ? numEPMap : numEPRed);
    // debug output
    if (LOG.isDebugEnabled()) {
        LOG.debug("Costs Export = " + exportCosts);
        LOG.debug("Costs Latency = " + latencyCosts);
        LOG.debug("Costs HDFS Read = " + hdfsReadCosts);
        LOG.debug("Costs Distcache Read = " + mapDCReadCost);
        LOG.debug("Costs Map Exec = " + mapCosts);
        LOG.debug("Costs Shuffle = " + shuffleCosts);
        LOG.debug("Costs Reduce Exec = " + reduceCosts);
        LOG.debug("Costs HDFS Write = " + hdfsWriteCosts);
    }
    // aggregate individual cost factors
    return exportCosts + latencyCosts + hdfsReadCosts + mapCosts + mapDCReadCost + shuffleCosts + reduceCosts + hdfsWriteCosts;
}
Also used : MRJobResourceInstruction(org.apache.sysml.yarn.ropt.MRJobResourceInstruction) MRJobInstruction(org.apache.sysml.runtime.instructions.MRJobInstruction)

Example 13 with MRJobInstruction

use of org.apache.sysml.runtime.instructions.MRJobInstruction in project incubator-systemml by apache.

the class Dag method generateMapReduceInstructions.

/**
 * Method to generate MapReduce job instructions from a given set of nodes.
 * <p>
 * Collects inputs, record reader, mapper, shuffle and reducer instructions for
 * the root (output-producing) nodes of the given exec nodes, assembles them into
 * a single MRJobInstruction, and appends that job together with its surrounding
 * variable, post-processing, rename and cleanup instructions to the given lists.
 * For GMR and GMRCELL jobs, nothing is generated if all root nodes are transient
 * writes that merely copy a transient read.
 *
 * @param execNodes list of exec nodes
 * @param inst list of instructions; receives the pre-instructions, the MR job and the post-instructions
 * @param writeinst list of write instructions; receives the instructions for transient writes whose input is itself a root node
 * @param deleteinst list of delete instructions; receives the rename instructions of the job outputs
 * @param rmvarinst list of rmvar instructions; passed on to processConsumers for each input lop
 * @param jt job type
 * @throws LopsException if more than one recordreader instruction is generated,
 *         or if no result index can be determined for a root node
 */
private void generateMapReduceInstructions(ArrayList<Lop> execNodes, ArrayList<Instruction> inst, ArrayList<Instruction> writeinst, ArrayList<Instruction> deleteinst, ArrayList<Instruction> rmvarinst, JobType jt) {
    // per-job metadata, filled by the helper calls below
    ArrayList<Byte> resultIndices = new ArrayList<>();
    ArrayList<String> inputs = new ArrayList<>();
    ArrayList<String> outputs = new ArrayList<>();
    ArrayList<InputInfo> inputInfos = new ArrayList<>();
    ArrayList<OutputInfo> outputInfos = new ArrayList<>();
    ArrayList<Long> numRows = new ArrayList<>();
    ArrayList<Long> numCols = new ArrayList<>();
    ArrayList<Long> numRowsPerBlock = new ArrayList<>();
    ArrayList<Long> numColsPerBlock = new ArrayList<>();
    ArrayList<String> mapperInstructions = new ArrayList<>();
    ArrayList<String> randInstructions = new ArrayList<>();
    ArrayList<String> recordReaderInstructions = new ArrayList<>();
    // stays 0 (map-only job) unless reduce-side instructions are found below
    int numReducers = 0;
    // never reassigned in this method
    int replication = 1;
    ArrayList<String> inputLabels = new ArrayList<>();
    ArrayList<String> outputLabels = new ArrayList<>();
    ArrayList<Instruction> renameInstructions = new ArrayList<>();
    ArrayList<Instruction> variableInstructions = new ArrayList<>();
    ArrayList<Instruction> postInstructions = new ArrayList<>();
    // line numbers are only collected in debug mode, otherwise null is passed to the helpers
    ArrayList<Integer> MRJobLineNumbers = null;
    if (DMLScript.ENABLE_DEBUG_MODE) {
        MRJobLineNumbers = new ArrayList<>();
    }
    ArrayList<Lop> inputLops = new ArrayList<>();
    boolean cellModeOverride = false;
    /* Find the nodes that produce an output */
    ArrayList<Lop> rootNodes = new ArrayList<>();
    getOutputNodes(execNodes, rootNodes, jt);
    if (LOG.isTraceEnabled())
        LOG.trace("# of root nodes = " + rootNodes.size());
    /* Remove transient writes that are simple copy of transient reads */
    if (jt == JobType.GMR || jt == JobType.GMRCELL) {
        ArrayList<Lop> markedNodes = new ArrayList<>();
        // only keep data nodes that are results of some computation.
        for (Lop rnode : rootNodes) {
            if (rnode.getExecLocation() == ExecLocation.Data && ((Data) rnode).isTransient() && ((Data) rnode).getOperationType() == OperationTypes.WRITE && ((Data) rnode).getDataType() == DataType.MATRIX) {
                // no computation, just a copy
                // (the input is a transient data node with the same label as this write)
                if (rnode.getInputs().get(0).getExecLocation() == ExecLocation.Data && ((Data) rnode.getInputs().get(0)).isTransient() && rnode.getOutputParameters().getLabel().equals(rnode.getInputs().get(0).getOutputParameters().getLabel())) {
                    markedNodes.add(rnode);
                }
            }
        }
        // delete marked nodes
        rootNodes.removeAll(markedNodes);
        markedNodes.clear();
        // nothing left to compute, hence no MR job is generated
        if (rootNodes.isEmpty())
            return;
    }
    // structure that maps node to their indices that will be used in the instructions
    HashMap<Lop, Integer> nodeIndexMapping = new HashMap<>();
    for (Lop rnode : rootNodes) {
        getInputPathsAndParameters(rnode, execNodes, inputs, inputInfos, numRows, numCols, numRowsPerBlock, numColsPerBlock, nodeIndexMapping, inputLabels, inputLops, MRJobLineNumbers);
    }
    // In case of RAND job, instructions are defined in the input file
    // (randInstructions now refers to the same list object as inputs)
    if (jt == JobType.DATAGEN)
        randInstructions = inputs;
    // single-element array so that the helpers can advance the next free index
    // (presumably updated in place by the callees - confirm); starts after the inputs
    int[] start_index = new int[1];
    start_index[0] = inputs.size();
    // currently, recordreader instructions are allowed only in GMR jobs
    if (jt == JobType.GMR || jt == JobType.GMRCELL) {
        for (Lop rnode : rootNodes) {
            getRecordReaderInstructions(rnode, execNodes, inputs, recordReaderInstructions, nodeIndexMapping, start_index, inputLabels, inputLops, MRJobLineNumbers);
            if (recordReaderInstructions.size() > 1)
                throw new LopsException("MapReduce job can only have a single recordreader instruction: " + recordReaderInstructions.toString());
        }
    }
    // force cell mode if any input is in binary or text cell format,
    // except for REBLOCK, CSV_REBLOCK and DATAGEN jobs
    if (jt != JobType.REBLOCK && jt != JobType.CSV_REBLOCK && jt != JobType.DATAGEN) {
        for (int i = 0; i < inputInfos.size(); i++) if (inputInfos.get(i) == InputInfo.BinaryCellInputInfo || inputInfos.get(i) == InputInfo.TextCellInputInfo)
            cellModeOverride = true;
    }
    // jobs with a recordreader instruction and GROUPED_AGG jobs always use cell mode
    if (!recordReaderInstructions.isEmpty() || jt == JobType.GROUPED_AGG)
        cellModeOverride = true;
    // collect the mapper instructions of all root nodes
    for (int i = 0; i < rootNodes.size(); i++) {
        getMapperInstructions(rootNodes.get(i), execNodes, inputs, mapperInstructions, nodeIndexMapping, start_index, inputLabels, inputLops, MRJobLineNumbers);
    }
    if (LOG.isTraceEnabled()) {
        LOG.trace("    Input strings: " + inputs.toString());
        if (jt == JobType.DATAGEN)
            LOG.trace("    Rand instructions: " + getCSVString(randInstructions));
        // NOTE(review): recordreader instructions are also collected for GMRCELL above,
        // but only traced for GMR - confirm whether that is intended
        if (jt == JobType.GMR)
            LOG.trace("    RecordReader instructions: " + getCSVString(recordReaderInstructions));
        LOG.trace("    Mapper instructions: " + getCSVString(mapperInstructions));
    }
    /* Get Shuffle and Reducer Instructions */
    ArrayList<String> shuffleInstructions = new ArrayList<>();
    ArrayList<String> aggInstructionsReducer = new ArrayList<>();
    ArrayList<String> otherInstructionsReducer = new ArrayList<>();
    for (Lop rn : rootNodes) {
        int resultIndex = getAggAndOtherInstructions(rn, execNodes, shuffleInstructions, aggInstructionsReducer, otherInstructionsReducer, nodeIndexMapping, start_index, inputLabels, inputLops, MRJobLineNumbers);
        // -1 signals that no result index could be determined for this root node
        if (resultIndex == -1)
            throw new LopsException("Unexpected error in piggybacking!");
        if (rn.getExecLocation() == ExecLocation.Data && ((Data) rn).getOperationType() == Data.OperationTypes.WRITE && ((Data) rn).isTransient() && rootNodes.contains(rn.getInputs().get(0))) {
            // Both rn (a transient write) and its input are root nodes.
            // Instead of creating two copies of the data, simply generate a cpvar instruction
            NodeOutput out = setupNodeOutputs(rn, ExecType.MR, cellModeOverride, true);
            writeinst.addAll(out.getLastInstructions());
        } else {
            // regular job output: register its result index, label, file name and output info
            resultIndices.add(Byte.valueOf((byte) resultIndex));
            // setup output filenames and outputInfos and generate related instructions
            NodeOutput out = setupNodeOutputs(rn, ExecType.MR, cellModeOverride, false);
            outputLabels.add(out.getVarName());
            outputs.add(out.getFileName());
            outputInfos.add(out.getOutInfo());
            if (LOG.isTraceEnabled()) {
                LOG.trace("    Output Info: " + out.getFileName() + ";" + OutputInfo.outputInfoToString(out.getOutInfo()) + ";" + out.getVarName());
            }
            renameInstructions.addAll(out.getLastInstructions());
            variableInstructions.addAll(out.getPreInstructions());
            postInstructions.addAll(out.getPostInstructions());
        }
    }
    /* Convert the collected result indices into a primitive byte array */
    byte[] resultIndicesByte = new byte[resultIndices.size()];
    for (int i = 0; i < resultIndicesByte.length; i++) {
        resultIndicesByte[i] = resultIndices.get(i).byteValue();
    }
    if (LOG.isTraceEnabled()) {
        LOG.trace("    Shuffle Instructions: " + getCSVString(shuffleInstructions));
        LOG.trace("    Aggregate Instructions: " + getCSVString(aggInstructionsReducer));
        LOG.trace("    Other instructions =" + getCSVString(otherInstructionsReducer));
        LOG.trace("    Output strings: " + outputs.toString());
        LOG.trace("    ResultIndices = " + resultIndices.toString());
    }
    /* Prepare the MapReduce job instruction */
    MRJobInstruction mr = new MRJobInstruction(jt);
    // check if this is a map-only job. If not, set the number of reducers
    // (total_reducers is defined outside this method)
    if (!shuffleInstructions.isEmpty() || !aggInstructionsReducer.isEmpty() || !otherInstructionsReducer.isEmpty())
        numReducers = total_reducers;
    // set inputs, outputs, and other other properties for the job
    mr.setInputOutputLabels(inputLabels.toArray(new String[0]), outputLabels.toArray(new String[0]));
    mr.setOutputs(resultIndicesByte);
    mr.setDimsUnknownFilePrefix(getFilePath());
    mr.setNumberOfReducers(numReducers);
    mr.setReplication(replication);
    // set instructions for recordReader and mapper
    mr.setRecordReaderInstructions(getCSVString(recordReaderInstructions));
    mr.setMapperInstructions(getCSVString(mapperInstructions));
    // compute and set mapper memory requirements (for consistency of runtime piggybacking)
    // as the sum of the mapper footprints of all exec nodes; only done for GMR jobs
    if (jt == JobType.GMR) {
        double mem = 0;
        for (Lop n : execNodes) mem += computeFootprintInMapper(n);
        mr.setMemoryRequirements(mem);
    }
    if (jt == JobType.DATAGEN)
        mr.setRandInstructions(getCSVString(randInstructions));
    // set shuffle instructions
    mr.setShuffleInstructions(getCSVString(shuffleInstructions));
    // set reducer instruction
    mr.setAggregateInstructionsInReducer(getCSVString(aggInstructionsReducer));
    mr.setOtherInstructionsInReducer(getCSVString(otherInstructionsReducer));
    if (DMLScript.ENABLE_DEBUG_MODE) {
        // set line number information for each MR instruction
        mr.setMRJobInstructionsLineNumbers(MRJobLineNumbers);
    }
    /* Add the prepared instructions to output set */
    // order matters: pre-instructions, then the job itself, then post-instructions
    inst.addAll(variableInstructions);
    inst.add(mr);
    inst.addAll(postInstructions);
    deleteinst.addAll(renameInstructions);
    // process the consumers of every job input; the lop itself is passed as the
    // last argument only in debug mode, otherwise null
    for (Lop l : inputLops) {
        if (DMLScript.ENABLE_DEBUG_MODE) {
            processConsumers(l, rmvarinst, deleteinst, l);
        } else {
            processConsumers(l, rmvarinst, deleteinst, null);
        }
    }
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) MRJobInstruction(org.apache.sysml.runtime.instructions.MRJobInstruction) CPInstruction(org.apache.sysml.runtime.instructions.cp.CPInstruction) Instruction(org.apache.sysml.runtime.instructions.Instruction) VariableCPInstruction(org.apache.sysml.runtime.instructions.cp.VariableCPInstruction) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) MRJobInstruction(org.apache.sysml.runtime.instructions.MRJobInstruction) Data(org.apache.sysml.lops.Data) Lop(org.apache.sysml.lops.Lop) OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) LopsException(org.apache.sysml.lops.LopsException)

Example 14 with MRJobInstruction

use of org.apache.sysml.runtime.instructions.MRJobInstruction in project incubator-systemml by apache.

the class DMLDebuggerProgramInfo method accesBreakpointInstruction.

/**
 * Inserts, updates or removes a breakpoint instruction for the given line number
 * in the given instruction list. Only the first matching instruction is handled;
 * if nothing matches, the list is left untouched.
 *
 * @param instructions Instructions for current program block (modified in place)
 * @param lineNumber Location for inserting breakpoint
 * @param op Breakpoint operation: 0 inserts a new breakpoint (or re-enables an existing one),
 *           1 updates the status of an existing breakpoint, any other value removes it
 * @param status Current breakpoint status
 */
private void accesBreakpointInstruction(ArrayList<Instruction> instructions, int lineNumber, int op, BPINSTRUCTION_STATUS status) {
    for (int pos = 0; pos < instructions.size(); pos++) {
        final Instruction candidate = instructions.get(pos);

        if (op != 0) {
            // update/remove only applies to an existing breakpoint at the requested line
            if (!(candidate instanceof BreakPointInstruction) || candidate.getLineNum() != lineNumber) {
                continue;
            }
            if (op == 1) {
                BreakPointInstruction existing = (BreakPointInstruction) candidate;
                existing.setLocation(candidate);
                existing.setInstID(candidate.getInstID());
                existing.setBPInstructionStatus(status);
                existing.setBPInstructionLocation(location);
                instructions.set(pos, existing);
                DMLBreakpointManager.updateBreakpoint(lineNumber, status);
            } else {
                instructions.remove(pos);
                DMLBreakpointManager.removeBreakpoint(lineNumber, status);
            }
            return;
        }

        // op == 0: decide whether a new breakpoint goes right before this instruction
        final boolean insertBefore;
        if (candidate instanceof MRJobInstruction) {
            // MR jobs cover several lines, so ask the job whether it contains the line
            insertBefore = ((MRJobInstruction) candidate).findMRInstructions(lineNumber);
        } else if (candidate instanceof CPInstruction || candidate instanceof SPInstruction) {
            insertBefore = (candidate.getLineNum() == lineNumber);
        } else {
            // neither MR nor CP/SP: an existing breakpoint at this line is re-enabled
            if (candidate instanceof BreakPointInstruction && candidate.getLineNum() == lineNumber) {
                BreakPointInstruction existing = (BreakPointInstruction) candidate;
                existing.setBPInstructionStatus(BPINSTRUCTION_STATUS.ENABLED);
                existing.setBPInstructionLocation(location);
                instructions.set(pos, existing);
                DMLBreakpointManager.updateBreakpoint(lineNumber, status);
                return;
            }
            continue;
        }

        if (insertBefore) {
            BreakPointInstruction created = new BreakPointInstruction();
            created.setLocation(candidate);
            created.setInstID(instID++);
            created.setBPInstructionLocation(location);
            instructions.add(pos, created);
            DMLBreakpointManager.insertBreakpoint(created, lineNumber);
            return;
        }
    }
}
Also used : SPInstruction(org.apache.sysml.runtime.instructions.spark.SPInstruction) MRJobInstruction(org.apache.sysml.runtime.instructions.MRJobInstruction) CPInstruction(org.apache.sysml.runtime.instructions.cp.CPInstruction) BreakPointInstruction(org.apache.sysml.runtime.instructions.cp.BreakPointInstruction) SPInstruction(org.apache.sysml.runtime.instructions.spark.SPInstruction) BreakPointInstruction(org.apache.sysml.runtime.instructions.cp.BreakPointInstruction) MRJobInstruction(org.apache.sysml.runtime.instructions.MRJobInstruction) CPInstruction(org.apache.sysml.runtime.instructions.cp.CPInstruction) Instruction(org.apache.sysml.runtime.instructions.Instruction)

Example 15 with MRJobInstruction

use of org.apache.sysml.runtime.instructions.MRJobInstruction in project incubator-systemml by apache.

the class ExternalFunctionProgramBlock method getCell2BlockInstructions.

/**
 * Generates a single REBLOCK job, plus the surrounding variable instructions,
 * that converts the matrix outputs from cell representation into block representation.
 * Matrix outputs listed in the skip-reblock set are left out of the job.
 *
 * @param outputParams list of output data identifiers
 * @param blockedFileNames map of blocked file names; receives one entry per reblocked matrix
 * @return list of instructions, or null if there is no matrix output to reblock
 */
private ArrayList<Instruction> getCell2BlockInstructions(ArrayList<DataIdentifier> outputParams, HashMap<String, String> blockedFileNames) {
    // split the matrix outputs into those to reblock and those explicitly skipped
    ArrayList<DataIdentifier> toReblock = new ArrayList<>();
    ArrayList<DataIdentifier> skipped = new ArrayList<>();
    for (DataIdentifier param : outputParams) {
        if (!param.getDataType().isMatrix()) {
            continue;
        }
        if (_skipOutReblock.contains(param.getName())) {
            skipped.add(param);
        } else {
            toReblock.add(param);
        }
    }
    // null if no output matrices
    if (toReblock.isEmpty()) {
        return null;
    }

    ArrayList<Instruction> c2binst = new ArrayList<>();
    MRJobInstruction reblkInst = new MRJobInstruction(JobType.REBLOCK);
    // line number -> reblock instructions, only maintained for the debugger
    TreeMap<Integer, ArrayList<String>> lineToReblock = null;
    if (DMLScript.ENABLE_DEBUG_MODE) {
        lineToReblock = new TreeMap<>();
    }
    final int numMatrices = toReblock.size();
    ArrayList<String> inLabels = new ArrayList<>();
    ArrayList<String> outLabels = new ArrayList<>();
    String[] outputs = new String[numMatrices];
    byte[] resultIndex = new byte[numMatrices];
    // all reblock instructions of the job, joined by the instruction delimiter
    StringBuilder reblock = new StringBuilder();
    String scratchSpaceLoc = ConfigurationManager.getScratchSpace();
    try {
        // one rblk instruction per output matrix, transforming it from cell to block
        for (int idx = 0; idx < numMatrices; idx++) {
            DataIdentifier matrix = toReblock.get(idx);
            String name = matrix.getName();
            String outLabel = name + "_extFnOutput";
            inLabels.add(name);
            outLabels.add(outLabel);
            outputs[idx] = scratchSpaceLoc + Lop.FILE_SEPARATOR + Lop.PROCESS_PREFIX + DMLScript.getUUID() + Lop.FILE_SEPARATOR + _otherParams.get(ExternalFunctionStatement.CLASS_NAME) + _runID + "_" + idx + "Output";
            blockedFileNames.put(name, outputs[idx]);
            resultIndex[idx] = (byte) idx;
            if (idx > 0) {
                reblock.append(Lop.INSTRUCTION_DELIMITOR);
            }
            reblock.append(createCell2BlockReblockString(idx, matrix));
            if (DMLScript.ENABLE_DEBUG_MODE) {
                // separate copy of this one reblock instruction, registered under its line number (FOR DEBUGGER)
                String single = createCell2BlockReblockString(idx, matrix);
                Integer beginLine = matrix.getBeginLine();
                ArrayList<String> atLine = lineToReblock.get(beginLine);
                if (atLine == null) {
                    atLine = new ArrayList<String>();
                    lineToReblock.put(beginLine, atLine);
                }
                atLine.add(single);
            }
            // metadata instruction that populates the symbol table
            // with the variable holding the blocked matrix
            Instruction createInst = VariableCPInstruction.prepareCreateMatrixVariableInstruction(outLabel, outputs[idx], false, OutputInfo.outputInfoToString(OutputInfo.BinaryBlockOutputInfo));
            createInst.setLocation(matrix);
            c2binst.add(createInst);
        }
        String[] inLabelArray = inLabels.toArray(new String[inLabels.size()]);
        String[] outLabelArray = outLabels.toArray(new String[inLabels.size()]);
        reblkInst.setReBlockInstructions(inLabelArray, "", reblock.toString(), "", outLabelArray, resultIndex, 1, 1);
        c2binst.add(reblkInst);
        // rename the output variables of the REBLOCK job back to the original names
        for (int idx = 0; idx < numMatrices; idx++) {
            DataIdentifier matrix = toReblock.get(idx);
            String outLabel = outLabels.get(idx);
            Instruction copy = VariableCPInstruction.prepareCopyInstruction(outLabel, matrix.getName());
            Instruction remove = VariableCPInstruction.prepareRemoveInstruction(outLabel);
            copy.setLocation(matrix);
            remove.setLocation(matrix);
            c2binst.add(copy);
            c2binst.add(remove);
        }
    } catch (Exception e) {
        throw new RuntimeException(this.printBlockErrorLocation() + "error generating instructions", e);
    }
    // LOGGING instructions
    if (LOG.isTraceEnabled()) {
        LOG.trace("\n--- Cell-2-Block Instructions ---");
        for (Instruction generated : c2binst) {
            LOG.trace(generated.toString());
        }
        LOG.trace("----------------------------------");
    }
    return c2binst;
}

/**
 * Builds the MR rblk instruction string for the matrix at the given index;
 * the index is used as both the input and the output operand.
 *
 * @param index position of the matrix within the reblock job
 * @param matrix matrix to be reblocked
 * @return rblk instruction string
 */
private static String createCell2BlockReblockString(int index, DataIdentifier matrix) {
    return "MR" + Lop.OPERAND_DELIMITOR + "rblk"
        + Lop.OPERAND_DELIMITOR + index + Lop.DATATYPE_PREFIX + matrix.getDataType() + Lop.VALUETYPE_PREFIX + matrix.getValueType()
        + Lop.OPERAND_DELIMITOR + index + Lop.DATATYPE_PREFIX + matrix.getDataType() + Lop.VALUETYPE_PREFIX + matrix.getValueType()
        + Lop.OPERAND_DELIMITOR + ConfigurationManager.getBlocksize()
        + Lop.OPERAND_DELIMITOR + ConfigurationManager.getBlocksize()
        + Lop.OPERAND_DELIMITOR + "true";
}
Also used : MRJobInstruction(org.apache.sysml.runtime.instructions.MRJobInstruction) DataIdentifier(org.apache.sysml.parser.DataIdentifier) ArrayList(java.util.ArrayList) MRJobInstruction(org.apache.sysml.runtime.instructions.MRJobInstruction) VariableCPInstruction(org.apache.sysml.runtime.instructions.cp.VariableCPInstruction) ExternalFunctionInvocationInstruction(org.apache.sysml.udf.ExternalFunctionInvocationInstruction) Instruction(org.apache.sysml.runtime.instructions.Instruction) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Aggregations

MRJobInstruction (org.apache.sysml.runtime.instructions.MRJobInstruction)17 Instruction (org.apache.sysml.runtime.instructions.Instruction)10 CPInstruction (org.apache.sysml.runtime.instructions.cp.CPInstruction)7 VariableCPInstruction (org.apache.sysml.runtime.instructions.cp.VariableCPInstruction)6 ArrayList (java.util.ArrayList)5 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)4 BreakPointInstruction (org.apache.sysml.runtime.instructions.cp.BreakPointInstruction)4 SPInstruction (org.apache.sysml.runtime.instructions.spark.SPInstruction)4 FunctionCallCPInstruction (org.apache.sysml.runtime.instructions.cp.FunctionCallCPInstruction)3 MRInstruction (org.apache.sysml.runtime.instructions.mr.MRInstruction)3 ExternalFunctionInvocationInstruction (org.apache.sysml.udf.ExternalFunctionInvocationInstruction)3 HashMap (java.util.HashMap)2 DataIdentifier (org.apache.sysml.parser.DataIdentifier)2 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)2 IOException (java.io.IOException)1 LinkedList (java.util.LinkedList)1 StringTokenizer (java.util.StringTokenizer)1 Data (org.apache.sysml.lops.Data)1 Lop (org.apache.sysml.lops.Lop)1 LopsException (org.apache.sysml.lops.LopsException)1