Search in sources :

Example 1 with MRJobInstruction

use of org.apache.sysml.runtime.instructions.MRJobInstruction in project incubator-systemml by apache.

the class CostEstimator method extractMRJobInstStatistics.

/**
 * Gathers the variable statistics referenced by an MR job instruction.
 *
 * The statistics are looked up in {@code stats} under the string form of
 * every index from 0 up to the largest result index of the job; indices
 * without an entry are filled with {@code _unknownStats}.
 *
 * @param inst  instruction to inspect; cast to {@link MRJobInstruction}
 * @param stats statistics keyed by variable name or numeric index string
 * @return two-element array: {@code [0]} is the {@code VarStats[]} indexed
 *         by job index, {@code [1]} is the attribute array, always
 *         {@code null} for MR jobs
 */
private Object[] extractMRJobInstStatistics(Instruction inst, HashMap<String, VarStats> stats) {
    MRJobInstruction job = (MRJobInstruction) inst;
    // the largest result index determines how many index slots are looked up
    byte highest = -1;
    for (byte resultIx : job.getIv_resultIndices()) {
        if (resultIx > highest) {
            highest = resultIx;
        }
    }
    // inputs, intermediates and outputs, addressed by their numeric index key
    VarStats[] byIndex = new VarStats[highest + 1];
    for (int pos = 0; pos < byIndex.length; pos++) {
        VarStats found = stats.get(String.valueOf(pos));
        byIndex[pos] = (found != null) ? found : _unknownStats;
    }
    // result layout: { stats, attrs }; no attributes are produced here
    return new Object[] { byIndex, null };
}
Also used : MRJobInstruction(org.apache.sysml.runtime.instructions.MRJobInstruction) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject)

Example 2 with MRJobInstruction

use of org.apache.sysml.runtime.instructions.MRJobInstruction in project incubator-systemml by apache.

the class CostEstimator method rGetTimeEstimate.

/**
 * Recursively estimates the execution time of a program block.
 *
 * Control-flow blocks (while, if, for/parfor, non-external functions) are
 * costed from their child blocks when {@code recursive} is set; any other
 * block is costed instruction by instruction. While loops are scaled by
 * {@code DEFAULT_NUMITER}, for loops by {@code getNumIterations(...)}.
 * An if block with a non-empty else body is costed as the average of the
 * if-branch and the else-branch totals.
 *
 * @param pb        program block to estimate
 * @param stats     variable statistics; updated while instructions are processed
 * @param memoFunc  keys of functions currently being estimated, used to
 *                  avoid endless recursion on recursive functions
 * @param recursive whether child blocks of control-flow blocks are costed
 * @return the estimated time of the block
 * @throws DMLRuntimeException if estimating a nested block or instruction fails
 */
private double rGetTimeEstimate(ProgramBlock pb, HashMap<String, VarStats> stats, HashSet<String> memoFunc, boolean recursive) throws DMLRuntimeException {
    double ret = 0;
    if (pb instanceof WhileProgramBlock) {
        WhileProgramBlock tmp = (WhileProgramBlock) pb;
        if (recursive) {
            for (ProgramBlock pb2 : tmp.getChildBlocks()) {
                ret += rGetTimeEstimate(pb2, stats, memoFunc, recursive);
            }
        }
        ret *= DEFAULT_NUMITER;
    } else if (pb instanceof IfProgramBlock) {
        IfProgramBlock tmp = (IfProgramBlock) pb;
        if (recursive) {
            for (ProgramBlock pb2 : tmp.getChildBlocksIfBody()) {
                ret += rGetTimeEstimate(pb2, stats, memoFunc, recursive);
            }
            if (tmp.getChildBlocksElseBody() != null) {
                // accumulate the else branch separately so that the average
                // is taken once, not once per else child block
                double elseCost = 0;
                boolean hasElseBlocks = false;
                for (ProgramBlock pb2 : tmp.getChildBlocksElseBody()) {
                    elseCost += rGetTimeEstimate(pb2, stats, memoFunc, recursive);
                    hasElseBlocks = true;
                }
                if (hasElseBlocks) {
                    //weighted sum of if and else branch
                    ret = (ret + elseCost) / 2;
                }
            }
        }
    } else if (pb instanceof ForProgramBlock) {
        //includes ParFORProgramBlock
        ForProgramBlock tmp = (ForProgramBlock) pb;
        if (recursive) {
            for (ProgramBlock pb2 : tmp.getChildBlocks()) {
                ret += rGetTimeEstimate(pb2, stats, memoFunc, recursive);
            }
        }
        ret *= getNumIterations(stats, tmp.getIterablePredicateVars());
    } else if (pb instanceof FunctionProgramBlock && !(pb instanceof ExternalFunctionProgramBlock)) {
        //external functions fall through to the generic case below
        FunctionProgramBlock tmp = (FunctionProgramBlock) pb;
        if (recursive) {
            for (ProgramBlock pb2 : tmp.getChildBlocks()) {
                ret += rGetTimeEstimate(pb2, stats, memoFunc, recursive);
            }
        }
    } else {
        ArrayList<Instruction> tmp = pb.getInstructions();
        for (Instruction inst : tmp) {
            if (inst instanceof CPInstruction) {
                //CP
                //obtain stats from createvar, cpvar, rmvar, rand
                maintainCPInstVariableStatistics((CPInstruction) inst, stats);
                //extract statistics (instruction-specific)
                Object[] o = extractCPInstStatistics(inst, stats);
                VarStats[] vs = (VarStats[]) o[0];
                String[] attr = (String[]) o[1];
                //call time estimation for inst
                ret += getCPInstTimeEstimate(inst, vs, attr);
                if (inst instanceof FunctionCallCPInstruction) {
                    //functions
                    FunctionCallCPInstruction finst = (FunctionCallCPInstruction) inst;
                    String fkey = DMLProgram.constructFunctionKey(finst.getNamespace(), finst.getFunctionName());
                    //awareness of recursive functions, missing program
                    if (!memoFunc.contains(fkey) && pb.getProgram() != null) {
                        if (LOG.isDebugEnabled()) {
                            LOG.debug("Begin Function " + fkey);
                        }
                        //mark the function as in-progress while its body is costed
                        memoFunc.add(fkey);
                        Program prog = pb.getProgram();
                        FunctionProgramBlock fpb = prog.getFunctionProgramBlock(finst.getNamespace(), finst.getFunctionName());
                        ret += rGetTimeEstimate(fpb, stats, memoFunc, recursive);
                        memoFunc.remove(fkey);
                        if (LOG.isDebugEnabled()) {
                            LOG.debug("End Function " + fkey);
                        }
                    }
                }
            } else if (inst instanceof MRJobInstruction) {
                //MR
                //obtain stats for job
                maintainMRJobInstVariableStatistics(inst, stats);
                //extract input statistics
                Object[] o = extractMRJobInstStatistics(inst, stats);
                VarStats[] vs = (VarStats[]) o[0];
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Begin MRJob type=" + ((MRJobInstruction) inst).getJobType());
                }
                //call time estimation for complex MR inst
                ret += getMRJobInstTimeEstimate(inst, vs, null);
                if (LOG.isDebugEnabled()) {
                    LOG.debug("End MRJob");
                }
                //cleanup stats for job
                cleanupMRJobVariableStatistics(inst, stats);
            }
        }
    }
    return ret;
}
Also used : IfProgramBlock(org.apache.sysml.runtime.controlprogram.IfProgramBlock) FunctionProgramBlock(org.apache.sysml.runtime.controlprogram.FunctionProgramBlock) ExternalFunctionProgramBlock(org.apache.sysml.runtime.controlprogram.ExternalFunctionProgramBlock) MRJobInstruction(org.apache.sysml.runtime.instructions.MRJobInstruction) DMLProgram(org.apache.sysml.parser.DMLProgram) Program(org.apache.sysml.runtime.controlprogram.Program) ForProgramBlock(org.apache.sysml.runtime.controlprogram.ForProgramBlock) ArrayList(java.util.ArrayList) WhileProgramBlock(org.apache.sysml.runtime.controlprogram.WhileProgramBlock) MultiReturnBuiltinCPInstruction(org.apache.sysml.runtime.instructions.cp.MultiReturnBuiltinCPInstruction) MRJobInstruction(org.apache.sysml.runtime.instructions.MRJobInstruction) CPInstruction(org.apache.sysml.runtime.instructions.cp.CPInstruction) DataGenCPInstruction(org.apache.sysml.runtime.instructions.cp.DataGenCPInstruction) VariableCPInstruction(org.apache.sysml.runtime.instructions.cp.VariableCPInstruction) MMTSJCPInstruction(org.apache.sysml.runtime.instructions.cp.MMTSJCPInstruction) BinaryCPInstruction(org.apache.sysml.runtime.instructions.cp.BinaryCPInstruction) StringInitCPInstruction(org.apache.sysml.runtime.instructions.cp.StringInitCPInstruction) AggregateUnaryCPInstruction(org.apache.sysml.runtime.instructions.cp.AggregateUnaryCPInstruction) FunctionCallCPInstruction(org.apache.sysml.runtime.instructions.cp.FunctionCallCPInstruction) AggregateTernaryCPInstruction(org.apache.sysml.runtime.instructions.cp.AggregateTernaryCPInstruction) MRInstruction(org.apache.sysml.runtime.instructions.mr.MRInstruction) UnaryCPInstruction(org.apache.sysml.runtime.instructions.cp.UnaryCPInstruction) Instruction(org.apache.sysml.runtime.instructions.Instruction) ParameterizedBuiltinCPInstruction(org.apache.sysml.runtime.instructions.cp.ParameterizedBuiltinCPInstruction) 
MultiReturnBuiltinCPInstruction(org.apache.sysml.runtime.instructions.cp.MultiReturnBuiltinCPInstruction) CPInstruction(org.apache.sysml.runtime.instructions.cp.CPInstruction) DataGenCPInstruction(org.apache.sysml.runtime.instructions.cp.DataGenCPInstruction) VariableCPInstruction(org.apache.sysml.runtime.instructions.cp.VariableCPInstruction) MMTSJCPInstruction(org.apache.sysml.runtime.instructions.cp.MMTSJCPInstruction) BinaryCPInstruction(org.apache.sysml.runtime.instructions.cp.BinaryCPInstruction) StringInitCPInstruction(org.apache.sysml.runtime.instructions.cp.StringInitCPInstruction) AggregateUnaryCPInstruction(org.apache.sysml.runtime.instructions.cp.AggregateUnaryCPInstruction) FunctionCallCPInstruction(org.apache.sysml.runtime.instructions.cp.FunctionCallCPInstruction) AggregateTernaryCPInstruction(org.apache.sysml.runtime.instructions.cp.AggregateTernaryCPInstruction) UnaryCPInstruction(org.apache.sysml.runtime.instructions.cp.UnaryCPInstruction) ParameterizedBuiltinCPInstruction(org.apache.sysml.runtime.instructions.cp.ParameterizedBuiltinCPInstruction) FunctionCallCPInstruction(org.apache.sysml.runtime.instructions.cp.FunctionCallCPInstruction) ExternalFunctionProgramBlock(org.apache.sysml.runtime.controlprogram.ExternalFunctionProgramBlock) ForProgramBlock(org.apache.sysml.runtime.controlprogram.ForProgramBlock) IfProgramBlock(org.apache.sysml.runtime.controlprogram.IfProgramBlock) FunctionProgramBlock(org.apache.sysml.runtime.controlprogram.FunctionProgramBlock) ProgramBlock(org.apache.sysml.runtime.controlprogram.ProgramBlock) WhileProgramBlock(org.apache.sysml.runtime.controlprogram.WhileProgramBlock) ExternalFunctionProgramBlock(org.apache.sysml.runtime.controlprogram.ExternalFunctionProgramBlock) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject)

Example 3 with MRJobInstruction

use of org.apache.sysml.runtime.instructions.MRJobInstruction in project incubator-systemml by apache.

the class CostEstimator method maintainMRJobInstVariableStatistics.

/**
 * Updates {@code stats} with the variable statistics implied by an MR job.
 *
 * Steps, in order:
 * 1. register each input variable under its positional index ("0", "1", ...),
 * 2. register the outputs of the job's rand instructions under their index,
 * 3. derive dimensions of intermediates by running every MR instruction of
 *    the job through {@code MatrixCharacteristics.computeDimension},
 * 4. create statistics for indices that do not have any yet,
 * 5. publish the result indices under the job's output variable names.
 *
 * @param inst  instruction to process; cast to {@link MRJobInstruction}
 * @param stats statistics keyed by variable name or numeric index string;
 *              modified in place
 * @throws DMLRuntimeException if parsing or dimension computation fails
 */
private void maintainMRJobInstVariableStatistics(Instruction inst, HashMap<String, VarStats> stats) throws DMLRuntimeException {
    MRJobInstruction jobinst = (MRJobInstruction) inst;
    //input sizes (varname, index mapping)
    String[] inVars = jobinst.getInputVars();
    int index = -1;
    for (String varname : inVars) {
        VarStats vs = stats.get(varname);
        if (vs == null) {
            vs = _unknownStats;
        }
        stats.put(String.valueOf(++index), vs);
    }
    //rand output
    String rdInst = jobinst.getIv_randInstructions();
    if (rdInst != null && rdInst.length() > 0) {
        StringTokenizer st = new StringTokenizer(rdInst, Lop.INSTRUCTION_DELIMITOR);
        //foreach rand instruction
        while (st.hasMoreTokens()) {
            String[] parts = InstructionUtils.getInstructionParts(st.nextToken());
            byte outIndex = Byte.parseByte(parts[2]);
            //dimensions given as variable placeholders are unknown (-1)
            long rlen = parts[3].contains(Lop.VARIABLE_NAME_PLACEHOLDER) ? -1 : UtilFunctions.parseToLong(parts[3]);
            long clen = parts[4].contains(Lop.VARIABLE_NAME_PLACEHOLDER) ? -1 : UtilFunctions.parseToLong(parts[4]);
            long brlen = Long.parseLong(parts[5]);
            long bclen = Long.parseLong(parts[6]);
            double sparsity = Double.parseDouble(parts[9]);
            //sparsity * rows * cols is only meaningful for known dimensions;
            //otherwise mark nnz unknown with the same -1 marker used above
            long nnz = (rlen >= 0 && clen >= 0) ? (long) (sparsity * rlen * clen) : -1;
            VarStats vs = new VarStats(rlen, clen, brlen, bclen, nnz, false);
            stats.put(String.valueOf(outIndex), vs);
        }
    }
    //compute intermediate result indices
    HashMap<Byte, MatrixCharacteristics> dims = new HashMap<Byte, MatrixCharacteristics>();
    //populate input indices (only entries whose key is an integer index)
    for (Entry<String, VarStats> e : stats.entrySet()) {
        if (UtilFunctions.isIntegerNumber(e.getKey())) {
            byte ix = Byte.parseByte(e.getKey());
            VarStats vs = e.getValue();
            if (vs != null) {
                MatrixCharacteristics mc = new MatrixCharacteristics(vs._rlen, vs._clen, (int) vs._brlen, (int) vs._bclen, (long) vs._nnz);
                dims.put(ix, mc);
            }
        }
    }
    //compute dims for all instructions
    String[] instCat = new String[] { jobinst.getIv_randInstructions(), jobinst.getIv_recordReaderInstructions(), jobinst.getIv_instructionsInMapper(), jobinst.getIv_shuffleInstructions(), jobinst.getIv_aggInstructions(), jobinst.getIv_otherInstructions() };
    for (String linstCat : instCat) {
        if (linstCat != null && linstCat.length() > 0) {
            String[] linst = linstCat.split(Instruction.INSTRUCTION_DELIM);
            for (String instStr : linst) {
                String instStr2 = replaceInstructionPatch(instStr);
                MRInstruction mrinst = MRInstructionParser.parseSingleInstruction(instStr2);
                MatrixCharacteristics.computeDimension(dims, mrinst);
            }
        }
    }
    //create varstats if necessary
    for (Entry<Byte, MatrixCharacteristics> e : dims.entrySet()) {
        byte ix = e.getKey();
        if (!stats.containsKey(String.valueOf(ix))) {
            MatrixCharacteristics mc = e.getValue();
            VarStats vs = new VarStats(mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock(), mc.getNonZeros(), false);
            stats.put(String.valueOf(ix), vs);
        }
    }
    //map result indexes
    String[] outLabels = jobinst.getOutputVars();
    byte[] resultIndexes = jobinst.getIv_resultIndices();
    for (int i = 0; i < resultIndexes.length; i++) {
        String varname = outLabels[i];
        VarStats varvs = stats.get(String.valueOf(resultIndexes[i]));
        if (varvs == null) {
            //fall back to statistics already registered under the output name
            varvs = stats.get(varname);
        }
        if (varvs == null) {
            //nothing known about this output: register the unknown placeholder
            //(as done for unknown inputs) instead of dereferencing null; the
            //shared placeholder's _inmem flag is deliberately left untouched
            stats.put(varname, _unknownStats);
            continue;
        }
        varvs._inmem = false;
        stats.put(varname, varvs);
    }
}
Also used : MRJobInstruction(org.apache.sysml.runtime.instructions.MRJobInstruction) HashMap(java.util.HashMap) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) StringTokenizer(java.util.StringTokenizer) MRInstruction(org.apache.sysml.runtime.instructions.mr.MRInstruction)

Example 4 with MRJobInstruction

use of org.apache.sysml.runtime.instructions.MRJobInstruction in project incubator-systemml by apache.

the class PiggybackingWorker method mergeMRJobInstructions.

/**
 * Merges the MR job instructions of a working set into as few merged
 * instructions as possible.
 *
 * Instructions are processed in list order. Each one is merged into the
 * currently open merged instruction when
 * {@code isMergableMRJobInstruction} allows it; otherwise the open merged
 * instruction is closed and a new one is started from a copy of the
 * instruction. An empty working set yields an empty result.
 *
 * @param workingSet pairs of instruction ID and MR job instruction
 * @return the merged instructions, in the order they were opened
 * @throws IllegalAccessException declared by the original signature;
 *         NOTE(review): presumably raised by the copy or merge calls — confirm
 */
protected LinkedList<MergedMRJobInstruction> mergeMRJobInstructions(LinkedList<Pair<Long, MRJobInstruction>> workingSet) throws IllegalAccessException {
    LinkedList<MergedMRJobInstruction> ret = new LinkedList<MergedMRJobInstruction>();
    Timing time = new Timing(true);
    //NOTE currently all merged into one (might be invalid due to memory constraints)
    //currently open merged instruction; null until the first instruction is seen
    MergedMRJobInstruction minst = null;
    for (Pair<Long, MRJobInstruction> inst : workingSet) {
        long instID = inst.getKey();
        MRJobInstruction instVal = inst.getValue();
        int numOutputs = instVal.getOutputs().length;
        if (minst != null && minst.inst.isMergableMRJobInstruction(instVal)) {
            //add instruction to open merged instruction;
            //output offset must be taken before the merge
            int offOutputs = minst.inst.getOutputs().length;
            minst.inst.mergeMRJobInstruction(instVal);
            minst.addInstructionMetaData(instID, offOutputs, numOutputs);
        } else {
            //close current merged instruction (if any)
            if (minst != null) {
                ret.add(minst);
            }
            //open new merged instruction from a deep copy
            minst = new MergedMRJobInstruction();
            minst.inst = new MRJobInstruction(instVal);
            minst.addInstructionMetaData(instID, 0, numOutputs);
        }
    }
    //close last open merged instruction; skipped for an empty working set so
    //that callers never receive a merged instruction without a job
    if (minst != null) {
        ret.add(minst);
    }
    //output log info for better understandability for users
    LOG.info("Merged MR-Job instructions: " + workingSet.size() + " --> " + ret.size() + " in " + time.stop() + "ms.");
    return ret;
}
Also used : MRJobInstruction(org.apache.sysml.runtime.instructions.MRJobInstruction) Timing(org.apache.sysml.runtime.controlprogram.parfor.stat.Timing) LinkedList(java.util.LinkedList)

Example 5 with MRJobInstruction

use of org.apache.sysml.runtime.instructions.MRJobInstruction in project incubator-systemml by apache.

the class ProgramConverter method cloneInstruction.

/**
 * Creates a copy of an instruction for use by a child thread.
 *
 * CP, Spark, MR and GPU instructions are cloned by re-parsing their string
 * form; MR job instructions are cloned via their copy constructor. For
 * function call instructions (when {@code cpFunctions} is set and
 * {@code plain} is not), the function name in the instruction string is
 * suffixed with {@code CP_CHILD_THREAD + pid} before parsing. Finally,
 * references to the root thread ID are replaced by the child thread ID.
 *
 * @param oInst       instruction to clone
 * @param pid         ID of the child thread the clone is created for
 * @param plain       if true, function names are preserved as is
 * @param cpFunctions whether function call instructions get renamed
 * @return the cloned instruction
 * @throws DMLRuntimeException if the instruction type is not supported or
 *         cloning fails; other exceptions are wrapped as the cause
 */
public static Instruction cloneInstruction(Instruction oInst, long pid, boolean plain, boolean cpFunctions) throws DMLRuntimeException {
    Instruction inst = null;
    String tmpString = oInst.toString();
    try {
        if (oInst instanceof CPInstruction || oInst instanceof SPInstruction || oInst instanceof MRInstruction || oInst instanceof GPUInstruction) {
            if (oInst instanceof FunctionCallCPInstruction && cpFunctions) {
                FunctionCallCPInstruction tmp = (FunctionCallCPInstruction) oInst;
                if (!plain) {
                    //safe replacement because target variables might include the function name
                    //note: this is no update-in-place in order to keep the original function name as basis
                    tmpString = tmp.updateInstStringFunctionName(tmp.getFunctionName(), tmp.getFunctionName() + CP_CHILD_THREAD + pid);
                }
                //otherwise: preserve function name
            }
            inst = InstructionParser.parseSingleInstruction(tmpString);
        } else if (oInst instanceof MRJobInstruction) {
            //clone via copy constructor
            inst = new MRJobInstruction((MRJobInstruction) oInst);
        } else {
            throw new DMLRuntimeException("Failed to clone instruction: " + oInst);
        }
    } catch (DMLRuntimeException ex) {
        //already the declared exception type: rethrow unchanged so the
        //message is not hidden behind a second wrapper
        throw ex;
    } catch (Exception ex) {
        throw new DMLRuntimeException(ex);
    }
    //save replacement of thread id references in instructions
    inst = saveReplaceThreadID(inst, ProgramConverter.CP_ROOT_THREAD_ID, ProgramConverter.CP_CHILD_THREAD + pid);
    return inst;
}
Also used : SPInstruction(org.apache.sysml.runtime.instructions.spark.SPInstruction) CPInstruction(org.apache.sysml.runtime.instructions.cp.CPInstruction) SpoofCPInstruction(org.apache.sysml.runtime.instructions.cp.SpoofCPInstruction) VariableCPInstruction(org.apache.sysml.runtime.instructions.cp.VariableCPInstruction) FunctionCallCPInstruction(org.apache.sysml.runtime.instructions.cp.FunctionCallCPInstruction) GPUInstruction(org.apache.sysml.runtime.instructions.gpu.GPUInstruction) MRJobInstruction(org.apache.sysml.runtime.instructions.MRJobInstruction) FunctionCallCPInstruction(org.apache.sysml.runtime.instructions.cp.FunctionCallCPInstruction) MRInstruction(org.apache.sysml.runtime.instructions.mr.MRInstruction) GPUInstruction(org.apache.sysml.runtime.instructions.gpu.GPUInstruction) MRJobInstruction(org.apache.sysml.runtime.instructions.MRJobInstruction) CPInstruction(org.apache.sysml.runtime.instructions.cp.CPInstruction) ExternalFunctionInvocationInstruction(org.apache.sysml.udf.ExternalFunctionInvocationInstruction) SpoofCPInstruction(org.apache.sysml.runtime.instructions.cp.SpoofCPInstruction) Instruction(org.apache.sysml.runtime.instructions.Instruction) SPInstruction(org.apache.sysml.runtime.instructions.spark.SPInstruction) VariableCPInstruction(org.apache.sysml.runtime.instructions.cp.VariableCPInstruction) FunctionCallCPInstruction(org.apache.sysml.runtime.instructions.cp.FunctionCallCPInstruction) MRInstruction(org.apache.sysml.runtime.instructions.mr.MRInstruction) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Aggregations

MRJobInstruction (org.apache.sysml.runtime.instructions.MRJobInstruction): 17; Instruction (org.apache.sysml.runtime.instructions.Instruction): 10; CPInstruction (org.apache.sysml.runtime.instructions.cp.CPInstruction): 7; VariableCPInstruction (org.apache.sysml.runtime.instructions.cp.VariableCPInstruction): 6; ArrayList (java.util.ArrayList): 5; DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 4; BreakPointInstruction (org.apache.sysml.runtime.instructions.cp.BreakPointInstruction): 4; SPInstruction (org.apache.sysml.runtime.instructions.spark.SPInstruction): 4; FunctionCallCPInstruction (org.apache.sysml.runtime.instructions.cp.FunctionCallCPInstruction): 3; MRInstruction (org.apache.sysml.runtime.instructions.mr.MRInstruction): 3; ExternalFunctionInvocationInstruction (org.apache.sysml.udf.ExternalFunctionInvocationInstruction): 3; HashMap (java.util.HashMap): 2; LopsException (org.apache.sysml.lops.LopsException): 2; DataIdentifier (org.apache.sysml.parser.DataIdentifier): 2; CacheException (org.apache.sysml.runtime.controlprogram.caching.CacheException): 2; MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject): 2; IOException (java.io.IOException): 1; LinkedList (java.util.LinkedList): 1; StringTokenizer (java.util.StringTokenizer): 1; HopsException (org.apache.sysml.hops.HopsException): 1