Search in sources :

Example 11 with MRInstruction

use of org.apache.sysml.runtime.instructions.mr.MRInstruction in project incubator-systemml by apache.

Method computeMatrixCharacteristics of the class MRJobConfiguration.

/**
 * Derives the matrix characteristics of the matrices referenced by an MR job and records
 * them in the job configuration.
 * <p>
 * Starting from the characteristics of the job inputs (read from {@code job}), dimensions
 * are propagated via {@code MatrixCharacteristics.computeDimension} through the data-gen,
 * mapper, reblock, aggregate, aggregate-binary and remaining reducer instructions, in that
 * order. Along the way the characteristics of selected instruction inputs/outputs are
 * written back to {@code job}, and the number of reduce groups is computed.
 * <p>
 * NOTE: this method needs to be in-sync with MRBaseForCommonInstructions.processOneInstruction,
 * otherwise, the latter will potentially fail with missing dimension information.
 * 
 * @param job job configuration; read for input characteristics and aggregate-binary
 *            instructions, and updated with the derived characteristics
 * @param inputIndexes array of byte indexes
 * @param dataGenInstructions data gen instructions as a string
 * @param instructionsInMapper instruction in mapper as a string
 * @param reblockInstructions reblock instructions as a string
 * @param aggInstructionsInReducer aggregate instructions in reducer as a string
 * @param aggBinInstructions binary aggregate instructions as a string; not read by this
 *            method body, which obtains the aggregate-binary instructions from {@code job}
 * @param otherInstructionsInReducer other instructions in reducer as a string
 * @param resultIndexes array of byte result indexes
 * @param mapOutputIndexes set of map output indexes
 * @param forMMCJ if true, the number of reduce groups is the number of column blocks of the
 *            first input of the aggregate-binary instruction; if false, it is computed from
 *            the block counts of the map outputs
 * @return the characteristics of the result matrices (in the order of {@code resultIndexes})
 *         together with the number of reduce groups
 * @throws DMLRuntimeException if DMLRuntimeException occurs
 */
public static MatrixChar_N_ReducerGroups computeMatrixCharacteristics(JobConf job, byte[] inputIndexes, String dataGenInstructions, String instructionsInMapper, String reblockInstructions, String aggInstructionsInReducer, String aggBinInstructions, String otherInstructionsInReducer, byte[] resultIndexes, HashSet<Byte> mapOutputIndexes, boolean forMMCJ) throws DMLRuntimeException {
    // indexes whose characteristics are registered in the job as intermediates
    HashSet<Byte> intermediateMatrixIndexes = new HashSet<Byte>();
    // index -> characteristics; extended step by step as instructions are processed
    HashMap<Byte, MatrixCharacteristics> dims = new HashMap<Byte, MatrixCharacteristics>();
    // seed the map with the characteristics of the job inputs stored in the job configuration
    for (byte i : inputIndexes) {
        MatrixCharacteristics dim = new MatrixCharacteristics(getNumRows(job, i), getNumColumns(job, i), getNumRowsPerBlock(job, i), getNumColumnsPerBlock(job, i), getNumNonZero(job, i));
        dims.put(i, dim);
    }
    // data-gen instructions: only propagate dimensions
    DataGenMRInstruction[] dataGenIns = null;
    dataGenIns = MRInstructionParser.parseDataGenInstructions(dataGenInstructions);
    if (dataGenIns != null) {
        for (DataGenMRInstruction ins : dataGenIns) {
            MatrixCharacteristics.computeDimension(dims, ins);
        }
    }
    // mapper instructions: propagate dimensions and, for the instruction types below,
    // register the characteristics of one of the instruction's inputs as an intermediate
    MRInstruction[] insMapper = MRInstructionParser.parseMixedInstructions(instructionsInMapper);
    if (insMapper != null) {
        for (MRInstruction ins : insMapper) {
            MatrixCharacteristics.computeDimension(dims, ins);
            if (ins instanceof UnaryMRInstructionBase) {
                UnaryMRInstructionBase tempIns = (UnaryMRInstructionBase) ins;
                setIntermediateMatrixCharactristics(job, tempIns.input, dims.get(tempIns.input));
                intermediateMatrixIndexes.add(tempIns.input);
            } else if (ins instanceof AppendMInstruction) {
                AppendMInstruction tempIns = (AppendMInstruction) ins;
                setIntermediateMatrixCharactristics(job, tempIns.input1, dims.get(tempIns.input1));
                intermediateMatrixIndexes.add(tempIns.input1);
            } else if (ins instanceof AppendGInstruction) {
                AppendGInstruction tempIns = (AppendGInstruction) ins;
                setIntermediateMatrixCharactristics(job, tempIns.input1, dims.get(tempIns.input1));
                intermediateMatrixIndexes.add(tempIns.input1);
            } else if (ins instanceof BinaryMInstruction) {
                BinaryMInstruction tempIns = (BinaryMInstruction) ins;
                setIntermediateMatrixCharactristics(job, tempIns.input1, dims.get(tempIns.input1));
                intermediateMatrixIndexes.add(tempIns.input1);
            } else if (ins instanceof AggregateBinaryInstruction) {
                AggregateBinaryInstruction tempIns = (AggregateBinaryInstruction) ins;
                setIntermediateMatrixCharactristics(job, tempIns.input1, dims.get(tempIns.input1));
                //TODO
                intermediateMatrixIndexes.add(tempIns.input1);
            } else if (ins instanceof MapMultChainInstruction) {
                MapMultChainInstruction tempIns = (MapMultChainInstruction) ins;
                // NOTE(review): input1 is registered with the characteristics of input2, unlike
                // every other branch, which uses the same index for both - confirm this is intended
                setIntermediateMatrixCharactristics(job, tempIns.getInput1(), dims.get(tempIns.getInput2()));
                intermediateMatrixIndexes.add(tempIns.getInput1());
            } else if (ins instanceof PMMJMRInstruction) {
                // unlike the other branches, this one registers the second input
                PMMJMRInstruction tempIns = (PMMJMRInstruction) ins;
                setIntermediateMatrixCharactristics(job, tempIns.input2, dims.get(tempIns.input2));
                intermediateMatrixIndexes.add(tempIns.input2);
            }
        }
    }
    // reblock instructions: propagate dimensions and register the output for reblock
    ReblockInstruction[] reblockIns = MRInstructionParser.parseReblockInstructions(reblockInstructions);
    if (reblockIns != null) {
        for (ReblockInstruction ins : reblockIns) {
            MatrixCharacteristics.computeDimension(dims, ins);
            setMatrixCharactristicsForReblock(job, ins.output, dims.get(ins.output));
        }
    }
    // aggregate instructions in the reducer
    Instruction[] aggIns = MRInstructionParser.parseAggregateInstructions(aggInstructionsInReducer);
    if (aggIns != null) {
        for (Instruction ins : aggIns) {
            MatrixCharacteristics.computeDimension(dims, (MRInstruction) ins);
            // if instruction's output is not in resultIndexes, then add its dimensions to jobconf
            MRInstruction mrins = (MRInstruction) ins;
            boolean found = false;
            for (byte b : resultIndexes) {
                if (b == mrins.output) {
                    found = true;
                    break;
                }
            }
            if (!found) {
                setIntermediateMatrixCharactristics(job, mrins.output, dims.get(mrins.output));
                intermediateMatrixIndexes.add(mrins.output);
            }
        }
    }
    long numReduceGroups = 0;
    // aggregate-binary instructions are taken from the job configuration here,
    // not parsed from the aggBinInstructions parameter
    AggregateBinaryInstruction[] aggBinIns = getAggregateBinaryInstructions(job);
    if (aggBinIns != null) {
        for (AggregateBinaryInstruction ins : aggBinIns) {
            // input characteristics are looked up before the output dimension is computed
            MatrixCharacteristics dim1 = dims.get(ins.input1);
            MatrixCharacteristics dim2 = dims.get(ins.input2);
            setMatrixCharactristicsForBinAgg(job, ins.input1, dim1);
            setMatrixCharactristicsForBinAgg(job, ins.input2, dim2);
            MatrixCharacteristics.computeDimension(dims, ins);
            // for MMCJ: reduce groups = number of column blocks of the first input
            if (//there will be only one aggbin operation for MMCJ
            forMMCJ)
                numReduceGroups = (long) Math.ceil((double) dim1.getCols() / (double) dim1.getColsPerBlock());
        }
    }
    if (!forMMCJ) {
        //store the skylines
        // xs/ys hold (row blocks, column blocks) pairs of map outputs; a pair is kept only
        // while no other pair is at least as large in both coordinates
        ArrayList<Long> xs = new ArrayList<Long>(mapOutputIndexes.size());
        ArrayList<Long> ys = new ArrayList<Long>(mapOutputIndexes.size());
        for (byte idx : mapOutputIndexes) {
            MatrixCharacteristics dim = dims.get(idx);
            // x = number of row blocks, y = number of column blocks of this map output
            long x = (long) Math.ceil((double) dim.getRows() / (double) dim.getRowsPerBlock());
            long y = (long) Math.ceil((double) dim.getCols() / (double) dim.getColsPerBlock());
            int i = 0;
            boolean toadd = true;
            while (i < xs.size()) {
                if ((x >= xs.get(i) && y > ys.get(i)) || (x > xs.get(i) && y >= ys.get(i))) {
                    //remove any included x's and y's
                    // index i is not advanced: the next element has shifted into position i
                    xs.remove(i);
                    ys.remove(i);
                } else if (//if included in others, stop
                x <= xs.get(i) && y <= ys.get(i)) {
                    toadd = false;
                    break;
                } else
                    i++;
            }
            if (toadd) {
                xs.add(x);
                ys.add(y);
            }
        }
        //sort by x
        TreeMap<Long, Long> map = new TreeMap<Long, Long>();
        for (int i = 0; i < xs.size(); i++) map.put(xs.get(i), ys.get(i));
        numReduceGroups = 0;
        //compute area
        // sum over entries in ascending x order of (x - previous x) * y
        long prev = 0;
        for (Entry<Long, Long> e : map.entrySet()) {
            numReduceGroups += (e.getKey() - prev) * e.getValue();
            prev = e.getKey();
        }
    }
    // remaining reducer instructions: propagate dimensions, register selected inputs and
    // every output that is not a final result as intermediates
    MRInstruction[] insReducer = MRInstructionParser.parseMixedInstructions(otherInstructionsInReducer);
    if (insReducer != null) {
        for (MRInstruction ins : insReducer) {
            MatrixCharacteristics.computeDimension(dims, ins);
            if (ins instanceof UnaryMRInstructionBase) {
                UnaryMRInstructionBase tempIns = (UnaryMRInstructionBase) ins;
                setIntermediateMatrixCharactristics(job, tempIns.input, dims.get(tempIns.input));
                intermediateMatrixIndexes.add(tempIns.input);
            } else if (ins instanceof RemoveEmptyMRInstruction) {
                RemoveEmptyMRInstruction tempIns = (RemoveEmptyMRInstruction) ins;
                setIntermediateMatrixCharactristics(job, tempIns.input1, dims.get(tempIns.input1));
                intermediateMatrixIndexes.add(tempIns.input1);
            }
            // if instruction's output is not in resultIndexes, then add its dimensions to jobconf
            boolean found = false;
            for (byte b : resultIndexes) {
                if (b == ins.output) {
                    found = true;
                    break;
                }
            }
            if (!found) {
                setIntermediateMatrixCharactristics(job, ins.output, dims.get(ins.output));
                intermediateMatrixIndexes.add(ins.output);
            }
        }
    }
    // publish the collected intermediate indexes and the mapper-output characteristics
    setIntermediateMatrixIndexes(job, intermediateMatrixIndexes);
    for (byte tag : mapOutputIndexes) setMatrixCharactristicsForMapperOutput(job, tag, dims.get(tag));
    // collect the characteristics of the final results, in the order of resultIndexes;
    // an entry is whatever dims.get returns for that index
    MatrixCharacteristics[] stats = new MatrixCharacteristics[resultIndexes.length];
    MatrixCharacteristics resultDims;
    for (int i = 0; i < resultIndexes.length; i++) {
        resultDims = dims.get(resultIndexes[i]);
        stats[i] = resultDims;
        setMatrixCharactristicsForOutput(job, resultIndexes[i], stats[i]);
    }
    return new MatrixChar_N_ReducerGroups(stats, numReduceGroups);
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) DataGenMRInstruction(org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction) ReblockInstruction(org.apache.sysml.runtime.instructions.mr.ReblockInstruction) CSVReblockInstruction(org.apache.sysml.runtime.instructions.mr.CSVReblockInstruction) CSVWriteInstruction(org.apache.sysml.runtime.instructions.mr.CSVWriteInstruction) ReblockInstruction(org.apache.sysml.runtime.instructions.mr.ReblockInstruction) BinaryMInstruction(org.apache.sysml.runtime.instructions.mr.BinaryMInstruction) GroupedAggregateInstruction(org.apache.sysml.runtime.instructions.mr.GroupedAggregateInstruction) AggregateInstruction(org.apache.sysml.runtime.instructions.mr.AggregateInstruction) Instruction(org.apache.sysml.runtime.instructions.Instruction) PMMJMRInstruction(org.apache.sysml.runtime.instructions.mr.PMMJMRInstruction) DataGenMRInstruction(org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction) AppendMInstruction(org.apache.sysml.runtime.instructions.mr.AppendMInstruction) AggregateBinaryInstruction(org.apache.sysml.runtime.instructions.mr.AggregateBinaryInstruction) CM_N_COVInstruction(org.apache.sysml.runtime.instructions.mr.CM_N_COVInstruction) MapMultChainInstruction(org.apache.sysml.runtime.instructions.mr.MapMultChainInstruction) MRInstruction(org.apache.sysml.runtime.instructions.mr.MRInstruction) CSVReblockInstruction(org.apache.sysml.runtime.instructions.mr.CSVReblockInstruction) AppendGInstruction(org.apache.sysml.runtime.instructions.mr.AppendGInstruction) RemoveEmptyMRInstruction(org.apache.sysml.runtime.instructions.mr.RemoveEmptyMRInstruction) PMMJMRInstruction(org.apache.sysml.runtime.instructions.mr.PMMJMRInstruction) RemoveEmptyMRInstruction(org.apache.sysml.runtime.instructions.mr.RemoveEmptyMRInstruction) UnaryMRInstructionBase(org.apache.sysml.runtime.instructions.mr.UnaryMRInstructionBase) MapMultChainInstruction(org.apache.sysml.runtime.instructions.mr.MapMultChainInstruction) 
PMMJMRInstruction(org.apache.sysml.runtime.instructions.mr.PMMJMRInstruction) DataGenMRInstruction(org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction) MRInstruction(org.apache.sysml.runtime.instructions.mr.MRInstruction) RemoveEmptyMRInstruction(org.apache.sysml.runtime.instructions.mr.RemoveEmptyMRInstruction) HashSet(java.util.HashSet) AppendGInstruction(org.apache.sysml.runtime.instructions.mr.AppendGInstruction) TreeMap(java.util.TreeMap) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) AppendMInstruction(org.apache.sysml.runtime.instructions.mr.AppendMInstruction) AggregateBinaryInstruction(org.apache.sysml.runtime.instructions.mr.AggregateBinaryInstruction) BinaryMInstruction(org.apache.sysml.runtime.instructions.mr.BinaryMInstruction)

Example 12 with MRInstruction

use of org.apache.sysml.runtime.instructions.mr.MRInstruction in project incubator-systemml by apache.

Method configure of the class ReduceBase.

/**
 * Initializes this reducer from the job configuration: task id, result indexes and
 * per-result counters, the reducer instructions, distributed-cache files, and the
 * per-input grouping of aggregate instructions.
 *
 * @param job job configuration to read from
 * @throws RuntimeException wrapping a DMLRuntimeException raised while obtaining the
 *         instructions, or an IOException raised while setting up distributed-cache files
 */
public void configure(JobConf job) {
    super.configure(job);

    reducerID = job.get(MRConfigurationNames.MR_TASK_ATTEMPT_ID);
    dimsUnknownFilePrefix = job.get("dims.unknown.file.prefix");

    // indexes of the final output matrices
    resultIndexes = MRJobConfiguration.getResultIndexes(job);
    resultDimsUnknown = MRJobConfiguration.getResultDimsUnknown(job);

    // one counter slot per result (SystemML counters are defined in MRJobConfiguration)
    final int numResults = resultIndexes.length;
    resultsNonZeros = new long[numResults];
    resultsMaxRowDims = new long[numResults];
    resultsMaxColDims = new long[numResults];

    collectFinalMultipleOutputs = MRJobConfiguration.getMultipleConvertedOutputs(job);

    // obtain the aggregate instructions, then the unary/binary (mixed) reducer instructions
    AggregateInstruction[] aggregates = null;
    try {
        aggregates = MRJobConfiguration.getAggregateInstructions(job);
        MRInstruction[] reducerInsts = MRJobConfiguration.getInstructionsInReducer(job);
        if (reducerInsts != null) {
            mixed_instructions = new ArrayList<MRInstruction>();
            Collections.addAll(mixed_instructions, reducerInsts);
        }
    } catch (DMLRuntimeException dmlEx) {
        throw new RuntimeException(dmlEx);
    }

    // load data from distributed cache (if required, reuse if jvm_reuse)
    try {
        setupDistCacheFiles(job);
    } catch (IOException ioEx) {
        throw new RuntimeException(ioEx);
    }

    // nothing to group when there are no aggregate instructions
    if (aggregates == null) {
        return;
    }

    // group the aggregate instructions by the input index they consume
    for (AggregateInstruction aggregate : aggregates) {
        ArrayList<AggregateInstruction> forInput = agg_instructions.get(aggregate.input);
        if (forInput == null) {
            forInput = new ArrayList<AggregateInstruction>();
            agg_instructions.put(aggregate.input, forInput);
        }
        forInput.add(aggregate);

        if (aggregate.input != aggregate.output) {
            // additionally register an output-to-output instruction so that partial
            // aggregation can be applied; this is important for the combiner on the reducer side
            AggregateInstruction partial = new AggregateInstruction(aggregate.getOperator(), aggregate.output, aggregate.output, aggregate.toString());
            ArrayList<AggregateInstruction> forPartial = agg_instructions.get(partial.input);
            if (forPartial == null) {
                forPartial = new ArrayList<AggregateInstruction>();
                agg_instructions.put(partial.input, forPartial);
            }
            forPartial.add(partial);
        }
    }
}
Also used : DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) MRInstruction(org.apache.sysml.runtime.instructions.mr.MRInstruction) IOException(java.io.IOException) AggregateInstruction(org.apache.sysml.runtime.instructions.mr.AggregateInstruction) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 13 with MRInstruction

use of org.apache.sysml.runtime.instructions.mr.MRInstruction in project incubator-systemml by apache.

Method getMapOutputIndexes of the class CostEstimatorStaticRuntime.

/**
 * Determines the matrix indexes that appear both on the map side (inputs, data-gen and
 * mapper instructions) and on the reduce side (results, shuffle, aggregate and other
 * instructions) of a job.
 * <p>
 * Note: this is a simplified version of MRJobConfiguration.setUpOutputIndexesForMapper.
 *
 * @param inIx input indexes
 * @param retIx result indexes
 * @param rdInst data-gen instructions as a string (may be null or empty)
 * @param mapInst mapper instructions as a string (may be null or empty)
 * @param shfInst shuffle instructions as a string (may be null or empty)
 * @param aggInst aggregate instructions as a string (may be null or empty)
 * @param otherInst other instructions as a string (may be null or empty)
 * @return the indexes present on both sides, in the iteration order of the underlying hash set
 * @throws DMLRuntimeException if DMLRuntimeException occurs
 */
private byte[] getMapOutputIndexes(byte[] inIx, byte[] retIx, String rdInst, String mapInst, String shfInst, String aggInst, String otherInst) throws DMLRuntimeException {
    // indexes referenced on the map side
    HashSet<Byte> mapSide = new HashSet<Byte>();
    for (int k = 0; k < inIx.length; k++) {
        mapSide.add(inIx[k]);
    }
    if (!(rdInst == null || rdInst.length() <= 0)) {
        DataGenMRInstruction[] parsed = MRInstructionParser.parseDataGenInstructions(replaceInstructionPatch(rdInst));
        for (DataGenMRInstruction dataGen : parsed) {
            for (byte index : dataGen.getAllIndexes()) {
                mapSide.add(index);
            }
        }
    }
    if (!(mapInst == null || mapInst.length() <= 0)) {
        MRInstruction[] parsed = MRInstructionParser.parseMixedInstructions(replaceInstructionPatch(mapInst));
        for (MRInstruction mapper : parsed) {
            for (byte index : mapper.getAllIndexes()) {
                mapSide.add(index);
            }
        }
    }

    // indexes referenced on the reduce side
    HashSet<Byte> reduceSide = new HashSet<Byte>();
    for (int k = 0; k < retIx.length; k++) {
        reduceSide.add(retIx[k]);
    }
    if (!(shfInst == null || shfInst.length() <= 0)) {
        MRInstruction[] parsed = MRInstructionParser.parseMixedInstructions(replaceInstructionPatch(shfInst));
        for (MRInstruction shuffle : parsed) {
            for (byte index : shuffle.getAllIndexes()) {
                reduceSide.add(index);
            }
        }
    }
    if (!(aggInst == null || aggInst.length() <= 0)) {
        MRInstruction[] parsed = MRInstructionParser.parseAggregateInstructions(replaceInstructionPatch(aggInst));
        for (MRInstruction aggregate : parsed) {
            for (byte index : aggregate.getAllIndexes()) {
                reduceSide.add(index);
            }
        }
    }
    if (!(otherInst == null || otherInst.length() <= 0)) {
        MRInstruction[] parsed = MRInstructionParser.parseMixedInstructions(replaceInstructionPatch(otherInst));
        for (MRInstruction other : parsed) {
            for (byte index : other.getAllIndexes()) {
                reduceSide.add(index);
            }
        }
    }

    // intersection: keep only map-side indexes that are also used on the reduce side
    mapSide.retainAll(reduceSide);

    // copy the surviving indexes into a primitive array
    byte[] shared = new byte[mapSide.size()];
    int pos = 0;
    for (byte index : mapSide) {
        shared[pos] = index;
        pos++;
    }
    return shared;
}
Also used : DataGenMRInstruction(org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction) DataGenMRInstruction(org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction) MMTSJMRInstruction(org.apache.sysml.runtime.instructions.mr.MMTSJMRInstruction) MRInstruction(org.apache.sysml.runtime.instructions.mr.MRInstruction) RemoveEmptyMRInstruction(org.apache.sysml.runtime.instructions.mr.RemoveEmptyMRInstruction) HashSet(java.util.HashSet)

Aggregations

MRInstruction (org.apache.sysml.runtime.instructions.mr.MRInstruction)13 DataGenMRInstruction (org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction)7 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)5 ArrayList (java.util.ArrayList)4 PMMJMRInstruction (org.apache.sysml.runtime.instructions.mr.PMMJMRInstruction)4 RemoveEmptyMRInstruction (org.apache.sysml.runtime.instructions.mr.RemoveEmptyMRInstruction)4 IOException (java.io.IOException)3 HashSet (java.util.HashSet)3 MMTSJMRInstruction (org.apache.sysml.runtime.instructions.mr.MMTSJMRInstruction)3 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)3 HashMap (java.util.HashMap)2 Instruction (org.apache.sysml.runtime.instructions.Instruction)2 MRJobInstruction (org.apache.sysml.runtime.instructions.MRJobInstruction)2 AggregateBinaryInstruction (org.apache.sysml.runtime.instructions.mr.AggregateBinaryInstruction)2 AggregateInstruction (org.apache.sysml.runtime.instructions.mr.AggregateInstruction)2 CM_N_COVInstruction (org.apache.sysml.runtime.instructions.mr.CM_N_COVInstruction)2 CSVReblockInstruction (org.apache.sysml.runtime.instructions.mr.CSVReblockInstruction)2 GroupedAggregateInstruction (org.apache.sysml.runtime.instructions.mr.GroupedAggregateInstruction)2 IDistributedCacheConsumer (org.apache.sysml.runtime.instructions.mr.IDistributedCacheConsumer)2 MRINSTRUCTION_TYPE (org.apache.sysml.runtime.instructions.mr.MRInstruction.MRINSTRUCTION_TYPE)2