Search in sources :

Example 1 with DataGenMRInstruction

use of org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction in project incubator-systemml by apache.

the class MapperBase method configure.

/**
 * Reads the mapper setup from the job configuration: the representative input
 * matrices with their dimensions, block sizes and index bounds, and, for each
 * representative matrix, the datagen, mapper, reblock and csv-reblock instructions
 * plus the output indexes that can be reached from it.
 *
 * @param job job configuration to read the mapper setup from
 */
@Override
public void configure(JobConf job) {
    super.configure(job);
    // the same matrix file may appear several times within one statement
    try {
        representativeMatrixes = MRJobConfiguration.getInputMatrixIndexesInMapper(job);
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
    // input converter information, looked up via the first representative matrix
    inputConverter = MRJobConfiguration.getInputConverter(job, representativeMatrixes.get(0));
    DataGenMRInstruction[] dataGenInsts;
    MRInstruction[] mapperInsts;
    ReblockInstruction[] reblockInsts;
    CSVReblockInstruction[] csvReblockInsts;
    try {
        dataGenInsts = MRJobConfiguration.getDataGenInstructions(job);
        // instructions applied to the matrices this file represents
        mapperInsts = MRJobConfiguration.getInstructionsInMapper(job);
        // reblock instructions applied to the matrices this file represents
        reblockInsts = MRJobConfiguration.getReblockInstructions(job);
        csvReblockInsts = MRJobConfiguration.getCSVReblockInstructions(job);
    } catch (DMLRuntimeException dmle) {
        throw new RuntimeException(dmle);
    }
    // every output index produced in the mapper
    byte[] mapperOutputs = MRJobConfiguration.getOutputIndexesInMapper(job);
    // row/column counts of the representative matrices
    rlens = new long[representativeMatrixes.size()];
    clens = new long[representativeMatrixes.size()];
    for (int m = 0; m < representativeMatrixes.size(); m++) {
        rlens[m] = MRJobConfiguration.getNumRows(job, representativeMatrixes.get(m));
        clens[m] = MRJobConfiguration.getNumColumns(job, representativeMatrixes.get(m));
    }
    // block sizes of the representative matrices
    brlens = new int[representativeMatrixes.size()];
    bclens = new int[representativeMatrixes.size()];
    for (int m = 0; m < representativeMatrixes.size(); m++) {
        brlens[m] = MRJobConfiguration.getNumRowsPerBlock(job, representativeMatrixes.get(m));
        bclens[m] = MRJobConfiguration.getNumColumnsPerBlock(job, representativeMatrixes.get(m));
    }
    rbounds = new long[representativeMatrixes.size()];
    cbounds = new long[representativeMatrixes.size()];
    lastblockrlens = new int[representativeMatrixes.size()];
    lastblockclens = new int[representativeMatrixes.size()];
    // upper boundaries for key value pairs: block-grid extents for MatrixBlock
    // values, plain dimensions otherwise
    final boolean blockValues = valueClass.equals(MatrixBlock.class);
    for (int m = 0; m < representativeMatrixes.size(); m++) {
        if (blockValues) {
            rbounds[m] = (long) Math.max(Math.ceil((double) rlens[m] / brlens[m]), 1);
            cbounds[m] = (long) Math.max(Math.ceil((double) clens[m] / bclens[m]), 1);
            // a remainder of zero means the last block is a full block
            int rowRemainder = (int) (rlens[m] % brlens[m]);
            lastblockrlens[m] = (rowRemainder == 0) ? brlens[m] : rowRemainder;
            int colRemainder = (int) (clens[m] % bclens[m]);
            lastblockclens[m] = (colRemainder == 0) ? bclens[m] : colRemainder;
        } else {
            rbounds[m] = rlens[m];
            cbounds[m] = clens[m];
            lastblockrlens[m] = 1;
            lastblockclens[m] = 1;
        }
    }
    // load data from distributed cache (if required, reuse if jvm_reuse)
    try {
        setupDistCacheFiles(job);
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
    // per representative matrix: grow the set of reachable indexes while walking
    // the instruction lists, keeping every instruction that reads a reachable index
    HashSet<Byte> reachable = new HashSet<>();
    for (int m = 0; m < representativeMatrixes.size(); m++) {
        reachable.clear();
        reachable.add(representativeMatrixes.get(m));
        // relevant datagen instructions for this representative matrix
        ArrayList<DataGenMRInstruction> matchedDataGens = new ArrayList<>();
        if (dataGenInsts != null) {
            for (DataGenMRInstruction dataGen : dataGenInsts) {
                if (reachable.contains(dataGen.getInput())) {
                    matchedDataGens.add(dataGen);
                    reachable.add(dataGen.output);
                }
            }
        }
        if (matchedDataGens.size() > 1) {
            throw new RuntimeException("only expects at most one rand instruction per input");
        }
        dataGen_instructions.add(matchedDataGens.isEmpty() ? null : matchedDataGens.get(0));
        // relevant mapper instructions: kept as soon as ANY input index is reachable
        ArrayList<MRInstruction> matchedOps = new ArrayList<>();
        if (mapperInsts != null) {
            for (MRInstruction op : mapperInsts) {
                try {
                    boolean readsReachable = false;
                    for (byte inputIndex : op.getInputIndexes()) {
                        if (reachable.contains(inputIndex)) {
                            readsReachable = true;
                            break;
                        }
                    }
                    if (readsReachable) {
                        matchedOps.add(op);
                        reachable.add(op.output);
                    }
                } catch (DMLRuntimeException dmle) {
                    throw new RuntimeException(dmle);
                }
            }
        }
        mapper_instructions.add(matchedOps);
        // relevant reblock instructions for this representative matrix
        ArrayList<ReblockInstruction> matchedReblocks = new ArrayList<>();
        if (reblockInsts != null) {
            for (ReblockInstruction reblock : reblockInsts) {
                if (reachable.contains(reblock.input)) {
                    matchedReblocks.add(reblock);
                    reachable.add(reblock.output);
                }
            }
        }
        reblock_instructions.add(matchedReblocks);
        // relevant csv reblock instructions for this representative matrix
        ArrayList<CSVReblockInstruction> matchedCsvReblocks = new ArrayList<>();
        if (csvReblockInsts != null) {
            for (CSVReblockInstruction csvReblock : csvReblockInsts) {
                if (reachable.contains(csvReblock.input)) {
                    matchedCsvReblocks.add(csvReblock);
                    reachable.add(csvReblock.output);
                }
            }
        }
        csv_reblock_instructions.add(matchedCsvReblocks);
        // output indexes reachable from this representative matrix
        ArrayList<Byte> matchedOutputs = new ArrayList<>();
        for (byte outIndex : mapperOutputs) {
            if (reachable.contains(outIndex)) {
                matchedOutputs.add(outIndex);
            }
        }
        outputIndexes.add(matchedOutputs);
    }
}
Also used : CSVReblockInstruction(org.apache.sysml.runtime.instructions.mr.CSVReblockInstruction) ArrayList(java.util.ArrayList) IOException(java.io.IOException) DataGenMRInstruction(org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction) ReblockInstruction(org.apache.sysml.runtime.instructions.mr.ReblockInstruction) CSVReblockInstruction(org.apache.sysml.runtime.instructions.mr.CSVReblockInstruction) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) PMMJMRInstruction(org.apache.sysml.runtime.instructions.mr.PMMJMRInstruction) DataGenMRInstruction(org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction) MRInstruction(org.apache.sysml.runtime.instructions.mr.MRInstruction) HashSet(java.util.HashSet)

Example 2 with DataGenMRInstruction

use of org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction in project incubator-systemml by apache.

the class MatrixCharacteristics method computeDimension.

/**
 * Derives the output characteristics of an MR instruction from the characteristics
 * of its inputs and stores them in {@code dims} under the instruction's output index
 * (an entry is created if none exists yet).
 *
 * The instruction type checks are order-sensitive: parameterized builtin is tested
 * before the generic unary case and the uagg outer chain before the generic binary
 * case. Instruction types not handled here get unknown dimensions (-1 x -1) with
 * 1 x 1 blocks.
 *
 * @param dims map from matrix index to matrix characteristics; read for the inputs
 *             and updated for the output
 * @param ins  instruction whose output characteristics are computed
 */
public static void computeDimension(HashMap<Byte, MatrixCharacteristics> dims, MRInstruction ins) {
    MatrixCharacteristics out = dims.get(ins.output);
    if (out == null) {
        out = new MatrixCharacteristics();
        dims.put(ins.output, out);
    }
    if (ins instanceof ReorgInstruction) {
        ReorgInstruction reorgIns = (ReorgInstruction) ins;
        reorg(dims.get(reorgIns.input), (ReorgOperator) reorgIns.getOperator(), out);
        return;
    }
    if (ins instanceof AppendInstruction) {
        AppendInstruction appendIns = (AppendInstruction) ins;
        MatrixCharacteristics left = dims.get(appendIns.input1);
        MatrixCharacteristics right = dims.get(appendIns.input2);
        if (appendIns.isCBind()) {
            out.set(left.numRows, left.numColumns + right.numColumns, left.numRowsPerBlock, right.numColumnsPerBlock);
        } else {
            out.set(left.numRows + right.numRows, left.numColumns, left.numRowsPerBlock, right.numColumnsPerBlock);
        }
        return;
    }
    if (ins instanceof CumulativeAggregateInstruction) {
        AggregateUnaryInstruction cumAggIns = (AggregateUnaryInstruction) ins;
        MatrixCharacteristics src = dims.get(cumAggIns.input);
        // one output row per row block of the input
        out.set((long) Math.ceil((double) src.getRows() / src.getRowsPerBlock()), src.getCols(), src.getRowsPerBlock(), src.getColsPerBlock());
        return;
    }
    if (ins instanceof AggregateUnaryInstruction) {
        AggregateUnaryInstruction aggUnaryIns = (AggregateUnaryInstruction) ins;
        aggregateUnary(dims.get(aggUnaryIns.input), (AggregateUnaryOperator) aggUnaryIns.getOperator(), out);
        return;
    }
    if (ins instanceof AggregateBinaryInstruction) {
        AggregateBinaryInstruction aggBinaryIns = (AggregateBinaryInstruction) ins;
        aggregateBinary(dims.get(aggBinaryIns.input1), dims.get(aggBinaryIns.input2), (AggregateBinaryOperator) aggBinaryIns.getOperator(), out);
        return;
    }
    if (ins instanceof MapMultChainInstruction) {
        // output size independent of chain type
        MapMultChainInstruction chainIns = (MapMultChainInstruction) ins;
        MatrixCharacteristics first = dims.get(chainIns.getInput1());
        MatrixCharacteristics second = dims.get(chainIns.getInput2());
        out.set(first.numColumns, second.numColumns, first.numRowsPerBlock, first.numColumnsPerBlock);
        return;
    }
    if (ins instanceof QuaternaryInstruction) {
        QuaternaryInstruction quatIns = (QuaternaryInstruction) ins;
        MatrixCharacteristics first = dims.get(quatIns.getInput1());
        MatrixCharacteristics second = dims.get(quatIns.getInput2());
        MatrixCharacteristics third = dims.get(quatIns.getInput3());
        quatIns.computeMatrixCharacteristics(first, second, third, out);
        return;
    }
    if (ins instanceof ReblockInstruction) {
        ReblockInstruction reblockIns = (ReblockInstruction) ins;
        MatrixCharacteristics src = dims.get(reblockIns.input);
        out.set(src.numRows, src.numColumns, reblockIns.brlen, reblockIns.bclen, src.nonZero);
        return;
    }
    if (ins instanceof MatrixReshapeMRInstruction) {
        MatrixReshapeMRInstruction reshapeIns = (MatrixReshapeMRInstruction) ins;
        MatrixCharacteristics src = dims.get(reshapeIns.input);
        out.set(reshapeIns.getNumRows(), reshapeIns.getNumColunms(), src.getRowsPerBlock(), src.getColsPerBlock(), src.getNonZeros());
        return;
    }
    if (ins instanceof RandInstruction || ins instanceof SeqInstruction) {
        DataGenMRInstruction dataGenIns = (DataGenMRInstruction) ins;
        out.set(dims.get(dataGenIns.getInput()));
        return;
    }
    if (ins instanceof ReplicateInstruction) {
        ReplicateInstruction replicateIns = (ReplicateInstruction) ins;
        replicateIns.computeOutputDimension(dims.get(replicateIns.input), out);
        return;
    }
    // must be tested before the generic unary case below
    if (ins instanceof ParameterizedBuiltinMRInstruction) {
        ParameterizedBuiltinMRInstruction paramIns = (ParameterizedBuiltinMRInstruction) ins;
        paramIns.computeOutputCharacteristics(dims.get(paramIns.input), out);
        return;
    }
    if (ins instanceof ScalarInstruction || ins instanceof AggregateInstruction || (ins instanceof UnaryInstruction && !(ins instanceof MMTSJMRInstruction)) || ins instanceof ZeroOutInstruction) {
        // output has the same characteristics as the single input
        UnaryMRInstructionBase unaryIns = (UnaryMRInstructionBase) ins;
        out.set(dims.get(unaryIns.input));
        return;
    }
    if (ins instanceof MMTSJMRInstruction) {
        MMTSJMRInstruction tsmmIns = (MMTSJMRInstruction) ins;
        MMTSJType side = tsmmIns.getMMTSJType();
        MatrixCharacteristics src = dims.get(tsmmIns.input);
        // square output: cols x cols for the left type, rows x rows otherwise
        out.set(side.isLeft() ? src.numColumns : src.numRows, side.isLeft() ? src.numColumns : src.numRows, src.numRowsPerBlock, src.numColumnsPerBlock);
        return;
    }
    if (ins instanceof PMMJMRInstruction) {
        PMMJMRInstruction pmmIns = (PMMJMRInstruction) ins;
        MatrixCharacteristics src = dims.get(pmmIns.input2);
        out.set(pmmIns.getNumRows(), src.numColumns, src.numRowsPerBlock, src.numColumnsPerBlock);
        return;
    }
    if (ins instanceof RemoveEmptyMRInstruction) {
        RemoveEmptyMRInstruction rmEmptyIns = (RemoveEmptyMRInstruction) ins;
        MatrixCharacteristics src = dims.get(rmEmptyIns.input1);
        // lower bound on the reduced dimension
        long floor = rmEmptyIns.isEmptyReturn() ? 1 : 0;
        if (rmEmptyIns.isRemoveRows()) {
            out.set(Math.max(rmEmptyIns.getOutputLen(), floor), src.getCols(), src.numRowsPerBlock, src.numColumnsPerBlock);
        } else {
            out.set(src.getRows(), Math.max(rmEmptyIns.getOutputLen(), floor), src.numRowsPerBlock, src.numColumnsPerBlock);
        }
        return;
    }
    // must be tested before the generic binary case below
    if (ins instanceof UaggOuterChainInstruction) {
        UaggOuterChainInstruction uaggIns = (UaggOuterChainInstruction) ins;
        MatrixCharacteristics first = dims.get(uaggIns.input1);
        MatrixCharacteristics second = dims.get(uaggIns.input2);
        uaggIns.computeOutputCharacteristics(first, second, out);
        return;
    }
    if (ins instanceof GroupedAggregateMInstruction) {
        GroupedAggregateMInstruction grpAggIns = (GroupedAggregateMInstruction) ins;
        MatrixCharacteristics first = dims.get(grpAggIns.input1);
        grpAggIns.computeOutputCharacteristics(first, out);
        return;
    }
    if (ins instanceof BinaryInstruction || ins instanceof BinaryMInstruction || ins instanceof CombineBinaryInstruction) {
        BinaryMRInstructionBase binaryIns = (BinaryMRInstructionBase) ins;
        MatrixCharacteristics first = dims.get(binaryIns.input1);
        MatrixCharacteristics second = dims.get(binaryIns.input2);
        // column vector combined with row vector: outer operation
        boolean outer = first.getRows() > 1 && first.getCols() == 1 && second.getRows() == 1 && second.getCols() > 1;
        if (outer) {
            out.set(first.getRows(), second.getCols(), first.getRowsPerBlock(), second.getColsPerBlock());
        } else {
            // default case: same characteristics as the first input
            out.set(first);
        }
        return;
    }
    if (ins instanceof TernaryInstruction) {
        out.set(dims.get(ins.getInputIndexes()[0]));
        return;
    }
    if (ins instanceof CombineTernaryInstruction) {
        CtableInstruction combineIns = (CtableInstruction) ins;
        out.set(dims.get(combineIns.input1));
        return;
    }
    if (ins instanceof CombineUnaryInstruction) {
        out.set(dims.get(((CombineUnaryInstruction) ins).input));
        return;
    }
    if (ins instanceof CM_N_COVInstruction || ins instanceof GroupedAggregateInstruction) {
        out.set(1, 1, 1, 1);
        return;
    }
    if (ins instanceof RangeBasedReIndexInstruction) {
        RangeBasedReIndexInstruction reindexIns = (RangeBasedReIndexInstruction) ins;
        MatrixCharacteristics src = dims.get(reindexIns.input);
        reindexIns.computeOutputCharacteristics(src, out);
        return;
    }
    if (ins instanceof CtableInstruction) {
        CtableInstruction ctableIns = (CtableInstruction) ins;
        MatrixCharacteristics src = dims.get(ctableIns.input1);
        out.set(ctableIns.getOutputDim1(), ctableIns.getOutputDim2(), src.numRowsPerBlock, src.numColumnsPerBlock);
        return;
    }
    // none of the cases above matched: the output dimensions are treated as unknown
    out.numRows = -1;
    out.numColumns = -1;
    out.numRowsPerBlock = 1;
    out.numColumnsPerBlock = 1;
}
Also used : TernaryInstruction(org.apache.sysml.runtime.instructions.mr.TernaryInstruction) CombineTernaryInstruction(org.apache.sysml.runtime.instructions.mr.CombineTernaryInstruction) CombineTernaryInstruction(org.apache.sysml.runtime.instructions.mr.CombineTernaryInstruction) ReblockInstruction(org.apache.sysml.runtime.instructions.mr.ReblockInstruction) DataGenMRInstruction(org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction) AggregateUnaryInstruction(org.apache.sysml.runtime.instructions.mr.AggregateUnaryInstruction) CombineUnaryInstruction(org.apache.sysml.runtime.instructions.mr.CombineUnaryInstruction) UnaryInstruction(org.apache.sysml.runtime.instructions.mr.UnaryInstruction) PMMJMRInstruction(org.apache.sysml.runtime.instructions.mr.PMMJMRInstruction) GroupedAggregateMInstruction(org.apache.sysml.runtime.instructions.mr.GroupedAggregateMInstruction) CombineUnaryInstruction(org.apache.sysml.runtime.instructions.mr.CombineUnaryInstruction) MatrixReshapeMRInstruction(org.apache.sysml.runtime.instructions.mr.MatrixReshapeMRInstruction) RemoveEmptyMRInstruction(org.apache.sysml.runtime.instructions.mr.RemoveEmptyMRInstruction) AggregateBinaryInstruction(org.apache.sysml.runtime.instructions.mr.AggregateBinaryInstruction) CombineBinaryInstruction(org.apache.sysml.runtime.instructions.mr.CombineBinaryInstruction) BinaryInstruction(org.apache.sysml.runtime.instructions.mr.BinaryInstruction) ZeroOutInstruction(org.apache.sysml.runtime.instructions.mr.ZeroOutInstruction) QuaternaryInstruction(org.apache.sysml.runtime.instructions.mr.QuaternaryInstruction) UnaryMRInstructionBase(org.apache.sysml.runtime.instructions.mr.UnaryMRInstructionBase) MapMultChainInstruction(org.apache.sysml.runtime.instructions.mr.MapMultChainInstruction) MMTSJType(org.apache.sysml.lops.MMTSJ.MMTSJType) ReplicateInstruction(org.apache.sysml.runtime.instructions.mr.ReplicateInstruction) 
CumulativeAggregateInstruction(org.apache.sysml.runtime.instructions.mr.CumulativeAggregateInstruction) GroupedAggregateInstruction(org.apache.sysml.runtime.instructions.mr.GroupedAggregateInstruction) AggregateInstruction(org.apache.sysml.runtime.instructions.mr.AggregateInstruction) CombineBinaryInstruction(org.apache.sysml.runtime.instructions.mr.CombineBinaryInstruction) BinaryMRInstructionBase(org.apache.sysml.runtime.instructions.mr.BinaryMRInstructionBase) CM_N_COVInstruction(org.apache.sysml.runtime.instructions.mr.CM_N_COVInstruction) AggregateUnaryInstruction(org.apache.sysml.runtime.instructions.mr.AggregateUnaryInstruction) ParameterizedBuiltinMRInstruction(org.apache.sysml.runtime.instructions.mr.ParameterizedBuiltinMRInstruction) SeqInstruction(org.apache.sysml.runtime.instructions.mr.SeqInstruction) RandInstruction(org.apache.sysml.runtime.instructions.mr.RandInstruction) RangeBasedReIndexInstruction(org.apache.sysml.runtime.instructions.mr.RangeBasedReIndexInstruction) AppendInstruction(org.apache.sysml.runtime.instructions.mr.AppendInstruction) ScalarInstruction(org.apache.sysml.runtime.instructions.mr.ScalarInstruction) ReorgInstruction(org.apache.sysml.runtime.instructions.mr.ReorgInstruction) CtableInstruction(org.apache.sysml.runtime.instructions.mr.CtableInstruction) CumulativeAggregateInstruction(org.apache.sysml.runtime.instructions.mr.CumulativeAggregateInstruction) AggregateBinaryInstruction(org.apache.sysml.runtime.instructions.mr.AggregateBinaryInstruction) AggregateUnaryOperator(org.apache.sysml.runtime.matrix.operators.AggregateUnaryOperator) UaggOuterChainInstruction(org.apache.sysml.runtime.instructions.mr.UaggOuterChainInstruction) MMTSJMRInstruction(org.apache.sysml.runtime.instructions.mr.MMTSJMRInstruction) GroupedAggregateInstruction(org.apache.sysml.runtime.instructions.mr.GroupedAggregateInstruction) BinaryMInstruction(org.apache.sysml.runtime.instructions.mr.BinaryMInstruction)

Example 3 with DataGenMRInstruction

use of org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction in project systemml by apache.

the class CostEstimatorStaticRuntime method getMapOutputIndexes.

/**
 * Determines the indexes that occur both on the map side and on the reduce side.
 *
 * The map-side set holds {@code inIx} plus all indexes of the datagen and map
 * instructions; the reduce-side set holds {@code retIx} plus all indexes of the
 * shuffle, aggregate and other instructions. Null or empty instruction strings
 * are skipped.
 *
 * @param inIx      input indexes
 * @param retIx     result indexes
 * @param rdInst    datagen instructions as a string (may be null or empty)
 * @param mapInst   map instructions as a string (may be null or empty)
 * @param shfInst   shuffle instructions as a string (may be null or empty)
 * @param aggInst   aggregate instructions as a string (may be null or empty)
 * @param otherInst other instructions as a string (may be null or empty)
 * @return the indexes contained in both sets, in the set's iteration order
 */
private byte[] getMapOutputIndexes(byte[] inIx, byte[] retIx, String rdInst, String mapInst, String shfInst, String aggInst, String otherInst) {
    // note: this is a simplified version of MRJobConfiguration.setUpOutputIndexesForMapper
    // indexes seen on the map side
    HashSet<Byte> mapSide = new HashSet<>();
    for (byte index : inIx) {
        mapSide.add(index);
    }
    if (rdInst != null && !rdInst.isEmpty()) {
        DataGenMRInstruction[] dataGens = MRInstructionParser.parseDataGenInstructions(replaceInstructionPatch(rdInst));
        for (DataGenMRInstruction dataGen : dataGens) {
            for (byte index : dataGen.getAllIndexes()) {
                mapSide.add(index);
            }
        }
    }
    if (mapInst != null && !mapInst.isEmpty()) {
        MRInstruction[] mapOps = MRInstructionParser.parseMixedInstructions(replaceInstructionPatch(mapInst));
        for (MRInstruction op : mapOps) {
            for (byte index : op.getAllIndexes()) {
                mapSide.add(index);
            }
        }
    }
    // indexes seen on the reduce side
    HashSet<Byte> reduceSide = new HashSet<>();
    for (byte index : retIx) {
        reduceSide.add(index);
    }
    if (shfInst != null && !shfInst.isEmpty()) {
        MRInstruction[] shuffleOps = MRInstructionParser.parseMixedInstructions(replaceInstructionPatch(shfInst));
        for (MRInstruction op : shuffleOps) {
            for (byte index : op.getAllIndexes()) {
                reduceSide.add(index);
            }
        }
    }
    if (aggInst != null && !aggInst.isEmpty()) {
        MRInstruction[] aggOps = MRInstructionParser.parseAggregateInstructions(replaceInstructionPatch(aggInst));
        for (MRInstruction op : aggOps) {
            for (byte index : op.getAllIndexes()) {
                reduceSide.add(index);
            }
        }
    }
    if (otherInst != null && !otherInst.isEmpty()) {
        MRInstruction[] otherOps = MRInstructionParser.parseMixedInstructions(replaceInstructionPatch(otherInst));
        for (MRInstruction op : otherOps) {
            for (byte index : op.getAllIndexes()) {
                reduceSide.add(index);
            }
        }
    }
    // intersection: keep only map-side indexes that are also used on the reduce side
    mapSide.retainAll(reduceSide);
    // copy the surviving indexes into a primitive array
    byte[] result = new byte[mapSide.size()];
    int pos = 0;
    for (byte index : mapSide) {
        result[pos] = index;
        pos++;
    }
    return result;
}
Also used : DataGenMRInstruction(org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction) DataGenMRInstruction(org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction) MMTSJMRInstruction(org.apache.sysml.runtime.instructions.mr.MMTSJMRInstruction) MRInstruction(org.apache.sysml.runtime.instructions.mr.MRInstruction) RemoveEmptyMRInstruction(org.apache.sysml.runtime.instructions.mr.RemoveEmptyMRInstruction) HashSet(java.util.HashSet)

Example 4 with DataGenMRInstruction

use of org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction in project systemml by apache.

the class MRJobConfiguration method computeMatrixCharacteristics.

/**
 * Computes the matrix characteristics of all matrices touched by a job (inputs,
 * intermediates and results), records them in the job configuration, and determines
 * the number of reduce groups.
 *
 * Starting from the characteristics of the input matrices as stored in the job, the
 * dimensions are propagated through the datagen, mapper, reblock, aggregate,
 * aggregate-binary and remaining reducer instructions, in that order.
 *
 * NOTE: this method needs to be in-sync with MRBaseForCommonInstructions.processOneInstruction,
 * otherwise, the latter will potentially fail with missing dimension information.
 *
 * @param job job configuration (read for input characteristics, updated with the computed ones)
 * @param inputIndexes array of byte indexes
 * @param dataGenInstructions data gen instructions as a string
 * @param instructionsInMapper instruction in mapper as a string
 * @param reblockInstructions reblock instructions as a string
 * @param aggInstructionsInReducer aggregate instructions in reducer as a string
 * @param aggBinInstructions binary aggregate instructions as a string.
 *        NOTE(review): this parameter is not referenced in the method body; the aggregate
 *        binary instructions are obtained via getAggregateBinaryInstructions(job) instead.
 * @param otherInstructionsInReducer other instructions in reducer as a string
 * @param resultIndexes array of byte result indexes
 * @param mapOutputIndexes set of map output indexes
 * @param forMMCJ if true, the number of reduce groups is the number of column blocks of the
 *        first input of the aggregate binary instruction; if false, it is computed from the
 *        block-grid sizes of the map outputs (skyline area)
 * @return the characteristics of the result matrices (in the order of resultIndexes) together
 *         with the number of reduce groups
 */
public static MatrixChar_N_ReducerGroups computeMatrixCharacteristics(JobConf job, byte[] inputIndexes, String dataGenInstructions, String instructionsInMapper, String reblockInstructions, String aggInstructionsInReducer, String aggBinInstructions, String otherInstructionsInReducer, byte[] resultIndexes, HashSet<Byte> mapOutputIndexes, boolean forMMCJ) {
    HashSet<Byte> intermediateMatrixIndexes = new HashSet<>();
    // matrix index -> characteristics, seeded with the inputs as stored in the job
    HashMap<Byte, MatrixCharacteristics> dims = new HashMap<>();
    for (byte i : inputIndexes) {
        MatrixCharacteristics dim = new MatrixCharacteristics(getNumRows(job, i), getNumColumns(job, i), getNumRowsPerBlock(job, i), getNumColumnsPerBlock(job, i), getNumNonZero(job, i));
        dims.put(i, dim);
    }
    // datagen instructions (the null initialization is immediately overwritten)
    DataGenMRInstruction[] dataGenIns = null;
    dataGenIns = MRInstructionParser.parseDataGenInstructions(dataGenInstructions);
    if (dataGenIns != null) {
        for (DataGenMRInstruction ins : dataGenIns) {
            MatrixCharacteristics.computeDimension(dims, ins);
        }
    }
    // mapper instructions: besides the output dimensions, register one input of selected
    // instruction types as an intermediate matrix in the job
    MRInstruction[] insMapper = MRInstructionParser.parseMixedInstructions(instructionsInMapper);
    if (insMapper != null) {
        for (MRInstruction ins : insMapper) {
            MatrixCharacteristics.computeDimension(dims, ins);
            if (ins instanceof UnaryMRInstructionBase) {
                UnaryMRInstructionBase tempIns = (UnaryMRInstructionBase) ins;
                setIntermediateMatrixCharactristics(job, tempIns.input, dims.get(tempIns.input));
                intermediateMatrixIndexes.add(tempIns.input);
            } else if (ins instanceof AppendMInstruction) {
                AppendMInstruction tempIns = (AppendMInstruction) ins;
                setIntermediateMatrixCharactristics(job, tempIns.input1, dims.get(tempIns.input1));
                intermediateMatrixIndexes.add(tempIns.input1);
            } else if (ins instanceof AppendGInstruction) {
                AppendGInstruction tempIns = (AppendGInstruction) ins;
                setIntermediateMatrixCharactristics(job, tempIns.input1, dims.get(tempIns.input1));
                intermediateMatrixIndexes.add(tempIns.input1);
            } else if (ins instanceof BinaryMInstruction) {
                BinaryMInstruction tempIns = (BinaryMInstruction) ins;
                setIntermediateMatrixCharactristics(job, tempIns.input1, dims.get(tempIns.input1));
                intermediateMatrixIndexes.add(tempIns.input1);
            } else if (ins instanceof AggregateBinaryInstruction) {
                AggregateBinaryInstruction tempIns = (AggregateBinaryInstruction) ins;
                setIntermediateMatrixCharactristics(job, tempIns.input1, dims.get(tempIns.input1));
                // TODO
                intermediateMatrixIndexes.add(tempIns.input1);
            } else if (ins instanceof MapMultChainInstruction) {
                MapMultChainInstruction tempIns = (MapMultChainInstruction) ins;
                // NOTE(review): registers input1 with the characteristics of input2, unlike every
                // other branch, which uses the same index for both - confirm this is intended
                setIntermediateMatrixCharactristics(job, tempIns.getInput1(), dims.get(tempIns.getInput2()));
                intermediateMatrixIndexes.add(tempIns.getInput1());
            } else if (ins instanceof PMMJMRInstruction) {
                PMMJMRInstruction tempIns = (PMMJMRInstruction) ins;
                setIntermediateMatrixCharactristics(job, tempIns.input2, dims.get(tempIns.input2));
                intermediateMatrixIndexes.add(tempIns.input2);
            }
        }
    }
    // reblock instructions: the output characteristics are stored in the job per reblock output
    ReblockInstruction[] reblockIns = MRInstructionParser.parseReblockInstructions(reblockInstructions);
    if (reblockIns != null) {
        for (ReblockInstruction ins : reblockIns) {
            MatrixCharacteristics.computeDimension(dims, ins);
            setMatrixCharactristicsForReblock(job, ins.output, dims.get(ins.output));
        }
    }
    // aggregate instructions in the reducer
    Instruction[] aggIns = MRInstructionParser.parseAggregateInstructions(aggInstructionsInReducer);
    if (aggIns != null) {
        for (Instruction ins : aggIns) {
            MatrixCharacteristics.computeDimension(dims, (MRInstruction) ins);
            // if instruction's output is not in resultIndexes, then add its dimensions to jobconf
            MRInstruction mrins = (MRInstruction) ins;
            boolean found = false;
            for (byte b : resultIndexes) {
                if (b == mrins.output) {
                    found = true;
                    break;
                }
            }
            if (!found) {
                setIntermediateMatrixCharactristics(job, mrins.output, dims.get(mrins.output));
                intermediateMatrixIndexes.add(mrins.output);
            }
        }
    }
    long numReduceGroups = 0;
    // aggregate binary instructions are read back from the job configuration
    AggregateBinaryInstruction[] aggBinIns = getAggregateBinaryInstructions(job);
    if (aggBinIns != null) {
        for (AggregateBinaryInstruction ins : aggBinIns) {
            // input characteristics are fetched and stored before the output is computed
            MatrixCharacteristics dim1 = dims.get(ins.input1);
            MatrixCharacteristics dim2 = dims.get(ins.input2);
            setMatrixCharactristicsForBinAgg(job, ins.input1, dim1);
            setMatrixCharactristicsForBinAgg(job, ins.input2, dim2);
            MatrixCharacteristics.computeDimension(dims, ins);
            if (// there will be only one aggbin operation for MMCJ
            forMMCJ)
                // number of column blocks of the first input
                numReduceGroups = (long) Math.ceil((double) dim1.getCols() / (double) dim1.getColsPerBlock());
        }
    }
    if (!forMMCJ) {
        // store the skylines: the (x, y) pairs kept in xs/ys are those not covered by another pair
        ArrayList<Long> xs = new ArrayList<>(mapOutputIndexes.size());
        ArrayList<Long> ys = new ArrayList<>(mapOutputIndexes.size());
        for (byte idx : mapOutputIndexes) {
            MatrixCharacteristics dim = dims.get(idx);
            // x = number of row blocks, y = number of column blocks of this map output
            long x = (long) Math.ceil((double) dim.getRows() / (double) dim.getRowsPerBlock());
            long y = (long) Math.ceil((double) dim.getCols() / (double) dim.getColsPerBlock());
            int i = 0;
            boolean toadd = true;
            while (i < xs.size()) {
                if ((x >= xs.get(i) && y > ys.get(i)) || (x > xs.get(i) && y >= ys.get(i))) {
                    // remove any included x's and y's
                    // (i is not advanced: the next element has shifted into position i)
                    xs.remove(i);
                    ys.remove(i);
                } else if (// if included in others, stop
                x <= xs.get(i) && y <= ys.get(i)) {
                    toadd = false;
                    break;
                } else
                    i++;
            }
            if (toadd) {
                xs.add(x);
                ys.add(y);
            }
        }
        // sort by x
        TreeMap<Long, Long> map = new TreeMap<>();
        for (int i = 0; i < xs.size(); i++) map.put(xs.get(i), ys.get(i));
        numReduceGroups = 0;
        // compute area: sum of (x - previous x) * y over the pairs in ascending x order
        long prev = 0;
        for (Entry<Long, Long> e : map.entrySet()) {
            numReduceGroups += (e.getKey() - prev) * e.getValue();
            prev = e.getKey();
        }
    }
    // remaining reducer instructions
    MRInstruction[] insReducer = MRInstructionParser.parseMixedInstructions(otherInstructionsInReducer);
    if (insReducer != null) {
        for (MRInstruction ins : insReducer) {
            MatrixCharacteristics.computeDimension(dims, ins);
            if (ins instanceof UnaryMRInstructionBase) {
                UnaryMRInstructionBase tempIns = (UnaryMRInstructionBase) ins;
                setIntermediateMatrixCharactristics(job, tempIns.input, dims.get(tempIns.input));
                intermediateMatrixIndexes.add(tempIns.input);
            } else if (ins instanceof RemoveEmptyMRInstruction) {
                RemoveEmptyMRInstruction tempIns = (RemoveEmptyMRInstruction) ins;
                setIntermediateMatrixCharactristics(job, tempIns.input1, dims.get(tempIns.input1));
                intermediateMatrixIndexes.add(tempIns.input1);
            }
            // if instruction's output is not in resultIndexes, then add its dimensions to jobconf
            boolean found = false;
            for (byte b : resultIndexes) {
                if (b == ins.output) {
                    found = true;
                    break;
                }
            }
            if (!found) {
                setIntermediateMatrixCharactristics(job, ins.output, dims.get(ins.output));
                intermediateMatrixIndexes.add(ins.output);
            }
        }
    }
    // publish the collected intermediate indexes and the map output characteristics
    setIntermediateMatrixIndexes(job, intermediateMatrixIndexes);
    for (byte tag : mapOutputIndexes) setMatrixCharactristicsForMapperOutput(job, tag, dims.get(tag));
    // result characteristics, in the order of resultIndexes
    MatrixCharacteristics[] stats = new MatrixCharacteristics[resultIndexes.length];
    MatrixCharacteristics resultDims;
    for (int i = 0; i < resultIndexes.length; i++) {
        resultDims = dims.get(resultIndexes[i]);
        stats[i] = resultDims;
        setMatrixCharactristicsForOutput(job, resultIndexes[i], stats[i]);
    }
    return new MatrixChar_N_ReducerGroups(stats, numReduceGroups);
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) DataGenMRInstruction(org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction) ReblockInstruction(org.apache.sysml.runtime.instructions.mr.ReblockInstruction) CSVReblockInstruction(org.apache.sysml.runtime.instructions.mr.CSVReblockInstruction) CSVWriteInstruction(org.apache.sysml.runtime.instructions.mr.CSVWriteInstruction) ReblockInstruction(org.apache.sysml.runtime.instructions.mr.ReblockInstruction) BinaryMInstruction(org.apache.sysml.runtime.instructions.mr.BinaryMInstruction) GroupedAggregateInstruction(org.apache.sysml.runtime.instructions.mr.GroupedAggregateInstruction) AggregateInstruction(org.apache.sysml.runtime.instructions.mr.AggregateInstruction) Instruction(org.apache.sysml.runtime.instructions.Instruction) PMMJMRInstruction(org.apache.sysml.runtime.instructions.mr.PMMJMRInstruction) DataGenMRInstruction(org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction) AppendMInstruction(org.apache.sysml.runtime.instructions.mr.AppendMInstruction) AggregateBinaryInstruction(org.apache.sysml.runtime.instructions.mr.AggregateBinaryInstruction) CM_N_COVInstruction(org.apache.sysml.runtime.instructions.mr.CM_N_COVInstruction) MapMultChainInstruction(org.apache.sysml.runtime.instructions.mr.MapMultChainInstruction) MRInstruction(org.apache.sysml.runtime.instructions.mr.MRInstruction) CSVReblockInstruction(org.apache.sysml.runtime.instructions.mr.CSVReblockInstruction) AppendGInstruction(org.apache.sysml.runtime.instructions.mr.AppendGInstruction) RemoveEmptyMRInstruction(org.apache.sysml.runtime.instructions.mr.RemoveEmptyMRInstruction) PMMJMRInstruction(org.apache.sysml.runtime.instructions.mr.PMMJMRInstruction) RemoveEmptyMRInstruction(org.apache.sysml.runtime.instructions.mr.RemoveEmptyMRInstruction) UnaryMRInstructionBase(org.apache.sysml.runtime.instructions.mr.UnaryMRInstructionBase) MapMultChainInstruction(org.apache.sysml.runtime.instructions.mr.MapMultChainInstruction) 
PMMJMRInstruction(org.apache.sysml.runtime.instructions.mr.PMMJMRInstruction) DataGenMRInstruction(org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction) MRInstruction(org.apache.sysml.runtime.instructions.mr.MRInstruction) RemoveEmptyMRInstruction(org.apache.sysml.runtime.instructions.mr.RemoveEmptyMRInstruction) HashSet(java.util.HashSet) AppendGInstruction(org.apache.sysml.runtime.instructions.mr.AppendGInstruction) TreeMap(java.util.TreeMap) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) AppendMInstruction(org.apache.sysml.runtime.instructions.mr.AppendMInstruction) AggregateBinaryInstruction(org.apache.sysml.runtime.instructions.mr.AggregateBinaryInstruction) BinaryMInstruction(org.apache.sysml.runtime.instructions.mr.BinaryMInstruction)

Example 5 with DataGenMRInstruction

use of org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction in project incubator-systemml by apache.

the class CostEstimatorStaticRuntime method getMapOutputIndexes.

/**
 * Computes the indexes that appear on the map side of a job and are also
 * referenced on the reduce side, i.e. the intersection of both index sets.
 *
 * Each instruction string that is null or empty is skipped; every other
 * string is first passed through replaceInstructionPatch and then parsed.
 *
 * @param inIx      indexes seeded into the map-side set
 * @param retIx     indexes seeded into the reduce-side set
 * @param rdInst    instruction string parsed as data-gen instructions (map side)
 * @param mapInst   instruction string parsed as mixed instructions (map side)
 * @param shfInst   instruction string parsed as mixed instructions (reduce side)
 * @param aggInst   instruction string parsed as aggregate instructions (reduce side)
 * @param otherInst instruction string parsed as mixed instructions (reduce side)
 * @return the map-side indexes that are also contained in the reduce-side set,
 *         in the iteration order of the underlying HashSet
 */
private byte[] getMapOutputIndexes(byte[] inIx, byte[] retIx, String rdInst, String mapInst, String shfInst, String aggInst, String otherInst) {
    // note: this is a simplified version of MRJobConfiguration.setUpOutputIndexesForMapper

    // map side: seed indexes plus all indexes of data-gen and mapper instructions
    HashSet<Byte> mapSide = new HashSet<>();
    for (byte index : inIx) {
        mapSide.add(index);
    }
    if (rdInst != null && !rdInst.isEmpty()) {
        DataGenMRInstruction[] dataGenIns = MRInstructionParser.parseDataGenInstructions(replaceInstructionPatch(rdInst));
        for (DataGenMRInstruction dataGen : dataGenIns) {
            for (byte index : dataGen.getAllIndexes()) {
                mapSide.add(index);
            }
        }
    }
    if (mapInst != null && !mapInst.isEmpty()) {
        MRInstruction[] mapperIns = MRInstructionParser.parseMixedInstructions(replaceInstructionPatch(mapInst));
        for (MRInstruction mapper : mapperIns) {
            for (byte index : mapper.getAllIndexes()) {
                mapSide.add(index);
            }
        }
    }

    // reduce side: seed indexes plus all indexes of shuffle, aggregate and other instructions
    HashSet<Byte> reduceSide = new HashSet<>();
    for (byte index : retIx) {
        reduceSide.add(index);
    }
    if (shfInst != null && !shfInst.isEmpty()) {
        MRInstruction[] shuffleIns = MRInstructionParser.parseMixedInstructions(replaceInstructionPatch(shfInst));
        for (MRInstruction shuffle : shuffleIns) {
            for (byte index : shuffle.getAllIndexes()) {
                reduceSide.add(index);
            }
        }
    }
    if (aggInst != null && !aggInst.isEmpty()) {
        MRInstruction[] aggregateIns = MRInstructionParser.parseAggregateInstructions(replaceInstructionPatch(aggInst));
        for (MRInstruction aggregate : aggregateIns) {
            for (byte index : aggregate.getAllIndexes()) {
                reduceSide.add(index);
            }
        }
    }
    if (otherInst != null && !otherInst.isEmpty()) {
        MRInstruction[] otherIns = MRInstructionParser.parseMixedInstructions(replaceInstructionPatch(otherInst));
        for (MRInstruction other : otherIns) {
            for (byte index : other.getAllIndexes()) {
                reduceSide.add(index);
            }
        }
    }

    // intersection: keep only the map-side indexes that the reduce side references
    mapSide.retainAll(reduceSide);

    // unbox the surviving indexes into a primitive array
    byte[] result = new byte[mapSide.size()];
    int pos = 0;
    for (byte index : mapSide) {
        result[pos] = index;
        pos++;
    }
    return result;
}
Also used : DataGenMRInstruction(org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction) DataGenMRInstruction(org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction) MMTSJMRInstruction(org.apache.sysml.runtime.instructions.mr.MMTSJMRInstruction) MRInstruction(org.apache.sysml.runtime.instructions.mr.MRInstruction) RemoveEmptyMRInstruction(org.apache.sysml.runtime.instructions.mr.RemoveEmptyMRInstruction) HashSet(java.util.HashSet)

Aggregations

DataGenMRInstruction (org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction)14 MRInstruction (org.apache.sysml.runtime.instructions.mr.MRInstruction)8 RandInstruction (org.apache.sysml.runtime.instructions.mr.RandInstruction)8 HashSet (java.util.HashSet)6 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)6 PMMJMRInstruction (org.apache.sysml.runtime.instructions.mr.PMMJMRInstruction)6 ReblockInstruction (org.apache.sysml.runtime.instructions.mr.ReblockInstruction)6 RemoveEmptyMRInstruction (org.apache.sysml.runtime.instructions.mr.RemoveEmptyMRInstruction)6 SeqInstruction (org.apache.sysml.runtime.instructions.mr.SeqInstruction)5 IOException (java.io.IOException)4 ArrayList (java.util.ArrayList)4 AggregateBinaryInstruction (org.apache.sysml.runtime.instructions.mr.AggregateBinaryInstruction)4 AggregateInstruction (org.apache.sysml.runtime.instructions.mr.AggregateInstruction)4 BinaryMInstruction (org.apache.sysml.runtime.instructions.mr.BinaryMInstruction)4 CM_N_COVInstruction (org.apache.sysml.runtime.instructions.mr.CM_N_COVInstruction)4 CSVReblockInstruction (org.apache.sysml.runtime.instructions.mr.CSVReblockInstruction)4 GroupedAggregateInstruction (org.apache.sysml.runtime.instructions.mr.GroupedAggregateInstruction)4 MMTSJMRInstruction (org.apache.sysml.runtime.instructions.mr.MMTSJMRInstruction)4 MapMultChainInstruction (org.apache.sysml.runtime.instructions.mr.MapMultChainInstruction)4 UnaryMRInstructionBase (org.apache.sysml.runtime.instructions.mr.UnaryMRInstructionBase)4