Search in sources :

Example 16 with AggregateBinaryInstruction

use of org.apache.sysml.runtime.instructions.mr.AggregateBinaryInstruction in project systemml by apache.

the class MMRJMRReducer method reduce.

// private MatrixIndexes indexBuf=new MatrixIndexes();
@Override
public void reduce(TripleIndexes triple, Iterator<TaggedMatrixValue> values, OutputCollector<MatrixIndexes, MatrixValue> out, Reporter report) throws IOException {
    long start = System.currentTimeMillis();
    commonSetup(report);
    // output previous results if needed
    if (prevIndexes.getFirstIndex() != triple.getFirstIndex() || prevIndexes.getSecondIndex() != triple.getSecondIndex()) {
        // perform mixed operations
        processReducerInstructions();
        // output results
        outputResultsFromCachedValues(report);
        cachedValues.reset();
    } else {
        // clear the buffer
        for (AggregateBinaryInstruction aggBinInstruction : aggBinInstructions) {
            cachedValues.remove(aggBinInstruction.input1);
            cachedValues.remove(aggBinInstruction.input2);
        }
    }
    // perform aggregation first
    aggIndexes.setIndexes(triple.getFirstIndex(), triple.getSecondIndex());
    processAggregateInstructions(aggIndexes, values);
    // perform aggbinary for this group
    for (AggregateBinaryInstruction aggBinInstruction : aggBinInstructions) processAggBinaryPerGroup(aggIndexes, aggBinInstruction);
    prevIndexes.setIndexes(triple);
    report.incrCounter(Counters.COMBINE_OR_REDUCE_TIME, System.currentTimeMillis() - start);
}
Also used : AggregateBinaryInstruction(org.apache.sysml.runtime.instructions.mr.AggregateBinaryInstruction)

Example 17 with AggregateBinaryInstruction

use of org.apache.sysml.runtime.instructions.mr.AggregateBinaryInstruction in project systemml by apache.

the class MMCJMR method runJob.

public static JobReturn runJob(MRJobInstruction inst, String[] inputs, InputInfo[] inputInfos, long[] rlens, long[] clens, int[] brlens, int[] bclens, String instructionsInMapper, String aggInstructionsInReducer, String aggBinInstrction, int numReducers, int replication, String output, OutputInfo outputinfo) throws Exception {
    JobConf job = new JobConf(MMCJMR.class);
    // TODO: check w/ yuanyuan. This job always runs in blocked mode, and hence derivation is not necessary.
    boolean inBlockRepresentation = MRJobConfiguration.deriveRepresentation(inputInfos);
    // by default, assume that dimensions of MMCJ's output are known at compile time
    byte resultDimsUnknown = (byte) 0;
    MatrixCharacteristics[] stats = commonSetup(job, inBlockRepresentation, inputs, inputInfos, rlens, clens, brlens, bclens, instructionsInMapper, aggInstructionsInReducer, aggBinInstrction, numReducers, replication, resultDimsUnknown, output, outputinfo);
    // Print the complete instruction
    if (LOG.isTraceEnabled())
        inst.printCompleteMRJobInstruction(stats);
    // There is always a single output
    if (stats[0].getRows() == -1 || stats[0].getCols() == -1) {
        resultDimsUnknown = (byte) 1;
        // if the dimensions are unknown, then setup done in commonSetup() must be updated
        byte[] resultIndexes = new byte[] { MRInstructionParser.parseSingleInstruction(aggBinInstrction).output };
        byte[] resultDimsUnknown_Array = new byte[] { resultDimsUnknown };
        // set up the multiple output files, and their format information
        MRJobConfiguration.setUpMultipleOutputs(job, resultIndexes, resultDimsUnknown_Array, new String[] { output }, new OutputInfo[] { outputinfo }, inBlockRepresentation);
    }
    AggregateBinaryInstruction ins = (AggregateBinaryInstruction) MRInstructionParser.parseSingleInstruction(aggBinInstrction);
    MatrixCharacteristics dim1 = MRJobConfiguration.getMatrixCharactristicsForBinAgg(job, ins.input1);
    MatrixCharacteristics dim2 = MRJobConfiguration.getMatrixCharactristicsForBinAgg(job, ins.input2);
    if (dim1.getRowsPerBlock() > dim1.getRows())
        dim1.setRowsPerBlock((int) dim1.getRows());
    if (dim1.getColsPerBlock() > dim1.getCols())
        dim1.setColsPerBlock((int) dim1.getCols());
    if (dim2.getRowsPerBlock() > dim2.getRows())
        dim2.setRowsPerBlock((int) dim2.getRows());
    if (dim2.getColsPerBlock() > dim2.getCols())
        dim2.setColsPerBlock((int) dim2.getCols());
    long blockSize1 = 77 + 8 * dim1.getRowsPerBlock() * dim1.getColsPerBlock();
    long blockSize2 = 77 + 8 * dim2.getRowsPerBlock() * dim2.getColsPerBlock();
    long blockSizeResult = 77 + 8 * dim1.getRowsPerBlock() * dim2.getColsPerBlock();
    long cacheSize = -1;
    // cache the first result
    if (dim1.getRows() < dim2.getCols()) {
        long numBlocks = (long) Math.ceil((double) dim1.getRows() / (double) dim1.getRowsPerBlock());
        cacheSize = numBlocks * (20 + blockSize1) + 32;
    } else // cache the second result
    {
        long numBlocks = (long) Math.ceil((double) dim2.getCols() / (double) dim2.getColsPerBlock());
        cacheSize = numBlocks * (20 + blockSize2) + 32;
    }
    // add known memory consumption (will be substracted from output buffer)
    cacheSize += // the cached key-value pair  (plus input instance)
    2 * Math.max(blockSize1, blockSize2) + // the cached single result
    blockSizeResult + // misc memory requirement by hadoop
    MRJobConfiguration.getMiscMemRequired(job);
    MRJobConfiguration.setMMCJCacheSize(job, (int) cacheSize);
    // set unique working dir
    MRJobConfiguration.setUniqueWorkingDir(job);
    // run mmcj job
    RunningJob runjob = JobClient.runJob(job);
    /* Process different counters */
    // NOTE: MMCJ job always has only a single output.
    // Hence, no need to scan resultIndexes[] like other jobs
    int outputIndex = 0;
    Byte outputMatrixID = MRInstructionParser.parseSingleInstruction(aggBinInstrction).output;
    Group group = runjob.getCounters().getGroup(MRJobConfiguration.NUM_NONZERO_CELLS);
    // number of non-zeros
    stats[outputIndex].setNonZeros(group.getCounter(Byte.toString(outputMatrixID)));
    return new JobReturn(stats[outputIndex], outputinfo, runjob.isSuccessful());
}
Also used : Group(org.apache.hadoop.mapred.Counters.Group) AggregateBinaryInstruction(org.apache.sysml.runtime.instructions.mr.AggregateBinaryInstruction) RunningJob(org.apache.hadoop.mapred.RunningJob) JobConf(org.apache.hadoop.mapred.JobConf)

Example 18 with AggregateBinaryInstruction

use of org.apache.sysml.runtime.instructions.mr.AggregateBinaryInstruction in project systemml by apache.

the class MatrixCharacteristics method computeDimension.

public static void computeDimension(HashMap<Byte, MatrixCharacteristics> dims, MRInstruction ins) {
    MatrixCharacteristics dimOut = dims.get(ins.output);
    if (dimOut == null) {
        dimOut = new MatrixCharacteristics();
        dims.put(ins.output, dimOut);
    }
    if (ins instanceof ReorgInstruction) {
        ReorgInstruction realIns = (ReorgInstruction) ins;
        reorg(dims.get(realIns.input), (ReorgOperator) realIns.getOperator(), dimOut);
    } else if (ins instanceof AppendInstruction) {
        AppendInstruction realIns = (AppendInstruction) ins;
        MatrixCharacteristics in_dim1 = dims.get(realIns.input1);
        MatrixCharacteristics in_dim2 = dims.get(realIns.input2);
        if (realIns.isCBind())
            dimOut.set(in_dim1.numRows, in_dim1.numColumns + in_dim2.numColumns, in_dim1.numRowsPerBlock, in_dim2.numColumnsPerBlock);
        else
            dimOut.set(in_dim1.numRows + in_dim2.numRows, in_dim1.numColumns, in_dim1.numRowsPerBlock, in_dim2.numColumnsPerBlock);
    } else if (ins instanceof CumulativeAggregateInstruction) {
        AggregateUnaryInstruction realIns = (AggregateUnaryInstruction) ins;
        MatrixCharacteristics in = dims.get(realIns.input);
        dimOut.set((long) Math.ceil((double) in.getRows() / in.getRowsPerBlock()), in.getCols(), in.getRowsPerBlock(), in.getColsPerBlock());
    } else if (ins instanceof AggregateUnaryInstruction) {
        AggregateUnaryInstruction realIns = (AggregateUnaryInstruction) ins;
        aggregateUnary(dims.get(realIns.input), (AggregateUnaryOperator) realIns.getOperator(), dimOut);
    } else if (ins instanceof AggregateBinaryInstruction) {
        AggregateBinaryInstruction realIns = (AggregateBinaryInstruction) ins;
        aggregateBinary(dims.get(realIns.input1), dims.get(realIns.input2), (AggregateBinaryOperator) realIns.getOperator(), dimOut);
    } else if (ins instanceof MapMultChainInstruction) {
        // output size independent of chain type
        MapMultChainInstruction realIns = (MapMultChainInstruction) ins;
        MatrixCharacteristics mc1 = dims.get(realIns.getInput1());
        MatrixCharacteristics mc2 = dims.get(realIns.getInput2());
        dimOut.set(mc1.numColumns, mc2.numColumns, mc1.numRowsPerBlock, mc1.numColumnsPerBlock);
    } else if (ins instanceof QuaternaryInstruction) {
        QuaternaryInstruction realIns = (QuaternaryInstruction) ins;
        MatrixCharacteristics mc1 = dims.get(realIns.getInput1());
        MatrixCharacteristics mc2 = dims.get(realIns.getInput2());
        MatrixCharacteristics mc3 = dims.get(realIns.getInput3());
        realIns.computeMatrixCharacteristics(mc1, mc2, mc3, dimOut);
    } else if (ins instanceof ReblockInstruction) {
        ReblockInstruction realIns = (ReblockInstruction) ins;
        MatrixCharacteristics in_dim = dims.get(realIns.input);
        dimOut.set(in_dim.numRows, in_dim.numColumns, realIns.brlen, realIns.bclen, in_dim.nonZero);
    } else if (ins instanceof MatrixReshapeMRInstruction) {
        MatrixReshapeMRInstruction mrinst = (MatrixReshapeMRInstruction) ins;
        MatrixCharacteristics in_dim = dims.get(mrinst.input);
        dimOut.set(mrinst.getNumRows(), mrinst.getNumColunms(), in_dim.getRowsPerBlock(), in_dim.getColsPerBlock(), in_dim.getNonZeros());
    } else if (ins instanceof RandInstruction || ins instanceof SeqInstruction) {
        DataGenMRInstruction dataIns = (DataGenMRInstruction) ins;
        dimOut.set(dims.get(dataIns.getInput()));
    } else if (ins instanceof ReplicateInstruction) {
        ReplicateInstruction realIns = (ReplicateInstruction) ins;
        realIns.computeOutputDimension(dims.get(realIns.input), dimOut);
    } else if (// before unary
    ins instanceof ParameterizedBuiltinMRInstruction) {
        ParameterizedBuiltinMRInstruction realIns = (ParameterizedBuiltinMRInstruction) ins;
        realIns.computeOutputCharacteristics(dims.get(realIns.input), dimOut);
    } else if (ins instanceof ScalarInstruction || ins instanceof AggregateInstruction || (ins instanceof UnaryInstruction && !(ins instanceof MMTSJMRInstruction)) || ins instanceof ZeroOutInstruction) {
        UnaryMRInstructionBase realIns = (UnaryMRInstructionBase) ins;
        dimOut.set(dims.get(realIns.input));
    } else if (ins instanceof MMTSJMRInstruction) {
        MMTSJMRInstruction mmtsj = (MMTSJMRInstruction) ins;
        MMTSJType tstype = mmtsj.getMMTSJType();
        MatrixCharacteristics mc = dims.get(mmtsj.input);
        dimOut.set(tstype.isLeft() ? mc.numColumns : mc.numRows, tstype.isLeft() ? mc.numColumns : mc.numRows, mc.numRowsPerBlock, mc.numColumnsPerBlock);
    } else if (ins instanceof PMMJMRInstruction) {
        PMMJMRInstruction pmmins = (PMMJMRInstruction) ins;
        MatrixCharacteristics mc = dims.get(pmmins.input2);
        dimOut.set(pmmins.getNumRows(), mc.numColumns, mc.numRowsPerBlock, mc.numColumnsPerBlock);
    } else if (ins instanceof RemoveEmptyMRInstruction) {
        RemoveEmptyMRInstruction realIns = (RemoveEmptyMRInstruction) ins;
        MatrixCharacteristics mc = dims.get(realIns.input1);
        long min = realIns.isEmptyReturn() ? 1 : 0;
        if (realIns.isRemoveRows())
            dimOut.set(Math.max(realIns.getOutputLen(), min), mc.getCols(), mc.numRowsPerBlock, mc.numColumnsPerBlock);
        else
            dimOut.set(mc.getRows(), Math.max(realIns.getOutputLen(), min), mc.numRowsPerBlock, mc.numColumnsPerBlock);
    } else if (// needs to be checked before binary
    ins instanceof UaggOuterChainInstruction) {
        UaggOuterChainInstruction realIns = (UaggOuterChainInstruction) ins;
        MatrixCharacteristics mc1 = dims.get(realIns.input1);
        MatrixCharacteristics mc2 = dims.get(realIns.input2);
        realIns.computeOutputCharacteristics(mc1, mc2, dimOut);
    } else if (ins instanceof GroupedAggregateMInstruction) {
        GroupedAggregateMInstruction realIns = (GroupedAggregateMInstruction) ins;
        MatrixCharacteristics mc1 = dims.get(realIns.input1);
        realIns.computeOutputCharacteristics(mc1, dimOut);
    } else if (ins instanceof BinaryInstruction || ins instanceof BinaryMInstruction || ins instanceof CombineBinaryInstruction) {
        BinaryMRInstructionBase realIns = (BinaryMRInstructionBase) ins;
        MatrixCharacteristics mc1 = dims.get(realIns.input1);
        MatrixCharacteristics mc2 = dims.get(realIns.input2);
        if (mc1.getRows() > 1 && mc1.getCols() == 1 && mc2.getRows() == 1 && // outer
        mc2.getCols() > 1) {
            dimOut.set(mc1.getRows(), mc2.getCols(), mc1.getRowsPerBlock(), mc2.getColsPerBlock());
        } else {
            // default case
            dimOut.set(mc1);
        }
    } else if (ins instanceof TernaryInstruction) {
        dimOut.set(dims.get(ins.getInputIndexes()[0]));
    } else if (ins instanceof CombineTernaryInstruction) {
        CtableInstruction realIns = (CtableInstruction) ins;
        dimOut.set(dims.get(realIns.input1));
    } else if (ins instanceof CombineUnaryInstruction) {
        dimOut.set(dims.get(((CombineUnaryInstruction) ins).input));
    } else if (ins instanceof CM_N_COVInstruction || ins instanceof GroupedAggregateInstruction) {
        dimOut.set(1, 1, 1, 1);
    } else if (ins instanceof RangeBasedReIndexInstruction) {
        RangeBasedReIndexInstruction realIns = (RangeBasedReIndexInstruction) ins;
        MatrixCharacteristics dimIn = dims.get(realIns.input);
        realIns.computeOutputCharacteristics(dimIn, dimOut);
    } else if (ins instanceof CtableInstruction) {
        CtableInstruction realIns = (CtableInstruction) ins;
        MatrixCharacteristics in_dim = dims.get(realIns.input1);
        dimOut.set(realIns.getOutputDim1(), realIns.getOutputDim2(), in_dim.numRowsPerBlock, in_dim.numColumnsPerBlock);
    } else {
        /*
			 * if ins is none of the above cases then we assume that dim_out dimensions are unknown
			 */
        dimOut.numRows = -1;
        dimOut.numColumns = -1;
        dimOut.numRowsPerBlock = 1;
        dimOut.numColumnsPerBlock = 1;
    }
}
Also used : TernaryInstruction(org.apache.sysml.runtime.instructions.mr.TernaryInstruction) CombineTernaryInstruction(org.apache.sysml.runtime.instructions.mr.CombineTernaryInstruction) CombineTernaryInstruction(org.apache.sysml.runtime.instructions.mr.CombineTernaryInstruction) ReblockInstruction(org.apache.sysml.runtime.instructions.mr.ReblockInstruction) DataGenMRInstruction(org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction) AggregateUnaryInstruction(org.apache.sysml.runtime.instructions.mr.AggregateUnaryInstruction) CombineUnaryInstruction(org.apache.sysml.runtime.instructions.mr.CombineUnaryInstruction) UnaryInstruction(org.apache.sysml.runtime.instructions.mr.UnaryInstruction) PMMJMRInstruction(org.apache.sysml.runtime.instructions.mr.PMMJMRInstruction) GroupedAggregateMInstruction(org.apache.sysml.runtime.instructions.mr.GroupedAggregateMInstruction) CombineUnaryInstruction(org.apache.sysml.runtime.instructions.mr.CombineUnaryInstruction) MatrixReshapeMRInstruction(org.apache.sysml.runtime.instructions.mr.MatrixReshapeMRInstruction) RemoveEmptyMRInstruction(org.apache.sysml.runtime.instructions.mr.RemoveEmptyMRInstruction) AggregateBinaryInstruction(org.apache.sysml.runtime.instructions.mr.AggregateBinaryInstruction) CombineBinaryInstruction(org.apache.sysml.runtime.instructions.mr.CombineBinaryInstruction) BinaryInstruction(org.apache.sysml.runtime.instructions.mr.BinaryInstruction) ZeroOutInstruction(org.apache.sysml.runtime.instructions.mr.ZeroOutInstruction) QuaternaryInstruction(org.apache.sysml.runtime.instructions.mr.QuaternaryInstruction) UnaryMRInstructionBase(org.apache.sysml.runtime.instructions.mr.UnaryMRInstructionBase) MapMultChainInstruction(org.apache.sysml.runtime.instructions.mr.MapMultChainInstruction) MMTSJType(org.apache.sysml.lops.MMTSJ.MMTSJType) ReplicateInstruction(org.apache.sysml.runtime.instructions.mr.ReplicateInstruction) CumulativeAggregateInstruction(org.apache.sysml.runtime.instructions.mr.CumulativeAggregateInstruction) GroupedAggregateInstruction(org.apache.sysml.runtime.instructions.mr.GroupedAggregateInstruction) AggregateInstruction(org.apache.sysml.runtime.instructions.mr.AggregateInstruction) CombineBinaryInstruction(org.apache.sysml.runtime.instructions.mr.CombineBinaryInstruction) BinaryMRInstructionBase(org.apache.sysml.runtime.instructions.mr.BinaryMRInstructionBase) CM_N_COVInstruction(org.apache.sysml.runtime.instructions.mr.CM_N_COVInstruction) AggregateUnaryInstruction(org.apache.sysml.runtime.instructions.mr.AggregateUnaryInstruction) ParameterizedBuiltinMRInstruction(org.apache.sysml.runtime.instructions.mr.ParameterizedBuiltinMRInstruction) SeqInstruction(org.apache.sysml.runtime.instructions.mr.SeqInstruction) RandInstruction(org.apache.sysml.runtime.instructions.mr.RandInstruction) RangeBasedReIndexInstruction(org.apache.sysml.runtime.instructions.mr.RangeBasedReIndexInstruction) AppendInstruction(org.apache.sysml.runtime.instructions.mr.AppendInstruction) ScalarInstruction(org.apache.sysml.runtime.instructions.mr.ScalarInstruction) ReorgInstruction(org.apache.sysml.runtime.instructions.mr.ReorgInstruction) CtableInstruction(org.apache.sysml.runtime.instructions.mr.CtableInstruction) CumulativeAggregateInstruction(org.apache.sysml.runtime.instructions.mr.CumulativeAggregateInstruction) AggregateBinaryInstruction(org.apache.sysml.runtime.instructions.mr.AggregateBinaryInstruction) AggregateUnaryOperator(org.apache.sysml.runtime.matrix.operators.AggregateUnaryOperator) UaggOuterChainInstruction(org.apache.sysml.runtime.instructions.mr.UaggOuterChainInstruction) MMTSJMRInstruction(org.apache.sysml.runtime.instructions.mr.MMTSJMRInstruction) GroupedAggregateInstruction(org.apache.sysml.runtime.instructions.mr.GroupedAggregateInstruction) BinaryMInstruction(org.apache.sysml.runtime.instructions.mr.BinaryMInstruction)

Aggregations

AggregateBinaryInstruction (org.apache.sysml.runtime.instructions.mr.AggregateBinaryInstruction)18 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)8 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)8 BinaryMInstruction (org.apache.sysml.runtime.instructions.mr.BinaryMInstruction)6 DataGenMRInstruction (org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction)6 PMMJMRInstruction (org.apache.sysml.runtime.instructions.mr.PMMJMRInstruction)6 RemoveEmptyMRInstruction (org.apache.sysml.runtime.instructions.mr.RemoveEmptyMRInstruction)6 UnaryMRInstructionBase (org.apache.sysml.runtime.instructions.mr.UnaryMRInstructionBase)6 ArrayList (java.util.ArrayList)4 AggregateInstruction (org.apache.sysml.runtime.instructions.mr.AggregateInstruction)4 AggregateUnaryInstruction (org.apache.sysml.runtime.instructions.mr.AggregateUnaryInstruction)4 AppendGInstruction (org.apache.sysml.runtime.instructions.mr.AppendGInstruction)4 AppendMInstruction (org.apache.sysml.runtime.instructions.mr.AppendMInstruction)4 BinaryMRInstructionBase (org.apache.sysml.runtime.instructions.mr.BinaryMRInstructionBase)4 CM_N_COVInstruction (org.apache.sysml.runtime.instructions.mr.CM_N_COVInstruction)4 CumulativeAggregateInstruction (org.apache.sysml.runtime.instructions.mr.CumulativeAggregateInstruction)4 GroupedAggregateInstruction (org.apache.sysml.runtime.instructions.mr.GroupedAggregateInstruction)4 MRInstruction (org.apache.sysml.runtime.instructions.mr.MRInstruction)4 MapMultChainInstruction (org.apache.sysml.runtime.instructions.mr.MapMultChainInstruction)4 MatrixReshapeMRInstruction (org.apache.sysml.runtime.instructions.mr.MatrixReshapeMRInstruction)4