use of org.apache.sysml.runtime.instructions.mr.AggregateBinaryInstruction in project systemml by apache.
the class MMRJMRReducer method reduce.
// private MatrixIndexes indexBuf=new MatrixIndexes();
@Override
public void reduce(TripleIndexes triple, Iterator<TaggedMatrixValue> values, OutputCollector<MatrixIndexes, MatrixValue> out, Reporter report) throws IOException {
long start = System.currentTimeMillis();
commonSetup(report);
// output previous results if needed
if (prevIndexes.getFirstIndex() != triple.getFirstIndex() || prevIndexes.getSecondIndex() != triple.getSecondIndex()) {
// perform mixed operations
processReducerInstructions();
// output results
outputResultsFromCachedValues(report);
cachedValues.reset();
} else {
// clear the buffer
for (AggregateBinaryInstruction aggBinInstruction : aggBinInstructions) {
cachedValues.remove(aggBinInstruction.input1);
cachedValues.remove(aggBinInstruction.input2);
}
}
// perform aggregation first
aggIndexes.setIndexes(triple.getFirstIndex(), triple.getSecondIndex());
processAggregateInstructions(aggIndexes, values);
// perform aggbinary for this group
for (AggregateBinaryInstruction aggBinInstruction : aggBinInstructions) processAggBinaryPerGroup(aggIndexes, aggBinInstruction);
prevIndexes.setIndexes(triple);
report.incrCounter(Counters.COMBINE_OR_REDUCE_TIME, System.currentTimeMillis() - start);
}
use of org.apache.sysml.runtime.instructions.mr.AggregateBinaryInstruction in project systemml by apache.
the class MMCJMR method runJob.
public static JobReturn runJob(MRJobInstruction inst, String[] inputs, InputInfo[] inputInfos, long[] rlens, long[] clens, int[] brlens, int[] bclens, String instructionsInMapper, String aggInstructionsInReducer, String aggBinInstrction, int numReducers, int replication, String output, OutputInfo outputinfo) throws Exception {
JobConf job = new JobConf(MMCJMR.class);
// TODO: check w/ yuanyuan. This job always runs in blocked mode, and hence derivation is not necessary.
boolean inBlockRepresentation = MRJobConfiguration.deriveRepresentation(inputInfos);
// by default, assume that dimensions of MMCJ's output are known at compile time
byte resultDimsUnknown = (byte) 0;
MatrixCharacteristics[] stats = commonSetup(job, inBlockRepresentation, inputs, inputInfos, rlens, clens, brlens, bclens, instructionsInMapper, aggInstructionsInReducer, aggBinInstrction, numReducers, replication, resultDimsUnknown, output, outputinfo);
// Print the complete instruction
if (LOG.isTraceEnabled())
inst.printCompleteMRJobInstruction(stats);
// There is always a single output
if (stats[0].getRows() == -1 || stats[0].getCols() == -1) {
resultDimsUnknown = (byte) 1;
// if the dimensions are unknown, then setup done in commonSetup() must be updated
byte[] resultIndexes = new byte[] { MRInstructionParser.parseSingleInstruction(aggBinInstrction).output };
byte[] resultDimsUnknown_Array = new byte[] { resultDimsUnknown };
// set up the multiple output files, and their format information
MRJobConfiguration.setUpMultipleOutputs(job, resultIndexes, resultDimsUnknown_Array, new String[] { output }, new OutputInfo[] { outputinfo }, inBlockRepresentation);
}
AggregateBinaryInstruction ins = (AggregateBinaryInstruction) MRInstructionParser.parseSingleInstruction(aggBinInstrction);
MatrixCharacteristics dim1 = MRJobConfiguration.getMatrixCharactristicsForBinAgg(job, ins.input1);
MatrixCharacteristics dim2 = MRJobConfiguration.getMatrixCharactristicsForBinAgg(job, ins.input2);
if (dim1.getRowsPerBlock() > dim1.getRows())
dim1.setRowsPerBlock((int) dim1.getRows());
if (dim1.getColsPerBlock() > dim1.getCols())
dim1.setColsPerBlock((int) dim1.getCols());
if (dim2.getRowsPerBlock() > dim2.getRows())
dim2.setRowsPerBlock((int) dim2.getRows());
if (dim2.getColsPerBlock() > dim2.getCols())
dim2.setColsPerBlock((int) dim2.getCols());
long blockSize1 = 77 + 8 * dim1.getRowsPerBlock() * dim1.getColsPerBlock();
long blockSize2 = 77 + 8 * dim2.getRowsPerBlock() * dim2.getColsPerBlock();
long blockSizeResult = 77 + 8 * dim1.getRowsPerBlock() * dim2.getColsPerBlock();
long cacheSize = -1;
// cache the first result
if (dim1.getRows() < dim2.getCols()) {
long numBlocks = (long) Math.ceil((double) dim1.getRows() / (double) dim1.getRowsPerBlock());
cacheSize = numBlocks * (20 + blockSize1) + 32;
} else // cache the second result
{
long numBlocks = (long) Math.ceil((double) dim2.getCols() / (double) dim2.getColsPerBlock());
cacheSize = numBlocks * (20 + blockSize2) + 32;
}
// add known memory consumption (will be substracted from output buffer)
cacheSize += // the cached key-value pair (plus input instance)
2 * Math.max(blockSize1, blockSize2) + // the cached single result
blockSizeResult + // misc memory requirement by hadoop
MRJobConfiguration.getMiscMemRequired(job);
MRJobConfiguration.setMMCJCacheSize(job, (int) cacheSize);
// set unique working dir
MRJobConfiguration.setUniqueWorkingDir(job);
// run mmcj job
RunningJob runjob = JobClient.runJob(job);
/* Process different counters */
// NOTE: MMCJ job always has only a single output.
// Hence, no need to scan resultIndexes[] like other jobs
int outputIndex = 0;
Byte outputMatrixID = MRInstructionParser.parseSingleInstruction(aggBinInstrction).output;
Group group = runjob.getCounters().getGroup(MRJobConfiguration.NUM_NONZERO_CELLS);
// number of non-zeros
stats[outputIndex].setNonZeros(group.getCounter(Byte.toString(outputMatrixID)));
return new JobReturn(stats[outputIndex], outputinfo, runjob.isSuccessful());
}
use of org.apache.sysml.runtime.instructions.mr.AggregateBinaryInstruction in project systemml by apache.
the class MatrixCharacteristics method computeDimension.
public static void computeDimension(HashMap<Byte, MatrixCharacteristics> dims, MRInstruction ins) {
MatrixCharacteristics dimOut = dims.get(ins.output);
if (dimOut == null) {
dimOut = new MatrixCharacteristics();
dims.put(ins.output, dimOut);
}
if (ins instanceof ReorgInstruction) {
ReorgInstruction realIns = (ReorgInstruction) ins;
reorg(dims.get(realIns.input), (ReorgOperator) realIns.getOperator(), dimOut);
} else if (ins instanceof AppendInstruction) {
AppendInstruction realIns = (AppendInstruction) ins;
MatrixCharacteristics in_dim1 = dims.get(realIns.input1);
MatrixCharacteristics in_dim2 = dims.get(realIns.input2);
if (realIns.isCBind())
dimOut.set(in_dim1.numRows, in_dim1.numColumns + in_dim2.numColumns, in_dim1.numRowsPerBlock, in_dim2.numColumnsPerBlock);
else
dimOut.set(in_dim1.numRows + in_dim2.numRows, in_dim1.numColumns, in_dim1.numRowsPerBlock, in_dim2.numColumnsPerBlock);
} else if (ins instanceof CumulativeAggregateInstruction) {
AggregateUnaryInstruction realIns = (AggregateUnaryInstruction) ins;
MatrixCharacteristics in = dims.get(realIns.input);
dimOut.set((long) Math.ceil((double) in.getRows() / in.getRowsPerBlock()), in.getCols(), in.getRowsPerBlock(), in.getColsPerBlock());
} else if (ins instanceof AggregateUnaryInstruction) {
AggregateUnaryInstruction realIns = (AggregateUnaryInstruction) ins;
aggregateUnary(dims.get(realIns.input), (AggregateUnaryOperator) realIns.getOperator(), dimOut);
} else if (ins instanceof AggregateBinaryInstruction) {
AggregateBinaryInstruction realIns = (AggregateBinaryInstruction) ins;
aggregateBinary(dims.get(realIns.input1), dims.get(realIns.input2), (AggregateBinaryOperator) realIns.getOperator(), dimOut);
} else if (ins instanceof MapMultChainInstruction) {
// output size independent of chain type
MapMultChainInstruction realIns = (MapMultChainInstruction) ins;
MatrixCharacteristics mc1 = dims.get(realIns.getInput1());
MatrixCharacteristics mc2 = dims.get(realIns.getInput2());
dimOut.set(mc1.numColumns, mc2.numColumns, mc1.numRowsPerBlock, mc1.numColumnsPerBlock);
} else if (ins instanceof QuaternaryInstruction) {
QuaternaryInstruction realIns = (QuaternaryInstruction) ins;
MatrixCharacteristics mc1 = dims.get(realIns.getInput1());
MatrixCharacteristics mc2 = dims.get(realIns.getInput2());
MatrixCharacteristics mc3 = dims.get(realIns.getInput3());
realIns.computeMatrixCharacteristics(mc1, mc2, mc3, dimOut);
} else if (ins instanceof ReblockInstruction) {
ReblockInstruction realIns = (ReblockInstruction) ins;
MatrixCharacteristics in_dim = dims.get(realIns.input);
dimOut.set(in_dim.numRows, in_dim.numColumns, realIns.brlen, realIns.bclen, in_dim.nonZero);
} else if (ins instanceof MatrixReshapeMRInstruction) {
MatrixReshapeMRInstruction mrinst = (MatrixReshapeMRInstruction) ins;
MatrixCharacteristics in_dim = dims.get(mrinst.input);
dimOut.set(mrinst.getNumRows(), mrinst.getNumColunms(), in_dim.getRowsPerBlock(), in_dim.getColsPerBlock(), in_dim.getNonZeros());
} else if (ins instanceof RandInstruction || ins instanceof SeqInstruction) {
DataGenMRInstruction dataIns = (DataGenMRInstruction) ins;
dimOut.set(dims.get(dataIns.getInput()));
} else if (ins instanceof ReplicateInstruction) {
ReplicateInstruction realIns = (ReplicateInstruction) ins;
realIns.computeOutputDimension(dims.get(realIns.input), dimOut);
} else if (// before unary
ins instanceof ParameterizedBuiltinMRInstruction) {
ParameterizedBuiltinMRInstruction realIns = (ParameterizedBuiltinMRInstruction) ins;
realIns.computeOutputCharacteristics(dims.get(realIns.input), dimOut);
} else if (ins instanceof ScalarInstruction || ins instanceof AggregateInstruction || (ins instanceof UnaryInstruction && !(ins instanceof MMTSJMRInstruction)) || ins instanceof ZeroOutInstruction) {
UnaryMRInstructionBase realIns = (UnaryMRInstructionBase) ins;
dimOut.set(dims.get(realIns.input));
} else if (ins instanceof MMTSJMRInstruction) {
MMTSJMRInstruction mmtsj = (MMTSJMRInstruction) ins;
MMTSJType tstype = mmtsj.getMMTSJType();
MatrixCharacteristics mc = dims.get(mmtsj.input);
dimOut.set(tstype.isLeft() ? mc.numColumns : mc.numRows, tstype.isLeft() ? mc.numColumns : mc.numRows, mc.numRowsPerBlock, mc.numColumnsPerBlock);
} else if (ins instanceof PMMJMRInstruction) {
PMMJMRInstruction pmmins = (PMMJMRInstruction) ins;
MatrixCharacteristics mc = dims.get(pmmins.input2);
dimOut.set(pmmins.getNumRows(), mc.numColumns, mc.numRowsPerBlock, mc.numColumnsPerBlock);
} else if (ins instanceof RemoveEmptyMRInstruction) {
RemoveEmptyMRInstruction realIns = (RemoveEmptyMRInstruction) ins;
MatrixCharacteristics mc = dims.get(realIns.input1);
long min = realIns.isEmptyReturn() ? 1 : 0;
if (realIns.isRemoveRows())
dimOut.set(Math.max(realIns.getOutputLen(), min), mc.getCols(), mc.numRowsPerBlock, mc.numColumnsPerBlock);
else
dimOut.set(mc.getRows(), Math.max(realIns.getOutputLen(), min), mc.numRowsPerBlock, mc.numColumnsPerBlock);
} else if (// needs to be checked before binary
ins instanceof UaggOuterChainInstruction) {
UaggOuterChainInstruction realIns = (UaggOuterChainInstruction) ins;
MatrixCharacteristics mc1 = dims.get(realIns.input1);
MatrixCharacteristics mc2 = dims.get(realIns.input2);
realIns.computeOutputCharacteristics(mc1, mc2, dimOut);
} else if (ins instanceof GroupedAggregateMInstruction) {
GroupedAggregateMInstruction realIns = (GroupedAggregateMInstruction) ins;
MatrixCharacteristics mc1 = dims.get(realIns.input1);
realIns.computeOutputCharacteristics(mc1, dimOut);
} else if (ins instanceof BinaryInstruction || ins instanceof BinaryMInstruction || ins instanceof CombineBinaryInstruction) {
BinaryMRInstructionBase realIns = (BinaryMRInstructionBase) ins;
MatrixCharacteristics mc1 = dims.get(realIns.input1);
MatrixCharacteristics mc2 = dims.get(realIns.input2);
if (mc1.getRows() > 1 && mc1.getCols() == 1 && mc2.getRows() == 1 && // outer
mc2.getCols() > 1) {
dimOut.set(mc1.getRows(), mc2.getCols(), mc1.getRowsPerBlock(), mc2.getColsPerBlock());
} else {
// default case
dimOut.set(mc1);
}
} else if (ins instanceof TernaryInstruction) {
dimOut.set(dims.get(ins.getInputIndexes()[0]));
} else if (ins instanceof CombineTernaryInstruction) {
CtableInstruction realIns = (CtableInstruction) ins;
dimOut.set(dims.get(realIns.input1));
} else if (ins instanceof CombineUnaryInstruction) {
dimOut.set(dims.get(((CombineUnaryInstruction) ins).input));
} else if (ins instanceof CM_N_COVInstruction || ins instanceof GroupedAggregateInstruction) {
dimOut.set(1, 1, 1, 1);
} else if (ins instanceof RangeBasedReIndexInstruction) {
RangeBasedReIndexInstruction realIns = (RangeBasedReIndexInstruction) ins;
MatrixCharacteristics dimIn = dims.get(realIns.input);
realIns.computeOutputCharacteristics(dimIn, dimOut);
} else if (ins instanceof CtableInstruction) {
CtableInstruction realIns = (CtableInstruction) ins;
MatrixCharacteristics in_dim = dims.get(realIns.input1);
dimOut.set(realIns.getOutputDim1(), realIns.getOutputDim2(), in_dim.numRowsPerBlock, in_dim.numColumnsPerBlock);
} else {
/*
* if ins is none of the above cases then we assume that dim_out dimensions are unknown
*/
dimOut.numRows = -1;
dimOut.numColumns = -1;
dimOut.numRowsPerBlock = 1;
dimOut.numColumnsPerBlock = 1;
}
}
Aggregations