Use of org.apache.sysml.runtime.instructions.mr.MRInstruction in project incubator-systemml by Apache.
Class MRJobConfiguration, method computeMatrixCharacteristics.
/**
 * Propagates matrix characteristics (dimensions, block sizes, non-zeros) through all
 * instructions of an MR job and records them in the job configuration.
 * <p>
 * The dimension map is seeded from the job configuration for every input index and then
 * updated, in order, by the data-gen, mapper, reblock, aggregate, aggregate-binary and
 * remaining reducer instructions. Along the way the characteristics of intermediates,
 * reblock outputs, aggregate-binary inputs, mapper outputs and final outputs are written
 * back into {@code job}.
 * <p>
 * NOTE: this method needs to be in-sync with MRBaseForCommonInstructions.processOneInstruction,
 * otherwise, the latter will potentially fail with missing dimension information.
 *
 * @param job job configuration (read for input characteristics, updated with derived ones)
 * @param inputIndexes array of byte indexes
 * @param dataGenInstructions data gen instructions as a string
 * @param instructionsInMapper instruction in mapper as a string
 * @param reblockInstructions reblock instructions as a string
 * @param aggInstructionsInReducer aggregate instructions in reducer as a string
 * @param aggBinInstructions binary aggregate instructions as a string; not read by this
 *        method, the aggregate binary instructions are obtained from {@code job} instead
 * @param otherInstructionsInReducer other instructions in reducer as a string
 * @param resultIndexes array of byte result indexes
 * @param mapOutputIndexes set of map output indexes
 * @param forMMCJ if true, the number of reduce groups is the number of column blocks of the
 *        first input of the aggregate binary instruction; if false, it is the area under the
 *        skyline of the block-grid sizes (row blocks x column blocks) of all map outputs
 * @return characteristics of the result matrices (in the order of {@code resultIndexes})
 *         together with the number of reduce groups
 * @throws DMLRuntimeException if DMLRuntimeException occurs
 */
public static MatrixChar_N_ReducerGroups computeMatrixCharacteristics(JobConf job, byte[] inputIndexes, String dataGenInstructions, String instructionsInMapper, String reblockInstructions, String aggInstructionsInReducer, String aggBinInstructions, String otherInstructionsInReducer, byte[] resultIndexes, HashSet<Byte> mapOutputIndexes, boolean forMMCJ) throws DMLRuntimeException {
    HashSet<Byte> intermediateMatrixIndexes = new HashSet<Byte>();
    HashMap<Byte, MatrixCharacteristics> dims = new HashMap<Byte, MatrixCharacteristics>();

    // seed the dimension map with the characteristics of all job inputs
    for (byte i : inputIndexes) {
        MatrixCharacteristics dim = new MatrixCharacteristics(getNumRows(job, i), getNumColumns(job, i), getNumRowsPerBlock(job, i), getNumColumnsPerBlock(job, i), getNumNonZero(job, i));
        dims.put(i, dim);
    }

    // data generation instructions
    DataGenMRInstruction[] dataGenIns = MRInstructionParser.parseDataGenInstructions(dataGenInstructions);
    if (dataGenIns != null) {
        for (DataGenMRInstruction ins : dataGenIns) {
            MatrixCharacteristics.computeDimension(dims, ins);
        }
    }

    // mapper instructions: additionally register the characteristics of selected inputs
    // as intermediates in the job configuration
    MRInstruction[] insMapper = MRInstructionParser.parseMixedInstructions(instructionsInMapper);
    if (insMapper != null) {
        for (MRInstruction ins : insMapper) {
            MatrixCharacteristics.computeDimension(dims, ins);
            if (ins instanceof UnaryMRInstructionBase) {
                UnaryMRInstructionBase tempIns = (UnaryMRInstructionBase) ins;
                setIntermediateMatrixCharactristics(job, tempIns.input, dims.get(tempIns.input));
                intermediateMatrixIndexes.add(tempIns.input);
            } else if (ins instanceof AppendMInstruction) {
                AppendMInstruction tempIns = (AppendMInstruction) ins;
                setIntermediateMatrixCharactristics(job, tempIns.input1, dims.get(tempIns.input1));
                intermediateMatrixIndexes.add(tempIns.input1);
            } else if (ins instanceof AppendGInstruction) {
                AppendGInstruction tempIns = (AppendGInstruction) ins;
                setIntermediateMatrixCharactristics(job, tempIns.input1, dims.get(tempIns.input1));
                intermediateMatrixIndexes.add(tempIns.input1);
            } else if (ins instanceof BinaryMInstruction) {
                BinaryMInstruction tempIns = (BinaryMInstruction) ins;
                setIntermediateMatrixCharactristics(job, tempIns.input1, dims.get(tempIns.input1));
                intermediateMatrixIndexes.add(tempIns.input1);
            } else if (ins instanceof AggregateBinaryInstruction) {
                AggregateBinaryInstruction tempIns = (AggregateBinaryInstruction) ins;
                setIntermediateMatrixCharactristics(job, tempIns.input1, dims.get(tempIns.input1));
                //TODO
                intermediateMatrixIndexes.add(tempIns.input1);
            } else if (ins instanceof MapMultChainInstruction) {
                MapMultChainInstruction tempIns = (MapMultChainInstruction) ins;
                // the characteristics stored for input1 must be those of input1 itself
                // (index and dimensions have to refer to the same matrix)
                setIntermediateMatrixCharactristics(job, tempIns.getInput1(), dims.get(tempIns.getInput1()));
                intermediateMatrixIndexes.add(tempIns.getInput1());
            } else if (ins instanceof PMMJMRInstruction) {
                PMMJMRInstruction tempIns = (PMMJMRInstruction) ins;
                setIntermediateMatrixCharactristics(job, tempIns.input2, dims.get(tempIns.input2));
                intermediateMatrixIndexes.add(tempIns.input2);
            }
        }
    }

    // reblock instructions
    ReblockInstruction[] reblockIns = MRInstructionParser.parseReblockInstructions(reblockInstructions);
    if (reblockIns != null) {
        for (ReblockInstruction ins : reblockIns) {
            MatrixCharacteristics.computeDimension(dims, ins);
            setMatrixCharactristicsForReblock(job, ins.output, dims.get(ins.output));
        }
    }

    // aggregate instructions in the reducer
    Instruction[] aggIns = MRInstructionParser.parseAggregateInstructions(aggInstructionsInReducer);
    if (aggIns != null) {
        for (Instruction ins : aggIns) {
            MRInstruction mrins = (MRInstruction) ins;
            MatrixCharacteristics.computeDimension(dims, mrins);
            // if instruction's output is not in resultIndexes, then add its dimensions to jobconf
            boolean found = false;
            for (byte b : resultIndexes) {
                if (b == mrins.output) {
                    found = true;
                    break;
                }
            }
            if (!found) {
                setIntermediateMatrixCharactristics(job, mrins.output, dims.get(mrins.output));
                intermediateMatrixIndexes.add(mrins.output);
            }
        }
    }

    // aggregate binary instructions (taken from the job configuration)
    long numReduceGroups = 0;
    AggregateBinaryInstruction[] aggBinIns = getAggregateBinaryInstructions(job);
    if (aggBinIns != null) {
        for (AggregateBinaryInstruction ins : aggBinIns) {
            MatrixCharacteristics dim1 = dims.get(ins.input1);
            MatrixCharacteristics dim2 = dims.get(ins.input2);
            setMatrixCharactristicsForBinAgg(job, ins.input1, dim1);
            setMatrixCharactristicsForBinAgg(job, ins.input2, dim2);
            MatrixCharacteristics.computeDimension(dims, ins);
            //there will be only one aggbin operation for MMCJ
            if (forMMCJ) {
                numReduceGroups = (long) Math.ceil((double) dim1.getCols() / (double) dim1.getColsPerBlock());
            }
        }
    }

    if (!forMMCJ) {
        //store the skylines: keep only block-grid sizes (x row blocks, y column blocks)
        //that are not contained in another map output's grid
        ArrayList<Long> xs = new ArrayList<Long>(mapOutputIndexes.size());
        ArrayList<Long> ys = new ArrayList<Long>(mapOutputIndexes.size());
        for (byte idx : mapOutputIndexes) {
            MatrixCharacteristics dim = dims.get(idx);
            long x = (long) Math.ceil((double) dim.getRows() / (double) dim.getRowsPerBlock());
            long y = (long) Math.ceil((double) dim.getCols() / (double) dim.getColsPerBlock());
            int i = 0;
            boolean toadd = true;
            while (i < xs.size()) {
                if ((x >= xs.get(i) && y > ys.get(i)) || (x > xs.get(i) && y >= ys.get(i))) {
                    //remove any included x's and y's
                    xs.remove(i);
                    ys.remove(i);
                } else if (x <= xs.get(i) && y <= ys.get(i)) {
                    //if included in others, stop
                    toadd = false;
                    break;
                } else {
                    i++;
                }
            }
            if (toadd) {
                xs.add(x);
                ys.add(y);
            }
        }

        //sort by x
        TreeMap<Long, Long> map = new TreeMap<Long, Long>();
        for (int i = 0; i < xs.size(); i++) {
            map.put(xs.get(i), ys.get(i));
        }

        //compute area under the skyline, one vertical strip per entry
        numReduceGroups = 0;
        long prev = 0;
        for (Entry<Long, Long> e : map.entrySet()) {
            numReduceGroups += (e.getKey() - prev) * e.getValue();
            prev = e.getKey();
        }
    }

    // remaining reducer instructions
    MRInstruction[] insReducer = MRInstructionParser.parseMixedInstructions(otherInstructionsInReducer);
    if (insReducer != null) {
        for (MRInstruction ins : insReducer) {
            MatrixCharacteristics.computeDimension(dims, ins);
            if (ins instanceof UnaryMRInstructionBase) {
                UnaryMRInstructionBase tempIns = (UnaryMRInstructionBase) ins;
                setIntermediateMatrixCharactristics(job, tempIns.input, dims.get(tempIns.input));
                intermediateMatrixIndexes.add(tempIns.input);
            } else if (ins instanceof RemoveEmptyMRInstruction) {
                RemoveEmptyMRInstruction tempIns = (RemoveEmptyMRInstruction) ins;
                setIntermediateMatrixCharactristics(job, tempIns.input1, dims.get(tempIns.input1));
                intermediateMatrixIndexes.add(tempIns.input1);
            }
            // if instruction's output is not in resultIndexes, then add its dimensions to jobconf
            boolean found = false;
            for (byte b : resultIndexes) {
                if (b == ins.output) {
                    found = true;
                    break;
                }
            }
            if (!found) {
                setIntermediateMatrixCharactristics(job, ins.output, dims.get(ins.output));
                intermediateMatrixIndexes.add(ins.output);
            }
        }
    }

    // publish intermediate indexes and mapper-output characteristics
    setIntermediateMatrixIndexes(job, intermediateMatrixIndexes);
    for (byte tag : mapOutputIndexes) {
        setMatrixCharactristicsForMapperOutput(job, tag, dims.get(tag));
    }

    // collect and publish the characteristics of the final results
    MatrixCharacteristics[] stats = new MatrixCharacteristics[resultIndexes.length];
    for (int i = 0; i < resultIndexes.length; i++) {
        stats[i] = dims.get(resultIndexes[i]);
        setMatrixCharactristicsForOutput(job, resultIndexes[i], stats[i]);
    }
    return new MatrixChar_N_ReducerGroups(stats, numReduceGroups);
}
Use of org.apache.sysml.runtime.instructions.mr.MRInstruction in project incubator-systemml by Apache.
Class ReduceBase, method configure.
/**
 * Sets up the reducer state from the given job configuration: task id, result
 * indexes and counters, reducer instructions, distributed cache files, and the
 * per-index lookup of aggregate instructions.
 *
 * @param job job configuration to read from
 */
public void configure(JobConf job) {
    super.configure(job);

    // task identity and file prefix taken straight from the configuration
    reducerID = job.get(MRConfigurationNames.MR_TASK_ATTEMPT_ID);
    dimsUnknownFilePrefix = job.get("dims.unknown.file.prefix");

    // indexes of the final output matrices and their unknown-dimension flags
    resultIndexes = MRJobConfiguration.getResultIndexes(job);
    resultDimsUnknown = MRJobConfiguration.getResultDimsUnknown(job);

    // one counter slot per result (SystemML counters, defined in MRJobConfiguration)
    final int numResults = resultIndexes.length;
    resultsNonZeros = new long[numResults];
    resultsMaxRowDims = new long[numResults];
    resultsMaxColDims = new long[numResults];

    collectFinalMultipleOutputs = MRJobConfiguration.getMultipleConvertedOutputs(job);

    // aggregate operations plus the unary/binary operations executed in the reducer
    AggregateInstruction[] aggregates = null;
    try {
        aggregates = MRJobConfiguration.getAggregateInstructions(job);
        MRInstruction[] reducerInsts = MRJobConfiguration.getInstructionsInReducer(job);
        if (reducerInsts != null) {
            mixed_instructions = new ArrayList<MRInstruction>();
            Collections.addAll(mixed_instructions, reducerInsts);
        }
    } catch (DMLRuntimeException e) {
        throw new RuntimeException(e);
    }

    // distributed cache data (if required, reuse if jvm_reuse)
    try {
        setupDistCacheFiles(job);
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    }

    // nothing to reorganize without aggregate instructions
    if (aggregates == null) {
        return;
    }

    // index every aggregate instruction by the matrix it consumes
    for (AggregateInstruction aggregate : aggregates) {
        ArrayList<AggregateInstruction> byInput = agg_instructions.get(aggregate.input);
        if (byInput == null) {
            byInput = new ArrayList<AggregateInstruction>();
            agg_instructions.put(aggregate.input, byInput);
        }
        byInput.add(aggregate);

        if (aggregate.input != aggregate.output) {
            // additionally register an output-to-output variant so that partial
            // aggregation can be applied (important for the reducer-side combiner)
            AggregateInstruction partial = new AggregateInstruction(aggregate.getOperator(), aggregate.output, aggregate.output, aggregate.toString());
            ArrayList<AggregateInstruction> byOutput = agg_instructions.get(partial.input);
            if (byOutput == null) {
                byOutput = new ArrayList<AggregateInstruction>();
                agg_instructions.put(partial.input, byOutput);
            }
            byOutput.add(partial);
        }
    }
}
Use of org.apache.sysml.runtime.instructions.mr.MRInstruction in project incubator-systemml by Apache.
Class CostEstimatorStaticRuntime, method getMapOutputIndexes.
/**
 * Determines the matrix indexes that are referenced on both the map side and the
 * reduce side of a job.
 * <p>
 * Map side: the input indexes plus all indexes of the data-gen and mapper
 * instructions. Reduce side: the result indexes plus all indexes of the shuffle,
 * aggregate and other instructions. Null or empty instruction strings are skipped.
 * Note: this is a simplified version of MRJobConfiguration.setUpOutputIndexesForMapper.
 *
 * @param inIx input indexes
 * @param retIx result indexes
 * @param rdInst data-gen instructions as a string (may be null or empty)
 * @param mapInst mapper instructions as a string (may be null or empty)
 * @param shfInst shuffle instructions as a string (may be null or empty)
 * @param aggInst aggregate instructions as a string (may be null or empty)
 * @param otherInst other instructions as a string (may be null or empty)
 * @return indexes present on both sides, in the iteration order of the underlying set
 * @throws DMLRuntimeException if DMLRuntimeException occurs
 */
private byte[] getMapOutputIndexes(byte[] inIx, byte[] retIx, String rdInst, String mapInst, String shfInst, String aggInst, String otherInst) throws DMLRuntimeException {
    // ---- indexes seen on the map side ----
    HashSet<Byte> mapSide = new HashSet<Byte>();
    for (byte in : inIx) {
        mapSide.add(in);
    }
    if (rdInst != null && !rdInst.isEmpty()) {
        DataGenMRInstruction[] parsed = MRInstructionParser.parseDataGenInstructions(replaceInstructionPatch(rdInst));
        for (DataGenMRInstruction instruction : parsed) {
            for (byte index : instruction.getAllIndexes()) {
                mapSide.add(index);
            }
        }
    }
    if (mapInst != null && !mapInst.isEmpty()) {
        MRInstruction[] parsed = MRInstructionParser.parseMixedInstructions(replaceInstructionPatch(mapInst));
        for (MRInstruction instruction : parsed) {
            for (byte index : instruction.getAllIndexes()) {
                mapSide.add(index);
            }
        }
    }

    // ---- indexes seen on the reduce side ----
    HashSet<Byte> reduceSide = new HashSet<Byte>();
    for (byte out : retIx) {
        reduceSide.add(out);
    }
    if (shfInst != null && !shfInst.isEmpty()) {
        MRInstruction[] parsed = MRInstructionParser.parseMixedInstructions(replaceInstructionPatch(shfInst));
        for (MRInstruction instruction : parsed) {
            for (byte index : instruction.getAllIndexes()) {
                reduceSide.add(index);
            }
        }
    }
    if (aggInst != null && !aggInst.isEmpty()) {
        MRInstruction[] parsed = MRInstructionParser.parseAggregateInstructions(replaceInstructionPatch(aggInst));
        for (MRInstruction instruction : parsed) {
            for (byte index : instruction.getAllIndexes()) {
                reduceSide.add(index);
            }
        }
    }
    if (otherInst != null && !otherInst.isEmpty()) {
        MRInstruction[] parsed = MRInstructionParser.parseMixedInstructions(replaceInstructionPatch(otherInst));
        for (MRInstruction instruction : parsed) {
            for (byte index : instruction.getAllIndexes()) {
                reduceSide.add(index);
            }
        }
    }

    // keep only the map-side indexes that are also used on the reduce side (intersection)
    mapSide.retainAll(reduceSide);

    // unbox into a primitive array
    byte[] result = new byte[mapSide.size()];
    int pos = 0;
    for (byte index : mapSide) {
        result[pos] = index;
        pos++;
    }
    return result;
}
Aggregations