use of org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction in project incubator-systemml by apache.
the class MapperBase method configure.
/**
 * Initializes this mapper from the job configuration.
 * <p>
 * Reads the representative input matrix indexes, the input converter of the first
 * representative matrix, all data generation / mapper / reblock / CSV reblock instructions
 * and the mapper output indexes. It then derives per-input dimensions, block sizes and key
 * bounds, sets up the distributed cache files, and finally assigns to every representative
 * matrix the instructions and outputs that are (transitively) fed by it.
 *
 * @param job job configuration to read from
 * @throws RuntimeException if reading the configuration fails (wrapping the underlying
 *         IOException or DMLRuntimeException), or if more than one data generation
 *         instruction applies to the same representative matrix
 */
@Override
public void configure(JobConf job) {
    super.configure(job);

    // one matrix file can occur multiple times in a statement, hence the list of
    // representative indexes for this mapper's input
    try {
        representativeMatrixes = MRJobConfiguration.getInputMatrixIndexesInMapper(job);
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
    final int numInputs = representativeMatrixes.size();

    // input converter information (taken from the first representative matrix)
    inputConverter = MRJobConfiguration.getInputConverter(job, representativeMatrixes.get(0));

    // parse all instruction categories that may apply to the matrices this file represents
    DataGenMRInstruction[] genIns;
    MRInstruction[] mapIns;
    ReblockInstruction[] reblockIns;
    CSVReblockInstruction[] csvReblockIns;
    try {
        genIns = MRJobConfiguration.getDataGenInstructions(job);
        mapIns = MRJobConfiguration.getInstructionsInMapper(job);
        reblockIns = MRJobConfiguration.getReblockInstructions(job);
        csvReblockIns = MRJobConfiguration.getCSVReblockInstructions(job);
    } catch (DMLRuntimeException dmle) {
        throw new RuntimeException(dmle);
    }

    // all output indexes of the mapper
    byte[] outputs = MRJobConfiguration.getOutputIndexesInMapper(job);

    // dimensions of all representative matrices
    rlens = new long[numInputs];
    clens = new long[numInputs];
    for (int m = 0; m < numInputs; m++) {
        byte index = representativeMatrixes.get(m);
        rlens[m] = MRJobConfiguration.getNumRows(job, index);
        clens[m] = MRJobConfiguration.getNumColumns(job, index);
    }

    // block sizes of all representative matrices
    brlens = new int[numInputs];
    bclens = new int[numInputs];
    for (int m = 0; m < numInputs; m++) {
        byte index = representativeMatrixes.get(m);
        brlens[m] = MRJobConfiguration.getNumRowsPerBlock(job, index);
        bclens[m] = MRJobConfiguration.getNumColumnsPerBlock(job, index);
    }

    // upper boundaries for key value pairs and sizes of the last block per dimension
    rbounds = new long[numInputs];
    cbounds = new long[numInputs];
    lastblockrlens = new int[numInputs];
    lastblockclens = new int[numInputs];
    final boolean blocked = valueClass.equals(MatrixBlock.class);
    for (int m = 0; m < numInputs; m++) {
        if (blocked) {
            // keys are block indexes: number of blocks per dimension, at least one
            rbounds[m] = (long) Math.max(Math.ceil((double) rlens[m] / brlens[m]), 1);
            cbounds[m] = (long) Math.max(Math.ceil((double) clens[m] / bclens[m]), 1);
            // a remainder of zero means the last block is a full block
            int tailRows = (int) (rlens[m] % brlens[m]);
            lastblockrlens[m] = (tailRows == 0) ? brlens[m] : tailRows;
            int tailCols = (int) (clens[m] % bclens[m]);
            lastblockclens[m] = (tailCols == 0) ? bclens[m] : tailCols;
        } else {
            // keys are cell indexes: bounds equal the matrix dimensions
            rbounds[m] = rlens[m];
            cbounds[m] = clens[m];
            lastblockrlens[m] = 1;
            lastblockclens[m] = 1;
        }
    }

    // load data from distributed cache (if required, reuse if jvm_reuse)
    try {
        setupDistCacheFiles(job);
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }

    // per representative matrix: collect every instruction fed (directly or transitively) by it
    for (int m = 0; m < representativeMatrixes.size(); m++) {
        // indexes reachable from this representative matrix; grows as instructions are accepted
        HashSet<Byte> reachable = new HashSet<>();
        reachable.add(representativeMatrixes.get(m));

        // relevant data generation instruction (at most one is allowed)
        DataGenMRInstruction dataGenForInput = null;
        int dataGenCount = 0;
        if (genIns != null) {
            for (DataGenMRInstruction gen : genIns) {
                if (!reachable.contains(gen.getInput())) {
                    continue;
                }
                if (dataGenCount == 0) {
                    dataGenForInput = gen;
                }
                dataGenCount++;
                reachable.add(gen.output);
            }
        }
        if (dataGenCount > 1) {
            throw new RuntimeException("only expects at most one rand instruction per input");
        }
        dataGen_instructions.add(dataGenForInput);

        // relevant mapper instructions: accepted as soon as ANY of their inputs is reachable
        ArrayList<MRInstruction> opsForInput = new ArrayList<>();
        if (mapIns != null) {
            for (MRInstruction op : mapIns) {
                try {
                    boolean consumesReachable = false;
                    for (byte inIx : op.getInputIndexes()) {
                        if (reachable.contains(inIx)) {
                            consumesReachable = true;
                            break;
                        }
                    }
                    if (consumesReachable) {
                        opsForInput.add(op);
                        reachable.add(op.output);
                    }
                } catch (DMLRuntimeException dmle) {
                    throw new RuntimeException(dmle);
                }
            }
        }
        mapper_instructions.add(opsForInput);

        // relevant reblock instructions
        ArrayList<ReblockInstruction> reblocksForInput = new ArrayList<>();
        if (reblockIns != null) {
            for (ReblockInstruction rb : reblockIns) {
                if (reachable.contains(rb.input)) {
                    reblocksForInput.add(rb);
                    reachable.add(rb.output);
                }
            }
        }
        reblock_instructions.add(reblocksForInput);

        // relevant CSV reblock instructions
        ArrayList<CSVReblockInstruction> csvReblocksForInput = new ArrayList<>();
        if (csvReblockIns != null) {
            for (CSVReblockInstruction crb : csvReblockIns) {
                if (reachable.contains(crb.input)) {
                    csvReblocksForInput.add(crb);
                    reachable.add(crb.output);
                }
            }
        }
        csv_reblock_instructions.add(csvReblocksForInput);

        // mapper outputs that are reachable from this representative matrix
        ArrayList<Byte> outsForInput = new ArrayList<>();
        for (byte out : outputs) {
            if (reachable.contains(out)) {
                outsForInput.add(out);
            }
        }
        outputIndexes.add(outsForInput);
    }
}
use of org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction in project incubator-systemml by apache.
the class MatrixCharacteristics method computeDimension.
/**
 * Derives the output matrix characteristics of a single MR instruction from the
 * characteristics of its inputs and stores them in {@code dims} under {@code ins.output}.
 * <p>
 * The entry for the output index is created if it does not exist yet and is otherwise
 * updated in place. The computation is selected by the runtime type of {@code ins}; the
 * order of the type checks matters (see the inline notes), so new branches must be
 * inserted with care. Instruction types not covered by any branch yield unknown
 * dimensions (-1 x -1) with 1 x 1 blocks.
 * <p>
 * Input characteristics are looked up with {@code dims.get(..)} without a null check, so
 * branches that dereference the looked-up value require the inputs to be registered in
 * {@code dims} beforehand.
 *
 * @param dims map from matrix index to matrix characteristics; read for the inputs of
 *             {@code ins} and updated for its output
 * @param ins  instruction whose output characteristics are computed
 */
public static void computeDimension(HashMap<Byte, MatrixCharacteristics> dims, MRInstruction ins) {
// reuse the registered output entry, or register a fresh one
MatrixCharacteristics dimOut = dims.get(ins.output);
if (dimOut == null) {
dimOut = new MatrixCharacteristics();
dims.put(ins.output, dimOut);
}
if (ins instanceof ReorgInstruction) {
ReorgInstruction realIns = (ReorgInstruction) ins;
reorg(dims.get(realIns.input), (ReorgOperator) realIns.getOperator(), dimOut);
} else if (ins instanceof AppendInstruction) {
AppendInstruction realIns = (AppendInstruction) ins;
MatrixCharacteristics in_dim1 = dims.get(realIns.input1);
MatrixCharacteristics in_dim2 = dims.get(realIns.input2);
// cbind: rows of the first input, columns summed; otherwise: rows summed, columns of the first input
// in both cases rows-per-block come from the first and columns-per-block from the second input
if (realIns.isCBind())
dimOut.set(in_dim1.numRows, in_dim1.numColumns + in_dim2.numColumns, in_dim1.numRowsPerBlock, in_dim2.numColumnsPerBlock);
else
dimOut.set(in_dim1.numRows + in_dim2.numRows, in_dim1.numColumns, in_dim1.numRowsPerBlock, in_dim2.numColumnsPerBlock);
} else if (ins instanceof CumulativeAggregateInstruction) {
// checked before the generic AggregateUnaryInstruction branch below; the cast suggests
// CumulativeAggregateInstruction is a subtype of AggregateUnaryInstruction - TODO confirm
AggregateUnaryInstruction realIns = (AggregateUnaryInstruction) ins;
MatrixCharacteristics in = dims.get(realIns.input);
// output has one row per row block of the input; columns and block sizes are unchanged
dimOut.set((long) Math.ceil((double) in.getRows() / in.getRowsPerBlock()), in.getCols(), in.getRowsPerBlock(), in.getColsPerBlock());
} else if (ins instanceof AggregateUnaryInstruction) {
AggregateUnaryInstruction realIns = (AggregateUnaryInstruction) ins;
aggregateUnary(dims.get(realIns.input), (AggregateUnaryOperator) realIns.getOperator(), dimOut);
} else if (ins instanceof AggregateBinaryInstruction) {
AggregateBinaryInstruction realIns = (AggregateBinaryInstruction) ins;
aggregateBinary(dims.get(realIns.input1), dims.get(realIns.input2), (AggregateBinaryOperator) realIns.getOperator(), dimOut);
} else if (ins instanceof MapMultChainInstruction) {
// output size independent of chain type
MapMultChainInstruction realIns = (MapMultChainInstruction) ins;
MatrixCharacteristics mc1 = dims.get(realIns.getInput1());
MatrixCharacteristics mc2 = dims.get(realIns.getInput2());
// output is (columns of input1) x (columns of input2), with the block sizes of input1
dimOut.set(mc1.numColumns, mc2.numColumns, mc1.numRowsPerBlock, mc1.numColumnsPerBlock);
} else if (ins instanceof QuaternaryInstruction) {
QuaternaryInstruction realIns = (QuaternaryInstruction) ins;
MatrixCharacteristics mc1 = dims.get(realIns.getInput1());
MatrixCharacteristics mc2 = dims.get(realIns.getInput2());
MatrixCharacteristics mc3 = dims.get(realIns.getInput3());
realIns.computeMatrixCharacteristics(mc1, mc2, mc3, dimOut);
} else if (ins instanceof ReblockInstruction) {
ReblockInstruction realIns = (ReblockInstruction) ins;
MatrixCharacteristics in_dim = dims.get(realIns.input);
// same dimensions and non-zeros as the input, block sizes taken from the instruction
dimOut.set(in_dim.numRows, in_dim.numColumns, realIns.brlen, realIns.bclen, in_dim.nonZero);
} else if (ins instanceof MatrixReshapeMRInstruction) {
MatrixReshapeMRInstruction mrinst = (MatrixReshapeMRInstruction) ins;
MatrixCharacteristics in_dim = dims.get(mrinst.input);
// dimensions taken from the instruction; block sizes and non-zeros carried over from the input
dimOut.set(mrinst.getNumRows(), mrinst.getNumColunms(), in_dim.getRowsPerBlock(), in_dim.getColsPerBlock(), in_dim.getNonZeros());
} else if (ins instanceof RandInstruction || ins instanceof SeqInstruction) {
// data generators: copy the characteristics registered under the generator's input index
DataGenMRInstruction dataIns = (DataGenMRInstruction) ins;
dimOut.set(dims.get(dataIns.getInput()));
} else if (ins instanceof ReplicateInstruction) {
ReplicateInstruction realIns = (ReplicateInstruction) ins;
realIns.computeOutputDimension(dims.get(realIns.input), dimOut);
} else if (// before unary
ins instanceof ParameterizedBuiltinMRInstruction) {
ParameterizedBuiltinMRInstruction realIns = (ParameterizedBuiltinMRInstruction) ins;
realIns.computeOutputCharacteristics(dims.get(realIns.input), dimOut);
} else if (ins instanceof ScalarInstruction || ins instanceof AggregateInstruction || (ins instanceof UnaryInstruction && !(ins instanceof MMTSJMRInstruction)) || ins instanceof ZeroOutInstruction) {
// shape-preserving unary operations: output characteristics equal those of the input
// (MMTSJMRInstruction is explicitly excluded here and handled in the next branch)
UnaryMRInstructionBase realIns = (UnaryMRInstructionBase) ins;
dimOut.set(dims.get(realIns.input));
} else if (ins instanceof MMTSJMRInstruction) {
MMTSJMRInstruction mmtsj = (MMTSJMRInstruction) ins;
MMTSJType tstype = mmtsj.getMMTSJType();
MatrixCharacteristics mc = dims.get(mmtsj.input);
// square output: columns x columns for the left type, rows x rows otherwise
dimOut.set(tstype.isLeft() ? mc.numColumns : mc.numRows, tstype.isLeft() ? mc.numColumns : mc.numRows, mc.numRowsPerBlock, mc.numColumnsPerBlock);
} else if (ins instanceof PMMJMRInstruction) {
PMMJMRInstruction pmmins = (PMMJMRInstruction) ins;
MatrixCharacteristics mc = dims.get(pmmins.input2);
// rows taken from the instruction; columns and block sizes from the second input
dimOut.set(pmmins.getNumRows(), mc.numColumns, mc.numRowsPerBlock, mc.numColumnsPerBlock);
} else if (ins instanceof RemoveEmptyMRInstruction) {
RemoveEmptyMRInstruction realIns = (RemoveEmptyMRInstruction) ins;
MatrixCharacteristics mc = dims.get(realIns.input1);
// lower bound for the reduced dimension: 1 if isEmptyReturn() is set, 0 otherwise
long min = realIns.isEmptyReturn() ? 1 : 0;
if (realIns.isRemoveRows())
dimOut.set(Math.max(realIns.getOutputLen(), min), mc.getCols(), mc.numRowsPerBlock, mc.numColumnsPerBlock);
else
dimOut.set(mc.getRows(), Math.max(realIns.getOutputLen(), min), mc.numRowsPerBlock, mc.numColumnsPerBlock);
} else if (// needs to be checked before binary
ins instanceof UaggOuterChainInstruction) {
UaggOuterChainInstruction realIns = (UaggOuterChainInstruction) ins;
MatrixCharacteristics mc1 = dims.get(realIns.input1);
MatrixCharacteristics mc2 = dims.get(realIns.input2);
realIns.computeOutputCharacteristics(mc1, mc2, dimOut);
} else if (ins instanceof GroupedAggregateMInstruction) {
GroupedAggregateMInstruction realIns = (GroupedAggregateMInstruction) ins;
MatrixCharacteristics mc1 = dims.get(realIns.input1);
realIns.computeOutputCharacteristics(mc1, dimOut);
} else if (ins instanceof BinaryInstruction || ins instanceof BinaryMInstruction || ins instanceof CombineBinaryInstruction) {
BinaryMRInstructionBase realIns = (BinaryMRInstructionBase) ins;
MatrixCharacteristics mc1 = dims.get(realIns.input1);
MatrixCharacteristics mc2 = dims.get(realIns.input2);
// column vector (rows > 1, cols == 1) combined with row vector (rows == 1, cols > 1):
// output takes the rows of the first and the columns of the second input
if (mc1.getRows() > 1 && mc1.getCols() == 1 && mc2.getRows() == 1 && // outer
mc2.getCols() > 1) {
dimOut.set(mc1.getRows(), mc2.getCols(), mc1.getRowsPerBlock(), mc2.getColsPerBlock());
} else {
// default case: output characteristics equal those of the first input
dimOut.set(mc1);
}
} else if (ins instanceof TernaryInstruction) {
// output characteristics equal those of the first input index
dimOut.set(dims.get(ins.getInputIndexes()[0]));
} else if (ins instanceof CombineTernaryInstruction) {
// checked before the generic CtableInstruction branch further below; the cast suggests
// CombineTernaryInstruction is a subtype of CtableInstruction - TODO confirm
CtableInstruction realIns = (CtableInstruction) ins;
dimOut.set(dims.get(realIns.input1));
} else if (ins instanceof CombineUnaryInstruction) {
dimOut.set(dims.get(((CombineUnaryInstruction) ins).input));
} else if (ins instanceof CM_N_COVInstruction || ins instanceof GroupedAggregateInstruction) {
// output is set to 1 x 1 with 1 x 1 blocks
dimOut.set(1, 1, 1, 1);
} else if (ins instanceof RangeBasedReIndexInstruction) {
RangeBasedReIndexInstruction realIns = (RangeBasedReIndexInstruction) ins;
MatrixCharacteristics dimIn = dims.get(realIns.input);
realIns.computeOutputCharacteristics(dimIn, dimOut);
} else if (ins instanceof CtableInstruction) {
CtableInstruction realIns = (CtableInstruction) ins;
MatrixCharacteristics in_dim = dims.get(realIns.input1);
// output dimensions taken from the instruction; block sizes from the first input
dimOut.set(realIns.getOutputDim1(), realIns.getOutputDim2(), in_dim.numRowsPerBlock, in_dim.numColumnsPerBlock);
} else {
/*
* if ins is none of the above cases then we assume that the dimOut dimensions are unknown
* (-1 rows, -1 columns) and fall back to 1 x 1 blocks
*/
dimOut.numRows = -1;
dimOut.numColumns = -1;
dimOut.numRowsPerBlock = 1;
dimOut.numColumnsPerBlock = 1;
}
}
use of org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction in project systemml by apache.
the class CostEstimatorStaticRuntime method getMapOutputIndexes.
/**
 * Computes the matrix indexes that are referenced on both the map side and the reduce
 * side of a job, i.e. the intersection of
 * <ul>
 * <li>the job input indexes plus all indexes of the data generation and mapper
 * instructions, and</li>
 * <li>the result indexes plus all indexes of the shuffle, aggregate and other
 * instructions.</li>
 * </ul>
 * Instruction strings that are null or empty are skipped; all others are passed through
 * {@code replaceInstructionPatch} before being parsed.
 *
 * @param inIx      input matrix indexes of the job
 * @param retIx     result matrix indexes of the job
 * @param rdInst    data generation instructions, may be null or empty
 * @param mapInst   mapper instructions, may be null or empty
 * @param shfInst   shuffle instructions, may be null or empty
 * @param aggInst   aggregate instructions, may be null or empty
 * @param otherInst other instructions, may be null or empty
 * @return indexes present on both sides, in the iteration order of the underlying hash set
 */
private byte[] getMapOutputIndexes(byte[] inIx, byte[] retIx, String rdInst, String mapInst, String shfInst, String aggInst, String otherInst) {
    // note: this is a simplified version of MRJobConfiguration.setUpOutputIndexesForMapper

    // map side: job inputs plus every index touched by datagen and mapper instructions
    HashSet<Byte> mapSide = new HashSet<>();
    for (int k = 0; k < inIx.length; k++) {
        mapSide.add(inIx[k]);
    }
    if (rdInst != null && !rdInst.isEmpty()) {
        String patched = replaceInstructionPatch(rdInst);
        DataGenMRInstruction[] parsed = MRInstructionParser.parseDataGenInstructions(patched);
        for (DataGenMRInstruction cur : parsed) {
            for (byte ix : cur.getAllIndexes()) {
                mapSide.add(ix);
            }
        }
    }
    if (mapInst != null && !mapInst.isEmpty()) {
        String patched = replaceInstructionPatch(mapInst);
        MRInstruction[] parsed = MRInstructionParser.parseMixedInstructions(patched);
        for (MRInstruction cur : parsed) {
            for (byte ix : cur.getAllIndexes()) {
                mapSide.add(ix);
            }
        }
    }

    // reduce side: job results plus every index touched by shuffle, aggregate and other instructions
    HashSet<Byte> reduceSide = new HashSet<>();
    for (int k = 0; k < retIx.length; k++) {
        reduceSide.add(retIx[k]);
    }
    if (shfInst != null && !shfInst.isEmpty()) {
        String patched = replaceInstructionPatch(shfInst);
        MRInstruction[] parsed = MRInstructionParser.parseMixedInstructions(patched);
        for (MRInstruction cur : parsed) {
            for (byte ix : cur.getAllIndexes()) {
                reduceSide.add(ix);
            }
        }
    }
    if (aggInst != null && !aggInst.isEmpty()) {
        String patched = replaceInstructionPatch(aggInst);
        MRInstruction[] parsed = MRInstructionParser.parseAggregateInstructions(patched);
        for (MRInstruction cur : parsed) {
            for (byte ix : cur.getAllIndexes()) {
                reduceSide.add(ix);
            }
        }
    }
    if (otherInst != null && !otherInst.isEmpty()) {
        String patched = replaceInstructionPatch(otherInst);
        MRInstruction[] parsed = MRInstructionParser.parseMixedInstructions(patched);
        for (MRInstruction cur : parsed) {
            for (byte ix : cur.getAllIndexes()) {
                reduceSide.add(ix);
            }
        }
    }

    // intersection: keep only the map-side indexes that are also referenced on the reduce side
    mapSide.retainAll(reduceSide);

    // copy the surviving indexes into a primitive array
    byte[] shared = new byte[mapSide.size()];
    int pos = 0;
    for (byte ix : mapSide) {
        shared[pos] = ix;
        pos++;
    }
    return shared;
}
use of org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction in project systemml by apache.
the class MRJobConfiguration method computeMatrixCharacteristics.
/**
 * Propagates matrix characteristics through all instructions of an MR job, records them in
 * the job configuration and determines the number of reducer groups.
 * <p>
 * NOTE: this method needs to be in-sync with MRBaseForCommonInstructions.processOneInstruction,
 * otherwise, the latter will potentially fail with missing dimension information.
 *
 * @param job job configuration (read for the input characteristics, updated with the derived ones)
 * @param inputIndexes array of byte indexes
 * @param dataGenInstructions data gen instructions as a string
 * @param instructionsInMapper instruction in mapper as a string
 * @param reblockInstructions reblock instructions as a string
 * @param aggInstructionsInReducer aggregate instructions in reducer as a string
 * @param aggBinInstructions binary aggregate instructions as a string; not read by this
 *        method, which obtains them via {@code getAggregateBinaryInstructions(job)} instead
 * @param otherInstructionsInReducer other instructions in reducer as a string
 * @param resultIndexes array of byte result indexes
 * @param mapOutputIndexes set of map output indexes
 * @param forMMCJ if true, the number of reducer groups is the number of column blocks of the
 *        first input of the aggregate binary instruction; if false, it is derived from the
 *        block counts of the map outputs
 * @return characteristics of the result matrices together with the number of reducer groups
 */
public static MatrixChar_N_ReducerGroups computeMatrixCharacteristics(JobConf job, byte[] inputIndexes, String dataGenInstructions, String instructionsInMapper, String reblockInstructions, String aggInstructionsInReducer, String aggBinInstructions, String otherInstructionsInReducer, byte[] resultIndexes, HashSet<Byte> mapOutputIndexes, boolean forMMCJ) {
    HashMap<Byte, MatrixCharacteristics> dims = new HashMap<>();
    HashSet<Byte> intermediates = new HashSet<>();

    // seed with the characteristics of the job inputs as stored in the job configuration
    for (int k = 0; k < inputIndexes.length; k++) {
        byte inIx = inputIndexes[k];
        dims.put(inIx, new MatrixCharacteristics(getNumRows(job, inIx), getNumColumns(job, inIx), getNumRowsPerBlock(job, inIx), getNumColumnsPerBlock(job, inIx), getNumNonZero(job, inIx)));
    }

    // data generation instructions
    DataGenMRInstruction[] genIns = MRInstructionParser.parseDataGenInstructions(dataGenInstructions);
    if (genIns != null) {
        for (DataGenMRInstruction gen : genIns) {
            MatrixCharacteristics.computeDimension(dims, gen);
        }
    }

    // mapper instructions: besides the output, register one input per instruction as intermediate
    MRInstruction[] mapIns = MRInstructionParser.parseMixedInstructions(instructionsInMapper);
    if (mapIns != null) {
        for (MRInstruction op : mapIns) {
            MatrixCharacteristics.computeDimension(dims, op);
            if (op instanceof UnaryMRInstructionBase) {
                UnaryMRInstructionBase unary = (UnaryMRInstructionBase) op;
                setIntermediateMatrixCharactristics(job, unary.input, dims.get(unary.input));
                intermediates.add(unary.input);
            } else if (op instanceof AppendMInstruction) {
                AppendMInstruction appendM = (AppendMInstruction) op;
                setIntermediateMatrixCharactristics(job, appendM.input1, dims.get(appendM.input1));
                intermediates.add(appendM.input1);
            } else if (op instanceof AppendGInstruction) {
                AppendGInstruction appendG = (AppendGInstruction) op;
                setIntermediateMatrixCharactristics(job, appendG.input1, dims.get(appendG.input1));
                intermediates.add(appendG.input1);
            } else if (op instanceof BinaryMInstruction) {
                BinaryMInstruction binaryM = (BinaryMInstruction) op;
                setIntermediateMatrixCharactristics(job, binaryM.input1, dims.get(binaryM.input1));
                intermediates.add(binaryM.input1);
            } else if (op instanceof AggregateBinaryInstruction) {
                AggregateBinaryInstruction aggBin = (AggregateBinaryInstruction) op;
                setIntermediateMatrixCharactristics(job, aggBin.input1, dims.get(aggBin.input1));
                // TODO
                intermediates.add(aggBin.input1);
            } else if (op instanceof MapMultChainInstruction) {
                MapMultChainInstruction chain = (MapMultChainInstruction) op;
                // NOTE(review): the characteristics of input2 are stored under the index of
                // input1 - confirm whether this is intended
                setIntermediateMatrixCharactristics(job, chain.getInput1(), dims.get(chain.getInput2()));
                intermediates.add(chain.getInput1());
            } else if (op instanceof PMMJMRInstruction) {
                PMMJMRInstruction pmm = (PMMJMRInstruction) op;
                setIntermediateMatrixCharactristics(job, pmm.input2, dims.get(pmm.input2));
                intermediates.add(pmm.input2);
            }
        }
    }

    // reblock instructions
    ReblockInstruction[] reblockIns = MRInstructionParser.parseReblockInstructions(reblockInstructions);
    if (reblockIns != null) {
        for (ReblockInstruction rb : reblockIns) {
            MatrixCharacteristics.computeDimension(dims, rb);
            setMatrixCharactristicsForReblock(job, rb.output, dims.get(rb.output));
        }
    }

    // aggregate instructions in the reducer
    Instruction[] aggIns = MRInstructionParser.parseAggregateInstructions(aggInstructionsInReducer);
    if (aggIns != null) {
        for (Instruction raw : aggIns) {
            MRInstruction agg = (MRInstruction) raw;
            MatrixCharacteristics.computeDimension(dims, agg);
            // outputs that are not job results are registered as intermediates in the jobconf
            boolean isResult = false;
            for (int k = 0; k < resultIndexes.length; k++) {
                if (resultIndexes[k] == agg.output) {
                    isResult = true;
                    break;
                }
            }
            if (!isResult) {
                setIntermediateMatrixCharactristics(job, agg.output, dims.get(agg.output));
                intermediates.add(agg.output);
            }
        }
    }

    // aggregate binary instructions (taken from the job configuration)
    long numReduceGroups = 0;
    AggregateBinaryInstruction[] aggBinIns = getAggregateBinaryInstructions(job);
    if (aggBinIns != null) {
        for (AggregateBinaryInstruction aggBin : aggBinIns) {
            MatrixCharacteristics left = dims.get(aggBin.input1);
            MatrixCharacteristics right = dims.get(aggBin.input2);
            setMatrixCharactristicsForBinAgg(job, aggBin.input1, left);
            setMatrixCharactristicsForBinAgg(job, aggBin.input2, right);
            MatrixCharacteristics.computeDimension(dims, aggBin);
            // there will be only one aggbin operation for MMCJ
            if (forMMCJ) {
                numReduceGroups = (long) Math.ceil((double) left.getCols() / (double) left.getColsPerBlock());
            }
        }
    }

    if (!forMMCJ) {
        // store the skylines: block-count pairs (x, y) of map outputs not included in any other pair
        ArrayList<Long> xs = new ArrayList<>(mapOutputIndexes.size());
        ArrayList<Long> ys = new ArrayList<>(mapOutputIndexes.size());
        for (byte idx : mapOutputIndexes) {
            MatrixCharacteristics dim = dims.get(idx);
            long x = (long) Math.ceil((double) dim.getRows() / (double) dim.getRowsPerBlock());
            long y = (long) Math.ceil((double) dim.getCols() / (double) dim.getColsPerBlock());
            boolean included = false;
            for (int pos = 0; pos < xs.size(); ) {
                long otherX = xs.get(pos);
                long otherY = ys.get(pos);
                if ((x >= otherX && y > otherY) || (x > otherX && y >= otherY)) {
                    // remove any included x's and y's
                    xs.remove(pos);
                    ys.remove(pos);
                } else if (x <= otherX && y <= otherY) {
                    // if included in others, stop
                    included = true;
                    break;
                } else {
                    pos++;
                }
            }
            if (!included) {
                xs.add(x);
                ys.add(y);
            }
        }

        // sort by x
        TreeMap<Long, Long> byX = new TreeMap<>();
        for (int k = 0; k < xs.size(); k++) {
            byX.put(xs.get(k), ys.get(k));
        }

        // compute area
        numReduceGroups = 0;
        long prevX = 0;
        for (Entry<Long, Long> step : byX.entrySet()) {
            numReduceGroups += (step.getKey() - prevX) * step.getValue();
            prevX = step.getKey();
        }
    }

    // other instructions in the reducer
    MRInstruction[] redIns = MRInstructionParser.parseMixedInstructions(otherInstructionsInReducer);
    if (redIns != null) {
        for (MRInstruction op : redIns) {
            MatrixCharacteristics.computeDimension(dims, op);
            if (op instanceof UnaryMRInstructionBase) {
                UnaryMRInstructionBase unary = (UnaryMRInstructionBase) op;
                setIntermediateMatrixCharactristics(job, unary.input, dims.get(unary.input));
                intermediates.add(unary.input);
            } else if (op instanceof RemoveEmptyMRInstruction) {
                RemoveEmptyMRInstruction rmEmpty = (RemoveEmptyMRInstruction) op;
                setIntermediateMatrixCharactristics(job, rmEmpty.input1, dims.get(rmEmpty.input1));
                intermediates.add(rmEmpty.input1);
            }
            // outputs that are not job results are registered as intermediates in the jobconf
            boolean isResult = false;
            for (int k = 0; k < resultIndexes.length; k++) {
                if (resultIndexes[k] == op.output) {
                    isResult = true;
                    break;
                }
            }
            if (!isResult) {
                setIntermediateMatrixCharactristics(job, op.output, dims.get(op.output));
                intermediates.add(op.output);
            }
        }
    }

    // publish intermediates, mapper outputs and job results to the job configuration
    setIntermediateMatrixIndexes(job, intermediates);
    for (byte tag : mapOutputIndexes) {
        setMatrixCharactristicsForMapperOutput(job, tag, dims.get(tag));
    }
    MatrixCharacteristics[] stats = new MatrixCharacteristics[resultIndexes.length];
    for (int k = 0; k < resultIndexes.length; k++) {
        stats[k] = dims.get(resultIndexes[k]);
        setMatrixCharactristicsForOutput(job, resultIndexes[k], stats[k]);
    }
    return new MatrixChar_N_ReducerGroups(stats, numReduceGroups);
}
use of org.apache.sysml.runtime.instructions.mr.DataGenMRInstruction in project incubator-systemml by apache.
the class CostEstimatorStaticRuntime method getMapOutputIndexes.
/**
 * Computes the matrix indexes that are referenced on both the map side and the reduce
 * side of a job, i.e. the intersection of
 * <ul>
 * <li>the job input indexes plus all indexes of the data generation and mapper
 * instructions, and</li>
 * <li>the result indexes plus all indexes of the shuffle, aggregate and other
 * instructions.</li>
 * </ul>
 * Instruction strings that are null or empty are skipped; all others are passed through
 * {@code replaceInstructionPatch} before being parsed.
 *
 * @param inIx      input matrix indexes of the job
 * @param retIx     result matrix indexes of the job
 * @param rdInst    data generation instructions, may be null or empty
 * @param mapInst   mapper instructions, may be null or empty
 * @param shfInst   shuffle instructions, may be null or empty
 * @param aggInst   aggregate instructions, may be null or empty
 * @param otherInst other instructions, may be null or empty
 * @return indexes present on both sides, in the iteration order of the underlying hash set
 */
private byte[] getMapOutputIndexes(byte[] inIx, byte[] retIx, String rdInst, String mapInst, String shfInst, String aggInst, String otherInst) {
    // note: this is a simplified version of MRJobConfiguration.setUpOutputIndexesForMapper

    // map side: job inputs plus every index touched by datagen and mapper instructions
    HashSet<Byte> mapSide = new HashSet<>();
    for (int k = 0; k < inIx.length; k++) {
        mapSide.add(inIx[k]);
    }
    if (rdInst != null && !rdInst.isEmpty()) {
        String patched = replaceInstructionPatch(rdInst);
        DataGenMRInstruction[] parsed = MRInstructionParser.parseDataGenInstructions(patched);
        for (DataGenMRInstruction cur : parsed) {
            for (byte ix : cur.getAllIndexes()) {
                mapSide.add(ix);
            }
        }
    }
    if (mapInst != null && !mapInst.isEmpty()) {
        String patched = replaceInstructionPatch(mapInst);
        MRInstruction[] parsed = MRInstructionParser.parseMixedInstructions(patched);
        for (MRInstruction cur : parsed) {
            for (byte ix : cur.getAllIndexes()) {
                mapSide.add(ix);
            }
        }
    }

    // reduce side: job results plus every index touched by shuffle, aggregate and other instructions
    HashSet<Byte> reduceSide = new HashSet<>();
    for (int k = 0; k < retIx.length; k++) {
        reduceSide.add(retIx[k]);
    }
    if (shfInst != null && !shfInst.isEmpty()) {
        String patched = replaceInstructionPatch(shfInst);
        MRInstruction[] parsed = MRInstructionParser.parseMixedInstructions(patched);
        for (MRInstruction cur : parsed) {
            for (byte ix : cur.getAllIndexes()) {
                reduceSide.add(ix);
            }
        }
    }
    if (aggInst != null && !aggInst.isEmpty()) {
        String patched = replaceInstructionPatch(aggInst);
        MRInstruction[] parsed = MRInstructionParser.parseAggregateInstructions(patched);
        for (MRInstruction cur : parsed) {
            for (byte ix : cur.getAllIndexes()) {
                reduceSide.add(ix);
            }
        }
    }
    if (otherInst != null && !otherInst.isEmpty()) {
        String patched = replaceInstructionPatch(otherInst);
        MRInstruction[] parsed = MRInstructionParser.parseMixedInstructions(patched);
        for (MRInstruction cur : parsed) {
            for (byte ix : cur.getAllIndexes()) {
                reduceSide.add(ix);
            }
        }
    }

    // intersection: keep only the map-side indexes that are also referenced on the reduce side
    mapSide.retainAll(reduceSide);

    // copy the surviving indexes into a primitive array
    byte[] shared = new byte[mapSide.size()];
    int pos = 0;
    for (byte ix : mapSide) {
        shared[pos] = ix;
        pos++;
    }
    return shared;
}
Aggregations