Use of org.apache.sysml.lops.Data in project incubator-systemml by apache.
The class Dag, method generateMapReduceInstructions.
/**
* Method to generate MapReduce job instructions from a given set of nodes.
*
* @param execNodes list of exec nodes
* @param inst list of instructions
* @param writeinst list of write instructions
* @param deleteinst list of delete instructions
* @param rmvarinst list of rmvar instructions
* @param jt job type
*/
private void generateMapReduceInstructions(ArrayList<Lop> execNodes, ArrayList<Instruction> inst, ArrayList<Instruction> writeinst, ArrayList<Instruction> deleteinst, ArrayList<Instruction> rmvarinst, JobType jt) {
ArrayList<Byte> resultIndices = new ArrayList<>();
ArrayList<String> inputs = new ArrayList<>();
ArrayList<String> outputs = new ArrayList<>();
ArrayList<InputInfo> inputInfos = new ArrayList<>();
ArrayList<OutputInfo> outputInfos = new ArrayList<>();
ArrayList<Long> numRows = new ArrayList<>();
ArrayList<Long> numCols = new ArrayList<>();
ArrayList<Long> numRowsPerBlock = new ArrayList<>();
ArrayList<Long> numColsPerBlock = new ArrayList<>();
ArrayList<String> mapperInstructions = new ArrayList<>();
ArrayList<String> randInstructions = new ArrayList<>();
ArrayList<String> recordReaderInstructions = new ArrayList<>();
int numReducers = 0;
int replication = 1;
ArrayList<String> inputLabels = new ArrayList<>();
ArrayList<String> outputLabels = new ArrayList<>();
ArrayList<Instruction> renameInstructions = new ArrayList<>();
ArrayList<Instruction> variableInstructions = new ArrayList<>();
ArrayList<Instruction> postInstructions = new ArrayList<>();
ArrayList<Integer> MRJobLineNumbers = null;
if (DMLScript.ENABLE_DEBUG_MODE) {
MRJobLineNumbers = new ArrayList<>();
}
ArrayList<Lop> inputLops = new ArrayList<>();
boolean cellModeOverride = false;
/* Find the nodes that produce an output */
ArrayList<Lop> rootNodes = new ArrayList<>();
getOutputNodes(execNodes, rootNodes, jt);
if (LOG.isTraceEnabled())
LOG.trace("# of root nodes = " + rootNodes.size());
/* Remove transient writes that are simple copy of transient reads */
if (jt == JobType.GMR || jt == JobType.GMRCELL) {
ArrayList<Lop> markedNodes = new ArrayList<>();
// only keep data nodes that are results of some computation.
for (Lop rnode : rootNodes) {
if (rnode.getExecLocation() == ExecLocation.Data && ((Data) rnode).isTransient() && ((Data) rnode).getOperationType() == OperationTypes.WRITE && ((Data) rnode).getDataType() == DataType.MATRIX) {
// no computation, just a copy
if (rnode.getInputs().get(0).getExecLocation() == ExecLocation.Data && ((Data) rnode.getInputs().get(0)).isTransient() && rnode.getOutputParameters().getLabel().equals(rnode.getInputs().get(0).getOutputParameters().getLabel())) {
markedNodes.add(rnode);
}
}
}
// delete marked nodes
rootNodes.removeAll(markedNodes);
markedNodes.clear();
if (rootNodes.isEmpty())
return;
}
// structure that maps each node to the index used for it in the generated instructions
HashMap<Lop, Integer> nodeIndexMapping = new HashMap<>();
for (Lop rnode : rootNodes) {
getInputPathsAndParameters(rnode, execNodes, inputs, inputInfos, numRows, numCols, numRowsPerBlock, numColsPerBlock, nodeIndexMapping, inputLabels, inputLops, MRJobLineNumbers);
}
// in case of a DATAGEN (rand) job, the instructions are encoded in the input strings
if (jt == JobType.DATAGEN)
randInstructions = inputs;
int[] start_index = new int[1];
start_index[0] = inputs.size();
// currently, recordreader instructions are allowed only in GMR jobs
if (jt == JobType.GMR || jt == JobType.GMRCELL) {
for (Lop rnode : rootNodes) {
getRecordReaderInstructions(rnode, execNodes, inputs, recordReaderInstructions, nodeIndexMapping, start_index, inputLabels, inputLops, MRJobLineNumbers);
if (recordReaderInstructions.size() > 1)
throw new LopsException("MapReduce job can only have a single recordreader instruction: " + recordReaderInstructions.toString());
}
}
// cell-based binary/text inputs force the job into cell mode (REBLOCK, CSV_REBLOCK, and DATAGEN jobs are exempt because they reblock their inputs)
if (jt != JobType.REBLOCK && jt != JobType.CSV_REBLOCK && jt != JobType.DATAGEN) {
for (int i = 0; i < inputInfos.size(); i++) {
if (inputInfos.get(i) == InputInfo.BinaryCellInputInfo || inputInfos.get(i) == InputInfo.TextCellInputInfo)
cellModeOverride = true;
}
}
if (!recordReaderInstructions.isEmpty() || jt == JobType.GROUPED_AGG)
cellModeOverride = true;
for (int i = 0; i < rootNodes.size(); i++) {
getMapperInstructions(rootNodes.get(i), execNodes, inputs, mapperInstructions, nodeIndexMapping, start_index, inputLabels, inputLops, MRJobLineNumbers);
}
if (LOG.isTraceEnabled()) {
LOG.trace(" Input strings: " + inputs.toString());
if (jt == JobType.DATAGEN)
LOG.trace(" Rand instructions: " + getCSVString(randInstructions));
if (jt == JobType.GMR)
LOG.trace(" RecordReader instructions: " + getCSVString(recordReaderInstructions));
LOG.trace(" Mapper instructions: " + getCSVString(mapperInstructions));
}
/* Get Shuffle and Reducer Instructions */
ArrayList<String> shuffleInstructions = new ArrayList<>();
ArrayList<String> aggInstructionsReducer = new ArrayList<>();
ArrayList<String> otherInstructionsReducer = new ArrayList<>();
for (Lop rn : rootNodes) {
int resultIndex = getAggAndOtherInstructions(rn, execNodes, shuffleInstructions, aggInstructionsReducer, otherInstructionsReducer, nodeIndexMapping, start_index, inputLabels, inputLops, MRJobLineNumbers);
if (resultIndex == -1)
throw new LopsException("Unexpected error in piggybacking!");
if (rn.getExecLocation() == ExecLocation.Data && ((Data) rn).getOperationType() == Data.OperationTypes.WRITE && ((Data) rn).isTransient() && rootNodes.contains(rn.getInputs().get(0))) {
// Both rn (a transient write) and its input are root nodes.
// Instead of creating two copies of the data, simply generate a cpvar instruction
NodeOutput out = setupNodeOutputs(rn, ExecType.MR, cellModeOverride, true);
writeinst.addAll(out.getLastInstructions());
} else {
resultIndices.add(Byte.valueOf((byte) resultIndex));
// setup output filenames and outputInfos and generate related instructions
NodeOutput out = setupNodeOutputs(rn, ExecType.MR, cellModeOverride, false);
outputLabels.add(out.getVarName());
outputs.add(out.getFileName());
outputInfos.add(out.getOutInfo());
if (LOG.isTraceEnabled()) {
LOG.trace(" Output Info: " + out.getFileName() + ";" + OutputInfo.outputInfoToString(out.getOutInfo()) + ";" + out.getVarName());
}
renameInstructions.addAll(out.getLastInstructions());
variableInstructions.addAll(out.getPreInstructions());
postInstructions.addAll(out.getPostInstructions());
}
}
/* Determine if the output dimensions are known */
byte[] resultIndicesByte = new byte[resultIndices.size()];
for (int i = 0; i < resultIndicesByte.length; i++) {
resultIndicesByte[i] = resultIndices.get(i).byteValue();
}
if (LOG.isTraceEnabled()) {
LOG.trace(" Shuffle Instructions: " + getCSVString(shuffleInstructions));
LOG.trace(" Aggregate Instructions: " + getCSVString(aggInstructionsReducer));
LOG.trace(" Other instructions =" + getCSVString(otherInstructionsReducer));
LOG.trace(" Output strings: " + outputs.toString());
LOG.trace(" ResultIndices = " + resultIndices.toString());
}
/* Prepare the MapReduce job instruction */
MRJobInstruction mr = new MRJobInstruction(jt);
// check if this is a map-only job. If not, set the number of reducers
if (!shuffleInstructions.isEmpty() || !aggInstructionsReducer.isEmpty() || !otherInstructionsReducer.isEmpty())
numReducers = total_reducers;
// set inputs, outputs, and other properties for the job
mr.setInputOutputLabels(inputLabels.toArray(new String[0]), outputLabels.toArray(new String[0]));
mr.setOutputs(resultIndicesByte);
mr.setDimsUnknownFilePrefix(getFilePath());
mr.setNumberOfReducers(numReducers);
mr.setReplication(replication);
// set instructions for recordReader and mapper
mr.setRecordReaderInstructions(getCSVString(recordReaderInstructions));
mr.setMapperInstructions(getCSVString(mapperInstructions));
// compute and set mapper memory requirements (for consistency of runtime piggybacking)
if (jt == JobType.GMR) {
double mem = 0;
for (Lop n : execNodes) mem += computeFootprintInMapper(n);
mr.setMemoryRequirements(mem);
}
if (jt == JobType.DATAGEN)
mr.setRandInstructions(getCSVString(randInstructions));
// set shuffle instructions
mr.setShuffleInstructions(getCSVString(shuffleInstructions));
// set reducer instructions
mr.setAggregateInstructionsInReducer(getCSVString(aggInstructionsReducer));
mr.setOtherInstructionsInReducer(getCSVString(otherInstructionsReducer));
if (DMLScript.ENABLE_DEBUG_MODE) {
// set line number information for each MR instruction
mr.setMRJobInstructionsLineNumbers(MRJobLineNumbers);
}
/* Add the prepared instructions to output set */
inst.addAll(variableInstructions);
inst.add(mr);
inst.addAll(postInstructions);
deleteinst.addAll(renameInstructions);
for (Lop l : inputLops) {
if (DMLScript.ENABLE_DEBUG_MODE) {
processConsumers(l, rmvarinst, deleteinst, l);
} else {
processConsumers(l, rmvarinst, deleteinst, null);
}
}
}
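For context, the sketch below shows how this private method is typically reached during lop compilation. It is a minimal illustration, assuming the addToDag/getJobs entry points used elsewhere in this code base (the pattern mirrors DMLTranslator usage; exact signatures may differ across versions).

import java.util.ArrayList;

import org.apache.sysml.conf.DMLConfig;
import org.apache.sysml.lops.Lop;
import org.apache.sysml.lops.compile.Dag;
import org.apache.sysml.parser.StatementBlock;
import org.apache.sysml.runtime.instructions.Instruction;

public class LopCompileSketch {
    // Compile the lop roots of a statement block into runtime instructions.
    public static ArrayList<Instruction> compile(StatementBlock sb, ArrayList<Lop> lopRoots, DMLConfig config) {
        Dag<Lop> dag = new Dag<>();
        // addToDag registers each root lop (and, transitively, its inputs) with the dag
        for (Lop root : lopRoots)
            root.addToDag(dag);
        // getJobs piggybacks compatible lops into shared MR jobs; for each packed job
        // it ends up calling generateMapReduceInstructions to emit the MRJobInstruction
        // together with its surrounding variable/rename/rmvar CP instructions
        return dag.getJobs(sb, config);
    }
}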
Use of org.apache.sysml.lops.Data in project incubator-systemml by apache.
The class Dag, method getMapperInstructions.
/**
* Method to get mapper instructions for a MR job.
*
* @param node low-level operator
* @param execNodes list of exec nodes
* @param inputStrings list of input strings
* @param instructionsInMapper list of instructions in mapper
* @param nodeIndexMapping mapping from each processed node to the index used for it in the generated instructions
* @param start_index starting index
* @param inputLabels list of input labels
* @param inputLops list of input lops
* @param MRJobLineNumbers MR job line numbers
* @return index assigned to the node's output, or -1 if the node is not an input source and not among the exec nodes
*/
private int getMapperInstructions(Lop node, ArrayList<Lop> execNodes, ArrayList<String> inputStrings, ArrayList<String> instructionsInMapper, HashMap<Lop, Integer> nodeIndexMapping, int[] start_index, ArrayList<String> inputLabels, ArrayList<Lop> inputLops, ArrayList<Integer> MRJobLineNumbers) {
// if input source, return index
if (nodeIndexMapping.containsKey(node))
return nodeIndexMapping.get(node);
// not input source and not in exec nodes, then return.
if (!execNodes.contains(node))
return -1;
ArrayList<Integer> inputIndices = new ArrayList<>();
int max_input_index = -1;
// recursively process child nodes and collect their output indices
for (Lop childNode : node.getInputs()) {
int ret_val = getMapperInstructions(childNode, execNodes, inputStrings, instructionsInMapper, nodeIndexMapping, start_index, inputLabels, inputLops, MRJobLineNumbers);
inputIndices.add(ret_val);
if (ret_val > max_input_index) {
max_input_index = ret_val;
}
}
// if this node runs in the mapper (Map or MapOrReduce) and has no child that must run in the reducer, add its instruction to the mapper instructions
if ((node.getExecLocation() == ExecLocation.Map || node.getExecLocation() == ExecLocation.MapOrReduce) && !hasChildNode(node, execNodes, ExecLocation.MapAndReduce) && !hasChildNode(node, execNodes, ExecLocation.Reduce)) {
// always allocate a fresh output index; indices are never reused
// (a better indexing scheme could reuse max_input_index in some cases)
int output_index = start_index[0];
start_index[0]++;
nodeIndexMapping.put(node, output_index);
if (node instanceof Unary && node.getInputs().size() > 1) {
// Following code must be executed only for those Unary
// operators that have more than one input
// It should not be executed for "true" unary operators like
// cos(A).
int index = 0;
for (int i1 = 0; i1 < node.getInputs().size(); i1++) {
if (node.getInputs().get(i1).getDataType() == DataType.SCALAR) {
index = i1;
break;
}
}
// if data lop not a literal -- add label
if (node.getInputs().get(index).getExecLocation() == ExecLocation.Data && !((Data) (node.getInputs().get(index))).isLiteral()) {
inputLabels.add(node.getInputs().get(index).getOutputParameters().getLabel());
inputLops.add(node.getInputs().get(index));
}
// if not data lop, then this is an intermediate variable.
if (node.getInputs().get(index).getExecLocation() != ExecLocation.Data) {
inputLabels.add(node.getInputs().get(index).getOutputParameters().getLabel());
inputLops.add(node.getInputs().get(index));
}
}
// get mapper instruction.
if (node.getInputs().size() == 1)
instructionsInMapper.add(node.getInstructions(inputIndices.get(0), output_index));
else if (node.getInputs().size() == 2) {
instructionsInMapper.add(node.getInstructions(inputIndices.get(0), inputIndices.get(1), output_index));
} else if (node.getInputs().size() == 3)
instructionsInMapper.add(node.getInstructions(inputIndices.get(0), inputIndices.get(1), inputIndices.get(2), output_index));
else if (node.getInputs().size() == 4) {
// Example: Reshape
instructionsInMapper.add(node.getInstructions(inputIndices.get(0), inputIndices.get(1), inputIndices.get(2), inputIndices.get(3), output_index));
} else if (node.getInputs().size() == 5) {
// Example: RangeBasedReIndex A[row_l:row_u, col_l:col_u]
instructionsInMapper.add(node.getInstructions(inputIndices.get(0), inputIndices.get(1), inputIndices.get(2), inputIndices.get(3), inputIndices.get(4), output_index));
} else if (node.getInputs().size() == 7) {
// Example: RangeBasedReIndex A[row_l:row_u, col_l:col_u] = B
instructionsInMapper.add(node.getInstructions(inputIndices.get(0), inputIndices.get(1), inputIndices.get(2), inputIndices.get(3), inputIndices.get(4), inputIndices.get(5), inputIndices.get(6), output_index));
} else
throw new LopsException("Node with " + node.getInputs().size() + " inputs is not supported in dag.java.");
if (DMLScript.ENABLE_DEBUG_MODE) {
MRJobLineNumbers.add(node._beginLine);
}
return output_index;
}
return -1;
}
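The recursion above gives every operator a unique output index via nodeIndexMapping and the start_index counter. The stand-alone sketch below (with a hypothetical Node type, written as an illustration rather than SystemML code) isolates that pattern: a memoized post-order walk that numbers children before their consumer and never reuses an index.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class IndexAssignSketch {
    static class Node {
        final List<Node> inputs = new ArrayList<>();
        Node(Node... in) { inputs.addAll(Arrays.asList(in)); }
    }

    static int assignIndex(Node n, Map<Node, Integer> nodeIndexMapping, int[] nextIndex) {
        Integer cached = nodeIndexMapping.get(n);
        if (cached != null)
            return cached;           // already numbered (shared subexpression)
        for (Node in : n.inputs)
            assignIndex(in, nodeIndexMapping, nextIndex); // number children first
        int idx = nextIndex[0]++;    // fresh index, never reused
        nodeIndexMapping.put(n, idx);
        return idx;
    }

    public static void main(String[] args) {
        Node a = new Node(), b = new Node();
        Node c = new Node(a, b);     // c consumes a and b
        Map<Node, Integer> map = new HashMap<>();
        int[] next = { 0 };
        System.out.println(assignIndex(c, map, next)); // prints 2 (a=0, b=1, c=2)
    }
}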
Use of org.apache.sysml.lops.Data in project incubator-systemml by apache.
The class BinaryOp, method constructLopsIQM.
private void constructLopsIQM(ExecType et) {
if (et == ExecType.MR) {
CombineBinary combine = CombineBinary.constructCombineLop(OperationTypes.PreSort, (Lop) getInput().get(0).constructLops(), (Lop) getInput().get(1).constructLops(), DataType.MATRIX, getValueType());
combine.getOutputParameters().setDimensions(getInput().get(0).getDim1(), getInput().get(0).getDim2(), getInput().get(0).getRowsInBlock(), getInput().get(0).getColsInBlock(), getInput().get(0).getNnz());
SortKeys sort = SortKeys.constructSortByValueLop(combine, SortKeys.OperationTypes.WithWeights, DataType.MATRIX, ValueType.DOUBLE, ExecType.MR);
// Sort dimensions are the same as those of the first input
sort.getOutputParameters().setDimensions(getInput().get(0).getDim1(), getInput().get(0).getDim2(), getInput().get(0).getRowsInBlock(), getInput().get(0).getColsInBlock(), getInput().get(0).getNnz());
Data lit = Data.createLiteralLop(ValueType.DOUBLE, Double.toString(0.25));
setLineNumbers(lit);
PickByCount pick = new PickByCount(sort, lit, DataType.MATRIX, getValueType(), PickByCount.OperationTypes.RANGEPICK);
pick.getOutputParameters().setDimensions(-1, -1, getRowsInBlock(), getColsInBlock(), -1);
setLineNumbers(pick);
PartialAggregate pagg = new PartialAggregate(pick, HopsAgg2Lops.get(Hop.AggOp.SUM), HopsDirection2Lops.get(Hop.Direction.RowCol), DataType.MATRIX, getValueType());
setLineNumbers(pagg);
// Set the dimensions of PartialAggregate LOP based on the
// direction in which aggregation is performed
pagg.setDimensionsBasedOnDirection(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock());
Group group1 = new Group(pagg, Group.OperationTypes.Sort, DataType.MATRIX, getValueType());
setOutputDimensions(group1);
setLineNumbers(group1);
Aggregate agg1 = new Aggregate(group1, HopsAgg2Lops.get(Hop.AggOp.SUM), DataType.MATRIX, getValueType(), ExecType.MR);
setOutputDimensions(agg1);
agg1.setupCorrectionLocation(pagg.getCorrectionLocation());
setLineNumbers(agg1);
UnaryCP unary1 = new UnaryCP(agg1, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR), DataType.SCALAR, getValueType());
unary1.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
setLineNumbers(unary1);
Unary iqm = new Unary(sort, unary1, Unary.OperationTypes.MR_IQM, DataType.SCALAR, ValueType.DOUBLE, ExecType.CP);
iqm.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
setLineNumbers(iqm);
setLops(iqm);
} else {
SortKeys sort = SortKeys.constructSortByValueLop(getInput().get(0).constructLops(), getInput().get(1).constructLops(), SortKeys.OperationTypes.WithWeights, getInput().get(0).getDataType(), getInput().get(0).getValueType(), et);
sort.getOutputParameters().setDimensions(getInput().get(0).getDim1(), getInput().get(0).getDim2(), getInput().get(0).getRowsInBlock(), getInput().get(0).getColsInBlock(), getInput().get(0).getNnz());
PickByCount pick = new PickByCount(sort, null, getDataType(), getValueType(), PickByCount.OperationTypes.IQM, et, true);
setOutputDimensions(pick);
setLineNumbers(pick);
setLops(pick);
}
}
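Semantically, the MR lop chain above computes the weighted interquartile mean: the values are sorted together with their weights (CombineBinary + SortKeys), the mass between the 0.25 and 0.75 weighted quantiles is picked (PickByCount RANGEPICK with the 0.25 literal) and summed, and MR_IQM combines that sum with boundary corrections from the sorted data. The plain-Java sketch below shows the quantity being computed; it is an illustration of the math, not code from the runtime.

import java.util.Arrays;
import java.util.Comparator;

public class WeightedIqmSketch {
    // Weighted interquartile mean: mean of the central 50% of the weighted mass.
    static double iqm(double[] vals, double[] wts) {
        Integer[] order = new Integer[vals.length];
        for (int i = 0; i < order.length; i++) order[i] = i;
        Arrays.sort(order, Comparator.comparingDouble(i -> vals[i])); // sort by value
        double total = 0;
        for (double w : wts) total += w;
        double lo = 0.25 * total, hi = 0.75 * total; // weighted quantile boundaries
        double cum = 0, sum = 0;
        for (int i : order) {
            double start = cum, end = cum + wts[i];
            double overlap = Math.min(end, hi) - Math.max(start, lo);
            if (overlap > 0)
                sum += overlap * vals[i]; // partial weight at the boundaries
            cum = end;
        }
        return sum / (0.5 * total); // average over half of the total weight
    }

    public static void main(String[] args) {
        double[] v = { 1, 2, 3, 4 };
        double[] w = { 1, 1, 1, 1 };
        System.out.println(iqm(v, w)); // prints 2.5, the mean of the middle half
    }
}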
Use of org.apache.sysml.lops.Data in project incubator-systemml by apache.
The class UnaryOp, method constructLopsIQM.
private Lop constructLopsIQM() {
ExecType et = optFindExecType();
Hop input = getInput().get(0);
if (et == ExecType.MR) {
CombineUnary combine = CombineUnary.constructCombineLop(input.constructLops(), DataType.MATRIX, getValueType());
combine.getOutputParameters().setDimensions(input.getDim1(), input.getDim2(), input.getRowsInBlock(), input.getColsInBlock(), input.getNnz());
SortKeys sort = SortKeys.constructSortByValueLop(combine, SortKeys.OperationTypes.WithoutWeights, DataType.MATRIX, ValueType.DOUBLE, ExecType.MR);
// Sort dimensions are the same as those of the first input
sort.getOutputParameters().setDimensions(input.getDim1(), input.getDim2(), input.getRowsInBlock(), input.getColsInBlock(), input.getNnz());
Data lit = Data.createLiteralLop(ValueType.DOUBLE, Double.toString(0.25));
lit.setAllPositions(this.getFilename(), this.getBeginLine(), this.getBeginColumn(), this.getEndLine(), this.getEndColumn());
PickByCount pick = new PickByCount(sort, lit, DataType.MATRIX, getValueType(), PickByCount.OperationTypes.RANGEPICK);
pick.getOutputParameters().setDimensions(-1, -1, getRowsInBlock(), getColsInBlock(), -1);
setLineNumbers(pick);
PartialAggregate pagg = new PartialAggregate(pick, HopsAgg2Lops.get(Hop.AggOp.SUM), HopsDirection2Lops.get(Hop.Direction.RowCol), DataType.MATRIX, getValueType());
setLineNumbers(pagg);
// Set the dimensions of PartialAggregate LOP based on the
// direction in which aggregation is performed
pagg.setDimensionsBasedOnDirection(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock());
Group group1 = new Group(pagg, Group.OperationTypes.Sort, DataType.MATRIX, getValueType());
group1.getOutputParameters().setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
setLineNumbers(group1);
Aggregate agg1 = new Aggregate(group1, HopsAgg2Lops.get(Hop.AggOp.SUM), DataType.MATRIX, getValueType(), ExecType.MR);
agg1.getOutputParameters().setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
agg1.setupCorrectionLocation(pagg.getCorrectionLocation());
setLineNumbers(agg1);
UnaryCP unary1 = new UnaryCP(agg1, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR), getDataType(), getValueType());
unary1.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
setLineNumbers(unary1);
Unary iqm = new Unary(sort, unary1, Unary.OperationTypes.MR_IQM, DataType.SCALAR, ValueType.DOUBLE, ExecType.CP);
iqm.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
setLineNumbers(iqm);
return iqm;
} else {
SortKeys sort = SortKeys.constructSortByValueLop(input.constructLops(), SortKeys.OperationTypes.WithoutWeights, DataType.MATRIX, ValueType.DOUBLE, et);
sort.getOutputParameters().setDimensions(input.getDim1(), input.getDim2(), input.getRowsInBlock(), input.getColsInBlock(), input.getNnz());
PickByCount pick = new PickByCount(sort, null, getDataType(), getValueType(), PickByCount.OperationTypes.IQM, et, true);
pick.getOutputParameters().setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
setLineNumbers(pick);
return pick;
}
}
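The unweighted path differs from the BinaryOp variant only in sorting without weights (CombineUnary + SortKeys WithoutWeights); numerically it is the weighted case with unit weights. A hypothetical helper expressing that reduction against the WeightedIqmSketch above:

// Hypothetical reduction of the unweighted case to the weighted sketch above;
// iqm(double[], double[]) refers to WeightedIqmSketch.iqm.
static double iqm(double[] vals) {
    double[] ones = new double[vals.length];
    java.util.Arrays.fill(ones, 1.0); // unit weights
    return WeightedIqmSketch.iqm(vals, ones);
}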