use of org.apache.sysml.lops.Data in project incubator-systemml by apache.
the class UnaryOp method constructLopsIQM.
/**
 * Builds the lop (low-level operator) plan for the IQM operation on the first input of this hop.
 *
 * <p>For {@code ExecType.MR} the plan is a chain: combine, sort by value, range pick driven by the
 * literal 0.25, partial aggregate (SUM over RowCol), group, aggregate (SUM) and cast to scalar;
 * an {@code MR_IQM} unary lop then consumes both the sorted data and that scalar. For every other
 * execution type the plan is a sort by value followed by one {@code PickByCount} lop of type
 * {@code IQM}.
 *
 * @return root lop of the constructed plan
 * @throws HopsException if HopsException occurs
 * @throws LopsException if LopsException occurs
 */
private Lop constructLopsIQM() throws HopsException, LopsException {
    final ExecType execType = optFindExecType();
    final Hop source = getInput().get(0);

    if (execType != ExecType.MR) {
        // Non-MR: sort the input, then let a single pick lop produce the result.
        SortKeys sortedInput = SortKeys.constructSortByValueLop(
                source.constructLops(), SortKeys.OperationTypes.WithoutWeights,
                DataType.MATRIX, ValueType.DOUBLE, execType);
        sortedInput.getOutputParameters().setDimensions(
                source.getDim1(), source.getDim2(),
                source.getRowsInBlock(), source.getColsInBlock(), source.getNnz());

        PickByCount iqmPick = new PickByCount(
                sortedInput, null, getDataType(), getValueType(),
                PickByCount.OperationTypes.IQM, execType, true);
        iqmPick.getOutputParameters().setDimensions(
                getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
        setLineNumbers(iqmPick);
        return iqmPick;
    }

    // MR: combine and sort; both lops carry the dimensions of the input.
    CombineUnary combined = CombineUnary.constructCombineLop(
            source.constructLops(), DataType.MATRIX, getValueType());
    combined.getOutputParameters().setDimensions(
            source.getDim1(), source.getDim2(),
            source.getRowsInBlock(), source.getColsInBlock(), source.getNnz());

    SortKeys sortedValues = SortKeys.constructSortByValueLop(
            combined, SortKeys.OperationTypes.WithoutWeights,
            DataType.MATRIX, ValueType.DOUBLE, ExecType.MR);
    sortedValues.getOutputParameters().setDimensions(
            source.getDim1(), source.getDim2(),
            source.getRowsInBlock(), source.getColsInBlock(), source.getNnz());

    // Literal 0.25 is the scalar input of the range pick.
    Data quarter = Data.createLiteralLop(ValueType.DOUBLE, Double.toString(0.25));
    quarter.setAllPositions(this.getBeginLine(), this.getBeginColumn(), this.getEndLine(), this.getEndColumn());

    // Output size of the range pick is not known here, hence -1 for rows, columns and nnz.
    PickByCount rangePick = new PickByCount(
            sortedValues, quarter, DataType.MATRIX, getValueType(),
            PickByCount.OperationTypes.RANGEPICK);
    rangePick.getOutputParameters().setDimensions(-1, -1, getRowsInBlock(), getColsInBlock(), -1);
    setLineNumbers(rangePick);

    // Sum the picked values: partial aggregate -> group -> aggregate.
    PartialAggregate partialSum = new PartialAggregate(
            rangePick, HopsAgg2Lops.get(Hop.AggOp.SUM),
            HopsDirection2Lops.get(Hop.Direction.RowCol), DataType.MATRIX, getValueType());
    setLineNumbers(partialSum);
    // The partial aggregate derives its dimensions from the aggregation direction.
    partialSum.setDimensionsBasedOnDirection(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock());

    Group grouped = new Group(partialSum, Group.OperationTypes.Sort, DataType.MATRIX, getValueType());
    grouped.getOutputParameters().setDimensions(
            getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
    setLineNumbers(grouped);

    Aggregate totalSum = new Aggregate(
            grouped, HopsAgg2Lops.get(Hop.AggOp.SUM), DataType.MATRIX, getValueType(), ExecType.MR);
    totalSum.getOutputParameters().setDimensions(
            getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
    totalSum.setupCorrectionLocation(partialSum.getCorrectionLocation());
    setLineNumbers(totalSum);

    // Turn the aggregated matrix into a scalar.
    UnaryCP sumAsScalar = new UnaryCP(
            totalSum, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR), getDataType(), getValueType());
    sumAsScalar.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
    setLineNumbers(sumAsScalar);

    // Final CP lop: takes the sorted data plus the scalar sum.
    Unary iqmLop = new Unary(
            sortedValues, sumAsScalar, Unary.OperationTypes.MR_IQM,
            DataType.SCALAR, ValueType.DOUBLE, ExecType.CP);
    iqmLop.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
    setLineNumbers(iqmLop);
    return iqmLop;
}
use of org.apache.sysml.lops.Data in project incubator-systemml by apache.
the class BinaryOp method constructLopsIQM.
/**
 * Builds the lop plan for the two-input (weighted) IQM operation and registers it via
 * {@code setLops}.
 *
 * <p>For {@code ExecType.MR} both inputs are combined ({@code PreSort}), sorted by value with
 * weights, range-picked with the literal 0.25, summed (partial aggregate, group, aggregate),
 * cast to a scalar and finally fed, together with the sorted data, into an {@code MR_IQM}
 * unary lop. For every other execution type the plan is a weighted sort followed by one
 * {@code PickByCount} lop of type {@code IQM}.
 *
 * @param et execution type to build the plan for
 * @throws HopsException if HopsException occurs
 * @throws LopsException if LopsException occurs
 */
private void constructLopsIQM(ExecType et) throws HopsException, LopsException {
    // The first input supplies the dimensions of the combine and sort lops in both plans.
    final Hop first = getInput().get(0);

    if (et != ExecType.MR) {
        SortKeys weightedSort = SortKeys.constructSortByValueLop(
                first.constructLops(), getInput().get(1).constructLops(),
                SortKeys.OperationTypes.WithWeights,
                first.getDataType(), first.getValueType(), et);
        weightedSort.getOutputParameters().setDimensions(
                first.getDim1(), first.getDim2(),
                first.getRowsInBlock(), first.getColsInBlock(), first.getNnz());

        PickByCount iqmPick = new PickByCount(
                weightedSort, null, getDataType(), getValueType(),
                PickByCount.OperationTypes.IQM, et, true);
        setOutputDimensions(iqmPick);
        setLineNumbers(iqmPick);
        setLops(iqmPick);
        return;
    }

    CombineBinary combined = CombineBinary.constructCombineLop(
            OperationTypes.PreSort,
            (Lop) first.constructLops(), (Lop) getInput().get(1).constructLops(),
            DataType.MATRIX, getValueType());
    combined.getOutputParameters().setDimensions(
            first.getDim1(), first.getDim2(),
            first.getRowsInBlock(), first.getColsInBlock(), first.getNnz());

    // The sort lop has the same dimensions as the first input.
    SortKeys sortedValues = SortKeys.constructSortByValueLop(
            combined, SortKeys.OperationTypes.WithWeights,
            DataType.MATRIX, ValueType.DOUBLE, ExecType.MR);
    sortedValues.getOutputParameters().setDimensions(
            first.getDim1(), first.getDim2(),
            first.getRowsInBlock(), first.getColsInBlock(), first.getNnz());

    // Literal 0.25 is the scalar input of the range pick.
    Data quarter = Data.createLiteralLop(ValueType.DOUBLE, Double.toString(0.25));
    setLineNumbers(quarter);

    // Output size of the range pick is not known here, hence -1 for rows, columns and nnz.
    PickByCount rangePick = new PickByCount(
            sortedValues, quarter, DataType.MATRIX, getValueType(),
            PickByCount.OperationTypes.RANGEPICK);
    rangePick.getOutputParameters().setDimensions(-1, -1, getRowsInBlock(), getColsInBlock(), -1);
    setLineNumbers(rangePick);

    // Sum the picked values: partial aggregate -> group -> aggregate.
    PartialAggregate partialSum = new PartialAggregate(
            rangePick, HopsAgg2Lops.get(Hop.AggOp.SUM),
            HopsDirection2Lops.get(Hop.Direction.RowCol), DataType.MATRIX, getValueType());
    setLineNumbers(partialSum);
    // The partial aggregate derives its dimensions from the aggregation direction.
    partialSum.setDimensionsBasedOnDirection(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock());

    Group grouped = new Group(partialSum, Group.OperationTypes.Sort, DataType.MATRIX, getValueType());
    setOutputDimensions(grouped);
    setLineNumbers(grouped);

    Aggregate totalSum = new Aggregate(
            grouped, HopsAgg2Lops.get(Hop.AggOp.SUM), DataType.MATRIX, getValueType(), ExecType.MR);
    setOutputDimensions(totalSum);
    totalSum.setupCorrectionLocation(partialSum.getCorrectionLocation());
    setLineNumbers(totalSum);

    // Turn the aggregated matrix into a scalar.
    UnaryCP sumAsScalar = new UnaryCP(
            totalSum, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR), DataType.SCALAR, getValueType());
    sumAsScalar.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
    setLineNumbers(sumAsScalar);

    // Final CP lop: takes the sorted data plus the scalar sum.
    Unary iqmLop = new Unary(
            sortedValues, sumAsScalar, Unary.OperationTypes.MR_IQM,
            DataType.SCALAR, ValueType.DOUBLE, ExecType.CP);
    iqmLop.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
    setLineNumbers(iqmLop);
    setLops(iqmLop);
}
use of org.apache.sysml.lops.Data in project incubator-systemml by apache.
the class Dag method getRecordReaderInstructions.
/**
 * Method to get record reader instructions for a MR job.
 *
 * <p>Recurses into the inputs of {@code node} first. If {@code node} itself executes at
 * {@code ExecLocation.RecordReader}, it is assigned the next free index from
 * {@code start_index}, recorded in {@code nodeIndexMapping}, and its instruction is appended
 * to {@code recordReaderInstructions}.
 *
 * @param node low-level operator
 * @param execNodes list of exec nodes
 * @param inputStrings list of input strings (not read by this method; passed through on recursion)
 * @param recordReaderInstructions list of record reader instructions (appended to)
 * @param nodeIndexMapping node index mapping (read and extended)
 * @param start_index single-element holder of the next free index; incremented when an index is assigned
 * @param inputLabels list of input labels (appended to for non-literal RANGEPICK scalar inputs)
 * @param inputLops list of input lops (appended to in lockstep with {@code inputLabels})
 * @param MRJobLineNumbers MR job line numbers (appended to only in debug mode)
 * @return the index mapped to {@code node}, or -1 if {@code node} is neither already mapped nor a
 *         RecordReader lop contained in {@code execNodes}
 * @throws LopsException if a RecordReader lop does not have exactly two inputs
 */
private static int getRecordReaderInstructions(Lop node, ArrayList<Lop> execNodes, ArrayList<String> inputStrings, ArrayList<String> recordReaderInstructions, HashMap<Lop, Integer> nodeIndexMapping, int[] start_index, ArrayList<String> inputLabels, ArrayList<Lop> inputLops, ArrayList<Integer> MRJobLineNumbers) throws LopsException {
    // if input source, return index
    if (nodeIndexMapping.containsKey(node))
        return nodeIndexMapping.get(node);

    // not input source and not in exec nodes, then return.
    if (!execNodes.contains(node))
        return -1;

    // recurse into all inputs and remember the index each one resolved to
    ArrayList<Integer> inputIndices = new ArrayList<Integer>();
    for (Lop childNode : node.getInputs()) {
        inputIndices.add(getRecordReaderInstructions(childNode, execNodes, inputStrings, recordReaderInstructions, nodeIndexMapping, start_index, inputLabels, inputLops, MRJobLineNumbers));
    }

    // only RecordReader lops produce an instruction here
    if (node.getExecLocation() != ExecLocation.RecordReader)
        return -1;

    // Validate the arity before touching any shared state: the code below reads input 1 and
    // the instruction needs exactly two input indices.
    if (node.getInputs().size() != 2)
        throw new LopsException("Unexpected number of inputs while generating a RecordReader Instruction");

    // input indices are never reused; always hand out a fresh one
    // (need to add better indexing schemes)
    int output_index = start_index[0];
    start_index[0]++;
    nodeIndexMapping.put(node, output_index);

    // only Rangepick lop can contribute to labels
    if (node.getType() == Type.PickValues) {
        PickByCount pbc = (PickByCount) node;
        if (pbc.getOperationType() == PickByCount.OperationTypes.RANGEPICK) {
            // always the second input is a scalar
            Lop scalarInput = node.getInputs().get(1);
            boolean isLiteral = scalarInput.getExecLocation() == ExecLocation.Data
                    && ((Data) scalarInput).isLiteral();
            // A non-literal data lop, or an intermediate (non-data) lop, is referenced by
            // label; a literal needs no label.
            if (!isLiteral) {
                inputLabels.add(scalarInput.getOutputParameters().getLabel());
                inputLops.add(scalarInput);
            }
        }
    }

    // get recordreader instruction.
    recordReaderInstructions.add(node.getInstructions(inputIndices.get(0), inputIndices.get(1), output_index));
    if (DMLScript.ENABLE_DEBUG_MODE) {
        MRJobLineNumbers.add(node._beginLine);
    }
    return output_index;
}
use of org.apache.sysml.lops.Data in project incubator-systemml by apache.
the class Dag method doGreedyGrouping.
/**
 * Method to group a vector of sorted lops.
 *
 * <p>Each pass of the outer loop walks {@code node_v} in order. Every lop that is not yet
 * finished is either added to {@code execNodes} (and, where applicable, to a job-type vector in
 * {@code jobNodes}) or put into {@code queuedNodes} for a later pass. After the walk,
 * control-program instructions and MR jobs are generated for the collected {@code execNodes}.
 * The loop ends with the first pass that collects no executable lop. Write, delete and
 * end-of-block instructions are appended after all other instructions.
 *
 * @param sb statement block
 * @param node_v list of low-level operators
 * @return list of instructions
 * @throws LopsException if LopsException occurs
 * @throws IOException if IOException occurs
 * @throws DMLRuntimeException if DMLRuntimeException occurs
 */
private ArrayList<Instruction> doGreedyGrouping(StatementBlock sb, ArrayList<Lop> node_v) throws LopsException, IOException, DMLRuntimeException {
    if (LOG.isTraceEnabled())
        LOG.trace("Grouping DAG ============");

    // nodes to be executed in current iteration
    ArrayList<Lop> execNodes = new ArrayList<Lop>();
    // nodes that have already been processed
    ArrayList<Lop> finishedNodes = new ArrayList<Lop>();
    // nodes that are queued for the following iteration
    ArrayList<Lop> queuedNodes = new ArrayList<Lop>();
    // one node list per job type
    ArrayList<ArrayList<Lop>> jobNodes = createNodeVectors(JobType.getNumJobTypes());

    // list of instructions
    ArrayList<Instruction> inst = new ArrayList<Instruction>();
    ArrayList<Instruction> writeInst = new ArrayList<Instruction>();
    ArrayList<Instruction> deleteInst = new ArrayList<Instruction>();
    ArrayList<Instruction> endOfBlockInst = new ArrayList<Instruction>();

    // remove files for transient reads that are updated.
    deleteUpdatedTransientReadVariables(sb, node_v, writeInst);
    generateRemoveInstructions(sb, endOfBlockInst);
    generateInstructionsForInputVariables(node_v, inst);

    boolean done = false;
    String indent = " ";

    while (!done) {
        if (LOG.isTraceEnabled())
            LOG.trace("Grouping nodes in DAG");

        // start every pass with empty working sets
        execNodes.clear();
        queuedNodes.clear();
        clearNodeVectors(jobNodes);
        gmrMapperFootprint = 0;

        for (Lop node : node_v) {
            // finished nodes don't need to be processed
            if (finishedNodes.contains(node))
                continue;

            if (LOG.isTraceEnabled())
                LOG.trace("Processing node (" + node.getID() + ") " + node.toString() + " exec nodes size is " + execNodes.size());

            // queue a job-defining node that is not compatible with
            // its children nodes in execNodes
            if (node.definesMRJob() && !compatibleWithChildrenInExecNodes(execNodes, node)) {
                if (LOG.isTraceEnabled())
                    LOG.trace(indent + "Queueing node " + node.toString() + " (code 1)");
                queuedNodes.add(node);
                removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                continue;
            }

            // a child of this node is already queued, so this node must wait for a later
            // iteration
            if (hasChildNode(node, queuedNodes)) {
                if (LOG.isTraceEnabled())
                    LOG.trace(indent + "Queueing node " + node.toString() + " (code 2)");
                queuedNodes.add(node);
                // if node has more than two inputs,
                // remove children that will be needed in a future
                // iterations
                // may also have to remove parent nodes of these children
                removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                continue;
            }

            // if inputs come from different jobs, then queue
            if (node.getInputs().size() >= 2) {
                // Integer.MIN_VALUE marks "no input with an assigned job seen yet"
                int jobid = Integer.MIN_VALUE;
                boolean queueit = false;
                for (int idx = 0; idx < node.getInputs().size(); idx++) {
                    int input_jobid = jobType(node.getInputs().get(idx), jobNodes);
                    if (input_jobid != -1) {
                        if (jobid == Integer.MIN_VALUE)
                            jobid = input_jobid;
                        else if (jobid != input_jobid) {
                            queueit = true;
                            break;
                        }
                    }
                }
                if (queueit) {
                    if (LOG.isTraceEnabled())
                        LOG.trace(indent + "Queueing node " + node.toString() + " (code 3)");
                    queuedNodes.add(node);
                    removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                    continue;
                }
            }

            // See if this lop can be eliminated
            // This check is for "aligner" lops (e.g., group)
            boolean eliminate = canEliminateLop(node, execNodes);
            if (eliminate) {
                if (LOG.isTraceEnabled())
                    LOG.trace(indent + "Adding -" + node.toString());
                execNodes.add(node);
                finishedNodes.add(node);
                addNodeByJobType(node, jobNodes, execNodes, eliminate);
                continue;
            }

            // queue a job-defining node if
            // children that defines a MR Job are present in execNodes
            if (node.definesMRJob()) {
                if (hasMRJobChildNode(node, execNodes)) {
                    // "group" lops with a DataGen child are exempt;
                    // this is because "group" can be pushed into the "Rand" job.
                    if (!(node.getType() == Lop.Type.Grouping && checkDataGenAsChildNode(node, execNodes))) {
                        if (LOG.isTraceEnabled())
                            LOG.trace(indent + "Queueing node " + node.toString() + " (code 4)");
                        queuedNodes.add(node);
                        removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                        continue;
                    }
                }
            }

            // a multi-input node with a RecordReader child in execNodes: every input must be
            // the RecordReader lop itself or be accepted by isChild(rr_node, n, IDMap); if
            // not, queue "node"
            if (node.getInputs().size() > 1 && hasChildNode(node, execNodes, ExecLocation.RecordReader)) {
                // get the actual RecordReader lop
                Lop rr_node = getChildNode(node, execNodes, ExecLocation.RecordReader);

                // all inputs of "node" must be ancestors of rr_node
                boolean queue_it = false;
                for (Lop n : node.getInputs()) {
                    // each input should be ancestor of RecordReader lop
                    if (!n.equals(rr_node) && !isChild(rr_node, n, IDMap)) {
                        // i.e., "node" must be queued
                        queue_it = true;
                        break;
                    }
                }

                if (queue_it) {
                    // queue node
                    if (LOG.isTraceEnabled())
                        LOG.trace(indent + "Queueing -" + node.toString() + " (code 5)");
                    queuedNodes.add(node);
                    // TODO: does this have to be modified to handle
                    // recordreader lops?
                    removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                    continue;
                }
                // otherwise fall through to the regular handling of "node" below
            }

            // data node:
            // only write nodes are kept in execnodes
            if (node.getExecLocation() == ExecLocation.Data) {
                Data dnode = (Data) node;
                boolean dnode_queued = false;

                if (dnode.getOperationType() == OperationTypes.READ) {
                    if (LOG.isTraceEnabled())
                        LOG.trace(indent + "Adding Data -" + node.toString());

                    // TODO: avoid readScalar instruction, and read it on-demand just like the way Matrices are read in control program
                    if (node.getDataType() == DataType.SCALAR
                            //TODO: LEO check the following condition is still needed
                            && node.getOutputParameters().getFile_name() != null) {
                        // this lop corresponds to reading a scalar from HDFS file
                        // add it to execNodes so that "readScalar" instruction gets generated
                        execNodes.add(node);
                        // note: no need to add it to any job vector
                    }
                } else if (dnode.getOperationType() == OperationTypes.WRITE) {
                    // Skip the transient write <code>node</code> if the input is a
                    // transient read with the same variable name. i.e., a dummy copy.
                    // Hence, <code>node</code> can be avoided.
                    // TODO: this case should ideally be handled in the language layer
                    // prior to the construction of Hops Dag
                    Lop input = dnode.getInputs().get(0);
                    if (dnode.isTransient() && input.getExecLocation() == ExecLocation.Data && ((Data) input).isTransient() && dnode.getOutputParameters().getLabel().equals(input.getOutputParameters().getLabel())) {
                        // do nothing, <code>node</code> must not processed any further.
                        ;
                    } else if (execNodes.contains(input) && !isCompatible(node, input) && sendWriteLopToMR(node)) {
                        // input is in execNodes but it is not compatible with write lop. So, queue the write lop.
                        if (LOG.isTraceEnabled())
                            LOG.trace(indent + "Queueing -" + node.toString());
                        queuedNodes.add(node);
                        dnode_queued = true;
                    } else {
                        if (LOG.isTraceEnabled())
                            LOG.trace(indent + "Adding Data -" + node.toString());
                        execNodes.add(node);
                        if (sendWriteLopToMR(node)) {
                            addNodeByJobType(node, jobNodes, execNodes, false);
                        }
                    }
                }

                // a data node is finished unless its write was queued above
                if (!dnode_queued)
                    finishedNodes.add(node);
                continue;
            }

            // map or reduce node, can always be piggybacked with parent
            if (node.getExecLocation() == ExecLocation.MapOrReduce) {
                if (LOG.isTraceEnabled())
                    LOG.trace(indent + "Adding -" + node.toString());
                execNodes.add(node);
                finishedNodes.add(node);
                addNodeByJobType(node, jobNodes, execNodes, false);
                continue;
            }

            // RecordReader node, add, if no parent needs reduce, else queue
            if (node.getExecLocation() == ExecLocation.RecordReader) {
                // "node" must not have Map or MapAndReduce children in
                // execNodes .. it has to be the first one in the job!
                if (!hasChildNode(node, execNodes, ExecLocation.Map) && !hasChildNode(node, execNodes, ExecLocation.MapAndReduce)) {
                    if (LOG.isTraceEnabled())
                        LOG.trace(indent + "Adding -" + node.toString());
                    execNodes.add(node);
                    finishedNodes.add(node);
                    addNodeByJobType(node, jobNodes, execNodes, false);
                } else {
                    if (LOG.isTraceEnabled())
                        LOG.trace(indent + "Queueing -" + node.toString() + " (code 6)");
                    queuedNodes.add(node);
                    removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                }
                continue;
            }

            // map node, add, if no parent needs reduce, else queue
            if (node.getExecLocation() == ExecLocation.Map) {
                boolean queueThisNode = false;
                // subcode only distinguishes the queueing reason in the trace message
                int subcode = -1;
                if (node.usesDistributedCache()) {
                    // if an input to <code>node</code> comes from distributed cache
                    // then that input must get executed in one of the previous jobs.
                    int[] dcInputIndexes = node.distributedCacheInputIndex();
                    for (int dcInputIndex : dcInputIndexes) {
                        // distributed-cache input positions are 1-based
                        Lop dcInput = node.getInputs().get(dcInputIndex - 1);
                        if ((dcInput.getType() != Lop.Type.Data && dcInput.getExecType() == ExecType.MR) && execNodes.contains(dcInput)) {
                            queueThisNode = true;
                            subcode = 1;
                        }
                    }

                    // Limit the number of distributed cache inputs based on the available memory in mappers
                    double memsize = computeFootprintInMapper(node);
                    if (gmrMapperFootprint > 0 && !checkMemoryLimits(node, gmrMapperFootprint + memsize)) {
                        queueThisNode = true;
                        subcode = 2;
                    }
                    // the footprint only grows for nodes that actually stay in this job
                    if (!queueThisNode)
                        gmrMapperFootprint += memsize;
                }
                if (!queueThisNode && !hasChildNode(node, execNodes, ExecLocation.MapAndReduce) && !hasMRJobChildNode(node, execNodes)) {
                    if (LOG.isTraceEnabled())
                        LOG.trace(indent + "Adding -" + node.toString());
                    execNodes.add(node);
                    finishedNodes.add(node);
                    addNodeByJobType(node, jobNodes, execNodes, false);
                } else {
                    if (LOG.isTraceEnabled())
                        LOG.trace(indent + "Queueing -" + node.toString() + " (code 7 - " + "subcode " + subcode + ")");
                    queuedNodes.add(node);
                    removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                }
                continue;
            }

            // reduce node, make sure no parent needs reduce, else queue
            if (node.getExecLocation() == ExecLocation.MapAndReduce) {
                // "eliminate" is always false here: eliminated lops were handled above
                if (LOG.isTraceEnabled())
                    LOG.trace(indent + "Adding -" + node.toString());
                execNodes.add(node);
                finishedNodes.add(node);
                addNodeByJobType(node, jobNodes, execNodes, eliminate);
                continue;
            }

            // aligned reduce, make sure a parent that is reduce exists
            if (node.getExecLocation() == ExecLocation.Reduce) {
                if (compatibleWithChildrenInExecNodes(execNodes, node) && (hasChildNode(node, execNodes, ExecLocation.MapAndReduce) || hasChildNode(node, execNodes, ExecLocation.Map))) {
                    if (LOG.isTraceEnabled())
                        LOG.trace(indent + "Adding -" + node.toString());
                    execNodes.add(node);
                    finishedNodes.add(node);
                    addNodeByJobType(node, jobNodes, execNodes, false);
                } else {
                    if (LOG.isTraceEnabled())
                        LOG.trace(indent + "Queueing -" + node.toString() + " (code 8)");
                    queuedNodes.add(node);
                    removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                }
                continue;
            }

            // control-program node: queue it if any input in execNodes is neither a Data lop
            // nor a ControlProgram lop, i.e. an input
            // that will be executed in a MR job.
            if (node.getExecLocation() == ExecLocation.ControlProgram) {
                for (Lop lop : node.getInputs()) {
                    if (execNodes.contains(lop) && !(lop.getExecLocation() == ExecLocation.Data) && !(lop.getExecLocation() == ExecLocation.ControlProgram)) {
                        if (LOG.isTraceEnabled())
                            LOG.trace(indent + "Queueing -" + node.toString() + " (code 9)");
                        queuedNodes.add(node);
                        removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                        break;
                    }
                }
                if (queuedNodes.contains(node))
                    continue;

                if (LOG.isTraceEnabled())
                    LOG.trace(indent + "Adding - scalar" + node.toString());
                execNodes.add(node);
                addNodeByJobType(node, jobNodes, execNodes, false);
                finishedNodes.add(node);
                continue;
            }
        }

        // no work to do
        if (execNodes.isEmpty()) {
            // nothing could be scheduled in this pass; leftover queued nodes would never run
            if (!queuedNodes.isEmpty()) {
                throw new LopsException("Queued nodes should not be 0 at this point \n");
            }
            if (LOG.isTraceEnabled())
                LOG.trace("All done! queuedNodes = " + queuedNodes.size());
            done = true;
        } else {
            if (LOG.isTraceEnabled())
                LOG.trace("Generating jobs for group -- Node count=" + execNodes.size());

            // first process scalar instructions
            generateControlProgramJobs(execNodes, inst, writeInst, deleteInst);

            // copy unassigned lops in execnodes to gmrnodes
            for (int i = 0; i < execNodes.size(); i++) {
                Lop node = execNodes.get(i);
                if (jobType(node, jobNodes) == -1) {
                    if (isCompatible(node, JobType.GMR)) {
                        if (node.hasNonBlockedInputs()) {
                            jobNodes.get(JobType.GMRCELL.getId()).add(node);
                            addChildren(node, jobNodes.get(JobType.GMRCELL.getId()), execNodes);
                        } else {
                            jobNodes.get(JobType.GMR.getId()).add(node);
                            addChildren(node, jobNodes.get(JobType.GMR.getId()), execNodes);
                        }
                    } else {
                        if (LOG.isTraceEnabled())
                            LOG.trace(indent + "Queueing -" + node.toString() + " (code 10)");
                        execNodes.remove(i);
                        finishedNodes.remove(node);
                        queuedNodes.add(node);
                        removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                        // remove(i) shifted every later entry one slot to the left. Step back
                        // so the entry now sitting at index i is examined by the next
                        // iteration instead of being skipped.
                        // NOTE(review): removeNodesForNextIteration also receives execNodes;
                        // if it removes entries located before index i, later entries can
                        // still be skipped -- confirm against that helper.
                        i--;
                    }
                }
            }

            // next generate MR instructions
            if (!execNodes.isEmpty())
                generateMRJobs(execNodes, inst, writeInst, deleteInst, jobNodes);
            handleSingleOutputJobs(execNodes, jobNodes, finishedNodes);
        }
    }

    // add write and delete inst at the very end.
    inst.addAll(writeInst);
    inst.addAll(deleteInst);
    inst.addAll(endOfBlockInst);
    return inst;
}
use of org.apache.sysml.lops.Data in project incubator-systemml by apache.
the class Dag method getMapperInstructions.
/**
 * Method to get mapper instructions for a MR job.
 *
 * <p>Recurses into the inputs of {@code node} first. If {@code node} executes at
 * {@code ExecLocation.Map} or {@code ExecLocation.MapOrReduce} and has no
 * {@code MapAndReduce} or {@code Reduce} child in {@code execNodes}, it is assigned the next
 * free index from {@code start_index}, recorded in {@code nodeIndexMapping}, and its
 * instruction is appended to {@code instructionsInMapper}.
 *
 * @param node low-level operator
 * @param execNodes list of exec nodes
 * @param inputStrings list of input strings (not read by this method; passed through on recursion)
 * @param instructionsInMapper list of instructions in mapper (appended to)
 * @param nodeIndexMapping mapping from lop to its assigned index (read and extended)
 * @param start_index single-element holder of the next free index; incremented when an index is assigned
 * @param inputLabels input labels (appended to for non-literal scalar inputs of multi-input Unary lops)
 * @param inputLops input lops (appended to in lockstep with {@code inputLabels})
 * @param MRJobLineNumbers MR job line numbers (appended to only in debug mode)
 * @return the index mapped to {@code node}, or -1 if no mapper instruction applies to it
 * @throws LopsException if the number of inputs of {@code node} is not supported
 */
private int getMapperInstructions(Lop node, ArrayList<Lop> execNodes, ArrayList<String> inputStrings, ArrayList<String> instructionsInMapper, HashMap<Lop, Integer> nodeIndexMapping, int[] start_index, ArrayList<String> inputLabels, ArrayList<Lop> inputLops, ArrayList<Integer> MRJobLineNumbers) throws LopsException {
    // an already indexed lop (e.g. an input source) just reports its index
    if (nodeIndexMapping.containsKey(node))
        return nodeIndexMapping.get(node);

    // neither indexed nor part of this job
    if (!execNodes.contains(node))
        return -1;

    // resolve all inputs first and remember the index of each
    ArrayList<Integer> inputIndices = new ArrayList<Integer>();
    for (Lop childNode : node.getInputs()) {
        inputIndices.add(getMapperInstructions(childNode, execNodes, inputStrings, instructionsInMapper, nodeIndexMapping, start_index, inputLabels, inputLops, MRJobLineNumbers));
    }

    // Only Map / MapOrReduce lops without a MapAndReduce or Reduce child in execNodes
    // contribute to mapper instructions.
    ExecLocation location = node.getExecLocation();
    boolean mapSide = location == ExecLocation.Map || location == ExecLocation.MapOrReduce;
    if (!mapSide || hasChildNode(node, execNodes, ExecLocation.MapAndReduce) || hasChildNode(node, execNodes, ExecLocation.Reduce))
        return -1;

    // indices are never reused; always hand out a fresh one
    // (need to add better indexing schemes)
    int output_index = start_index[0];
    start_index[0]++;
    nodeIndexMapping.put(node, output_index);

    if (node instanceof Unary && node.getInputs().size() > 1) {
        // Only for Unary operators that have more than one input; "true" unary operators
        // like cos(A) never get here.
        // Use the first scalar input, falling back to input 0 when there is none.
        Lop labelSource = node.getInputs().get(0);
        for (Lop candidate : node.getInputs()) {
            if (candidate.getDataType() == DataType.SCALAR) {
                labelSource = candidate;
                break;
            }
        }
        // A non-literal data lop, or an intermediate (non-data) lop, is referenced by label;
        // a literal needs no label.
        if (labelSource.getExecLocation() != ExecLocation.Data || !((Data) labelSource).isLiteral()) {
            inputLabels.add(labelSource.getOutputParameters().getLabel());
            inputLops.add(labelSource);
        }
    }

    // get mapper instruction; the overload is selected by the number of inputs
    switch (node.getInputs().size()) {
        case 1:
            instructionsInMapper.add(node.getInstructions(inputIndices.get(0), output_index));
            break;
        case 2:
            instructionsInMapper.add(node.getInstructions(inputIndices.get(0), inputIndices.get(1), output_index));
            break;
        case 3:
            instructionsInMapper.add(node.getInstructions(inputIndices.get(0), inputIndices.get(1), inputIndices.get(2), output_index));
            break;
        case 4:
            // Example: Reshape
            instructionsInMapper.add(node.getInstructions(inputIndices.get(0), inputIndices.get(1), inputIndices.get(2), inputIndices.get(3), output_index));
            break;
        case 5:
            // Example: RangeBasedReIndex A[row_l:row_u, col_l:col_u]
            instructionsInMapper.add(node.getInstructions(inputIndices.get(0), inputIndices.get(1), inputIndices.get(2), inputIndices.get(3), inputIndices.get(4), output_index));
            break;
        case 7:
            // Example: RangeBasedReIndex A[row_l:row_u, col_l:col_u] = B
            instructionsInMapper.add(node.getInstructions(inputIndices.get(0), inputIndices.get(1), inputIndices.get(2), inputIndices.get(3), inputIndices.get(4), inputIndices.get(5), inputIndices.get(6), output_index));
            break;
        default:
            throw new LopsException("Node with " + node.getInputs().size() + " inputs is not supported in dag.java.");
    }

    if (DMLScript.ENABLE_DEBUG_MODE) {
        MRJobLineNumbers.add(node._beginLine);
    }
    return output_index;
}
Aggregations