use of org.apache.sysml.runtime.instructions.Instruction in project incubator-systemml by apache.
the class Dag method deleteUpdatedTransientReadVariables.
/**
 * Appends remove-variable instructions for transient matrix variables that are
 * both read and overwritten within the given list of lops.
 * <p>
 * A transient matrix READ is a candidate only if none of its consumers executes
 * at {@code ExecLocation.Data}. A transient matrix WRITE with the same label as
 * a candidate is treated as an update unless its first input is itself one of
 * the candidate reads. One remove instruction is generated per updated label,
 * with its location taken from the corresponding write lop.
 *
 * @param sb    statement block; when {@code null} the method does nothing
 * @param nodeV lops of the DAG to inspect
 * @param inst  list that receives the generated remove instructions
 */
private static void deleteUpdatedTransientReadVariables(StatementBlock sb, ArrayList<Lop> nodeV, ArrayList<Instruction> inst) {
    if (sb == null) {
        return;
    }
    if (LOG.isTraceEnabled()) {
        LOG.trace("In delete updated variables");
    }

    // Pass 1: collect CANDIDATE transient matrix reads, keyed by variable label.
    HashMap<String, Lop> candidateReads = new HashMap<>();
    for (Lop lop : nodeV) {
        if (lop.getExecLocation() != ExecLocation.Data) {
            continue;
        }
        Data data = (Data) lop;
        if (!data.isTransient() || data.getOperationType() != OperationTypes.READ || data.getDataType() != DataType.MATRIX) {
            continue;
        }
        // The old value is still in use if this read feeds a lop of type Data
        // (such a consumer has to be a WRITE), so it must not be a candidate.
        boolean feedsDataLop = false;
        for (Lop consumer : lop.getOutputs()) {
            feedsDataLop = consumer.getExecLocation() == ExecLocation.Data;
            if (feedsDataLop) {
                break;
            }
        }
        if (!feedsDataLop) {
            candidateReads.put(lop.getOutputParameters().getLabel(), lop);
        }
    }

    // Pass 2: find transient matrix writes that actually update a candidate.
    // updatedLabels holds the labels; writeLopByLabel remembers the write lop
    // that supplies the location of the generated instruction.
    HashSet<String> updatedLabels = new HashSet<>();
    HashMap<String, Lop> writeLopByLabel = new HashMap<>();
    for (Lop lop : nodeV) {
        if (lop.getExecLocation() != ExecLocation.Data) {
            continue;
        }
        Data data = (Data) lop;
        if (!data.isTransient() || data.getOperationType() != OperationTypes.WRITE || data.getDataType() != DataType.MATRIX) {
            continue;
        }
        String label = lop.getOutputParameters().getLabel();
        // a transient read with the same label/name must be present
        if (!candidateReads.containsKey(label)) {
            continue;
        }
        // skip the case where a candidate transient read feeds directly into this write
        if (candidateReads.containsValue(lop.getInputs().get(0))) {
            continue;
        }
        updatedLabels.add(label);
        writeLopByLabel.put(label, lop);
    }

    // Pass 3: generate one remove instruction per updated label.
    for (String label : updatedLabels) {
        Instruction removeInst = VariableCPInstruction.prepareRemoveInstruction(label);
        removeInst.setLocation(writeLopByLabel.get(label));
        if (LOG.isTraceEnabled()) {
            LOG.trace(removeInst.toString());
        }
        inst.add(removeInst);
    }
}
use of org.apache.sysml.runtime.instructions.Instruction in project incubator-systemml by apache.
the class Dag method doGreedyGrouping.
/**
 * Method to group a vector of sorted lops and generate the corresponding instructions.
 * <p>
 * The method makes repeated passes over {@code node_v}. In each pass, every lop that
 * is not yet finished is either added to {@code execNodes} (and, where applicable, to
 * a job-specific node vector) or queued for a later pass. After each pass, control
 * program instructions and MR job instructions are generated for the selected lops.
 * Passes stop as soon as one of them selects no lop at all.
 * <p>
 * The returned list contains the instructions generated during the passes, followed by
 * the collected write instructions, delete instructions and end-of-block instructions,
 * in that order.
 *
 * @param sb statement block
 * @param node_v list of low-level operators
 * @return list of instructions
 * @throws LopsException if a pass selects no lop for execution while lops are still queued
 */
private ArrayList<Instruction> doGreedyGrouping(StatementBlock sb, ArrayList<Lop> node_v) {
    if (LOG.isTraceEnabled())
        LOG.trace("Grouping DAG ============");
    // nodes to be executed in current iteration
    ArrayList<Lop> execNodes = new ArrayList<>();
    // nodes that have already been processed
    ArrayList<Lop> finishedNodes = new ArrayList<>();
    // nodes that are queued for the following iteration
    ArrayList<Lop> queuedNodes = new ArrayList<>();
    // one node vector per job type
    ArrayList<ArrayList<Lop>> jobNodes = createNodeVectors(JobType.getNumJobTypes());
    // list of instructions
    ArrayList<Instruction> inst = new ArrayList<>();
    // instructions that are appended after all job instructions (see end of method)
    ArrayList<Instruction> writeInst = new ArrayList<>();
    ArrayList<Instruction> deleteInst = new ArrayList<>();
    ArrayList<Instruction> endOfBlockInst = new ArrayList<>();
    // remove files for transient reads that are updated.
    deleteUpdatedTransientReadVariables(sb, node_v, writeInst);
    generateRemoveInstructions(sb, endOfBlockInst);
    generateInstructionsForInputVariables(node_v, inst);
    boolean done = false;
    String indent = " ";
    while (!done) {
        if (LOG.isTraceEnabled())
            LOG.trace("Grouping nodes in DAG");
        // reset all per-pass state
        execNodes.clear();
        queuedNodes.clear();
        clearNodeVectors(jobNodes);
        gmrMapperFootprint = 0;
        for (Lop node : node_v) {
            // finished nodes don't need to be processed
            if (finishedNodes.contains(node))
                continue;
            if (LOG.isTraceEnabled())
                LOG.trace("Processing node (" + node.getID() + ") " + node.toString() + " exec nodes size is " + execNodes.size());
            // (code 1) queue a job-defining node that is not compatible with
            // its children nodes in execNodes
            if (node.definesMRJob() && !compatibleWithChildrenInExecNodes(execNodes, node)) {
                if (LOG.isTraceEnabled())
                    LOG.trace(indent + "Queueing node " + node.toString() + " (code 1)");
                queuedNodes.add(node);
                removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                continue;
            }
            // (code 2) queue the node if one of its children is already queued
            // for the next iteration
            if (hasChildNode(node, queuedNodes)) {
                if (LOG.isTraceEnabled())
                    LOG.trace(indent + "Queueing node " + node.toString() + " (code 2)");
                queuedNodes.add(node);
                // if node has more than two inputs,
                // remove children that will be needed in a future
                // iterations
                // may also have to remove parent nodes of these children
                removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                continue;
            }
            // (code 3) if inputs come from different jobs, then queue
            if (node.getInputs().size() >= 2) {
                // Integer.MIN_VALUE marks "no job seen yet"; inputs for which
                // jobType(..) returns -1 are ignored in the comparison
                int jobid = Integer.MIN_VALUE;
                boolean queueit = false;
                for (int idx = 0; idx < node.getInputs().size(); idx++) {
                    int input_jobid = jobType(node.getInputs().get(idx), jobNodes);
                    if (input_jobid != -1) {
                        if (jobid == Integer.MIN_VALUE)
                            jobid = input_jobid;
                        else if (jobid != input_jobid) {
                            queueit = true;
                            break;
                        }
                    }
                }
                if (queueit) {
                    if (LOG.isTraceEnabled())
                        LOG.trace(indent + "Queueing node " + node.toString() + " (code 3)");
                    queuedNodes.add(node);
                    removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                    continue;
                }
            }
            // See if this lop can be eliminated
            // This check is for "aligner" lops (e.g., group)
            boolean eliminate = canEliminateLop(node, execNodes);
            if (eliminate) {
                if (LOG.isTraceEnabled())
                    LOG.trace(indent + "Adding -" + node.toString());
                execNodes.add(node);
                finishedNodes.add(node);
                addNodeByJobType(node, jobNodes, execNodes, eliminate);
                continue;
            }
            // (code 4) queue a job-defining node if a child that defines a MR job
            // is present in execNodes
            if (node.definesMRJob()) {
                if (hasMRJobChildNode(node, execNodes)) {
                    // exception: a Grouping lop with a DataGen child is not queued,
                    // this is because "group" can be pushed into the "Rand" job.
                    if (!(node.getType() == Lop.Type.Grouping && checkDataGenAsChildNode(node, execNodes))) {
                        if (LOG.isTraceEnabled())
                            LOG.trace(indent + "Queueing node " + node.toString() + " (code 4)");
                        queuedNodes.add(node);
                        removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                        continue;
                    }
                }
            }
            // (code 5) if node has more than one input and one of its children in
            // execNodes is a RecordReader lop, every other input must pass the
            // isChild(rr_node, input, IDMap) check; if not, queue "node"
            if (node.getInputs().size() > 1 && hasChildNode(node, execNodes, ExecLocation.RecordReader)) {
                // get the actual RecordReader lop
                Lop rr_node = getChildNode(node, execNodes, ExecLocation.RecordReader);
                // all inputs of "node" must be ancestors of rr_node
                boolean queue_it = false;
                for (Lop n : node.getInputs()) {
                    // each input should be ancestor of RecordReader lop
                    if (!n.equals(rr_node) && !isChild(rr_node, n, IDMap)) {
                        // i.e., "node" must be queued
                        queue_it = true;
                        break;
                    }
                }
                if (queue_it) {
                    // queue node
                    if (LOG.isTraceEnabled())
                        LOG.trace(indent + "Queueing -" + node.toString() + " (code 5)");
                    queuedNodes.add(node);
                    // TODO: does this have to be modified to handle
                    // recordreader lops?
                    removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                    continue;
                }
                // otherwise fall through: subsequent checks have to be performed on "node"
            }
            // Data lops: reads are not added to execNodes (except scalar reads with a
            // file name); writes are added, queued, or skipped as dummy copies
            if (node.getExecLocation() == ExecLocation.Data) {
                Data dnode = (Data) node;
                boolean dnode_queued = false;
                if (dnode.getOperationType() == OperationTypes.READ) {
                    if (LOG.isTraceEnabled())
                        LOG.trace(indent + "Adding Data -" + node.toString());
                    // TODO: avoid readScalar instruction, and read it on-demand just like the way Matrices are read in control program
                    // TODO: LEO check the following condition is still needed
                    if (node.getDataType() == DataType.SCALAR && node.getOutputParameters().getFile_name() != null) {
                        // this lop corresponds to reading a scalar from HDFS file
                        // add it to execNodes so that "readScalar" instruction gets generated
                        execNodes.add(node);
                        // note: no need to add it to any job vector
                    }
                } else if (dnode.getOperationType() == OperationTypes.WRITE) {
                    // Skip the transient write <code>node</code> if the input is a
                    // transient read with the same variable name. i.e., a dummy copy.
                    // Hence, <code>node</code> can be avoided.
                    // TODO: this case should ideally be handled in the language layer
                    // prior to the construction of Hops Dag
                    Lop input = dnode.getInputs().get(0);
                    if (dnode.isTransient() && input.getExecLocation() == ExecLocation.Data && ((Data) input).isTransient() && dnode.getOutputParameters().getLabel().equals(input.getOutputParameters().getLabel())) {
                        // do nothing, <code>node</code> must not processed any further.
                    } else if (execNodes.contains(input) && !isCompatible(node, input) && sendWriteLopToMR(node)) {
                        // input is in execNodes but it is not compatible with write lop. So, queue the write lop.
                        if (LOG.isTraceEnabled())
                            LOG.trace(indent + "Queueing -" + node.toString());
                        queuedNodes.add(node);
                        dnode_queued = true;
                    } else {
                        if (LOG.isTraceEnabled())
                            LOG.trace(indent + "Adding Data -" + node.toString());
                        execNodes.add(node);
                        if (sendWriteLopToMR(node)) {
                            addNodeByJobType(node, jobNodes, execNodes, false);
                        }
                    }
                }
                // every Data lop that was not queued is finished after this pass
                if (!dnode_queued)
                    finishedNodes.add(node);
                continue;
            }
            // map or reduce node, can always be piggybacked with parent
            if (node.getExecLocation() == ExecLocation.MapOrReduce) {
                if (LOG.isTraceEnabled())
                    LOG.trace(indent + "Adding -" + node.toString());
                execNodes.add(node);
                finishedNodes.add(node);
                addNodeByJobType(node, jobNodes, execNodes, false);
                continue;
            }
            // (code 6) RecordReader node: add it only if it has no Map and no
            // MapAndReduce child in execNodes (it has to be the first one in the
            // job), else queue
            if (node.getExecLocation() == ExecLocation.RecordReader) {
                if (!hasChildNode(node, execNodes, ExecLocation.Map) && !hasChildNode(node, execNodes, ExecLocation.MapAndReduce)) {
                    if (LOG.isTraceEnabled())
                        LOG.trace(indent + "Adding -" + node.toString());
                    execNodes.add(node);
                    finishedNodes.add(node);
                    addNodeByJobType(node, jobNodes, execNodes, false);
                } else {
                    if (LOG.isTraceEnabled())
                        LOG.trace(indent + "Queueing -" + node.toString() + " (code 6)");
                    queuedNodes.add(node);
                    removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                }
                continue;
            }
            // (code 7) map node: add it if it has no MapAndReduce child and no
            // job-defining child in execNodes, and the distributed cache checks
            // below pass; else queue
            if (node.getExecLocation() == ExecLocation.Map) {
                boolean queueThisNode = false;
                // subcode is only used in the trace message to tell the queueing reasons apart
                int subcode = -1;
                if (node.usesDistributedCache()) {
                    // if an input to <code>node</code> comes from distributed cache
                    // then that input must get executed in one of the previous jobs.
                    int[] dcInputIndexes = node.distributedCacheInputIndex();
                    for (int dcInputIndex : dcInputIndexes) {
                        // the reported indexes are 1-based positions into getInputs()
                        Lop dcInput = node.getInputs().get(dcInputIndex - 1);
                        if ((dcInput.getType() != Lop.Type.Data && dcInput.getExecType() == ExecType.MR) && execNodes.contains(dcInput)) {
                            queueThisNode = true;
                            subcode = 1;
                        }
                    }
                    // Limit the number of distributed cache inputs based on the available memory in mappers
                    double memsize = computeFootprintInMapper(node);
                    if (gmrMapperFootprint > 0 && !checkMemoryLimits(node, gmrMapperFootprint + memsize)) {
                        queueThisNode = true;
                        subcode = 2;
                    }
                    if (!queueThisNode)
                        gmrMapperFootprint += memsize;
                }
                if (!queueThisNode && !hasChildNode(node, execNodes, ExecLocation.MapAndReduce) && !hasMRJobChildNode(node, execNodes)) {
                    if (LOG.isTraceEnabled())
                        LOG.trace(indent + "Adding -" + node.toString());
                    execNodes.add(node);
                    finishedNodes.add(node);
                    addNodeByJobType(node, jobNodes, execNodes, false);
                } else {
                    if (LOG.isTraceEnabled())
                        LOG.trace(indent + "Queueing -" + node.toString() + " (code 7 - " + "subcode " + subcode + ")");
                    queuedNodes.add(node);
                    removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                }
                continue;
            }
            // MapAndReduce node: always added at this point (the queueing checks
            // for job-defining nodes were already performed above)
            if (node.getExecLocation() == ExecLocation.MapAndReduce) {
                if (LOG.isTraceEnabled())
                    LOG.trace(indent + "Adding -" + node.toString());
                execNodes.add(node);
                finishedNodes.add(node);
                // eliminate is always false here: eliminated lops were handled earlier
                addNodeByJobType(node, jobNodes, execNodes, eliminate);
                continue;
            }
            // (code 8) aligned reduce: add it only if it is compatible with its
            // children in execNodes and has a MapAndReduce or Map child there
            if (node.getExecLocation() == ExecLocation.Reduce) {
                if (compatibleWithChildrenInExecNodes(execNodes, node) && (hasChildNode(node, execNodes, ExecLocation.MapAndReduce) || hasChildNode(node, execNodes, ExecLocation.Map))) {
                    if (LOG.isTraceEnabled())
                        LOG.trace(indent + "Adding -" + node.toString());
                    execNodes.add(node);
                    finishedNodes.add(node);
                    addNodeByJobType(node, jobNodes, execNodes, false);
                } else {
                    if (LOG.isTraceEnabled())
                        LOG.trace(indent + "Queueing -" + node.toString() + " (code 8)");
                    queuedNodes.add(node);
                    removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                }
                continue;
            }
            // (code 9) control program node: queue it if one of its inputs is in
            // execNodes and is neither a Data nor a ControlProgram lop, i.e. an
            // input that will be executed in a MR job.
            if (node.getExecLocation() == ExecLocation.ControlProgram) {
                for (Lop lop : node.getInputs()) {
                    if (execNodes.contains(lop) && !(lop.getExecLocation() == ExecLocation.Data) && !(lop.getExecLocation() == ExecLocation.ControlProgram)) {
                        if (LOG.isTraceEnabled())
                            LOG.trace(indent + "Queueing -" + node.toString() + " (code 9)");
                        queuedNodes.add(node);
                        removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                        break;
                    }
                }
                if (queuedNodes.contains(node))
                    continue;
                if (LOG.isTraceEnabled())
                    LOG.trace(indent + "Adding - scalar" + node.toString());
                execNodes.add(node);
                addNodeByJobType(node, jobNodes, execNodes, false);
                finishedNodes.add(node);
                continue;
            }
        }
        // no work to do
        if (execNodes.isEmpty()) {
            // nothing was selected in this pass, so nothing may be left in the queue;
            // otherwise the remaining lops could never be scheduled
            if (!queuedNodes.isEmpty())
                throw new LopsException("Queued nodes should be 0 at this point, but " + queuedNodes.size() + " remain (no executable nodes were selected) \n");
            if (LOG.isTraceEnabled())
                LOG.trace("All done! queuedNodes = " + queuedNodes.size());
            done = true;
        } else {
            if (LOG.isTraceEnabled())
                LOG.trace("Generating jobs for group -- Node count=" + execNodes.size());
            // first process scalar instructions
            generateControlProgramJobs(execNodes, inst, writeInst, deleteInst);
            // copy unassigned lops in execnodes to gmrnodes
            // NOTE(review): execNodes.remove(i) below shifts the following elements
            // left while i is still incremented, so the lop that moves into slot i is
            // not examined by this loop; removeNodesForNextIteration receives execNodes
            // too and may change it further. Left unchanged because the correct fix
            // depends on that helper's behavior - TODO confirm.
            for (int i = 0; i < execNodes.size(); i++) {
                Lop node = execNodes.get(i);
                if (jobType(node, jobNodes) == -1) {
                    if (isCompatible(node, JobType.GMR)) {
                        if (node.hasNonBlockedInputs()) {
                            jobNodes.get(JobType.GMRCELL.getId()).add(node);
                            addChildren(node, jobNodes.get(JobType.GMRCELL.getId()), execNodes);
                        } else {
                            jobNodes.get(JobType.GMR.getId()).add(node);
                            addChildren(node, jobNodes.get(JobType.GMR.getId()), execNodes);
                        }
                    } else {
                        // (code 10) not compatible with GMR: undo the selection and queue
                        if (LOG.isTraceEnabled())
                            LOG.trace(indent + "Queueing -" + node.toString() + " (code 10)");
                        execNodes.remove(i);
                        finishedNodes.remove(node);
                        queuedNodes.add(node);
                        removeNodesForNextIteration(node, finishedNodes, execNodes, queuedNodes, jobNodes);
                    }
                }
            }
            // next generate MR instructions
            if (!execNodes.isEmpty())
                generateMRJobs(execNodes, inst, writeInst, deleteInst, jobNodes);
            handleSingleOutputJobs(execNodes, jobNodes, finishedNodes);
        }
    }
    // add write, delete and end-of-block instructions at the very end.
    inst.addAll(writeInst);
    inst.addAll(deleteInst);
    inst.addAll(endOfBlockInst);
    return inst;
}
use of org.apache.sysml.runtime.instructions.Instruction in project incubator-systemml by apache.
the class Dag method createPackedRmvarInstructions.
/**
 * Packs each run of consecutive remove-variable instructions (those for which
 * {@code isRemoveVariableNoFile()} holds) into a single remove instruction that
 * names all variables of the run. All other instructions are passed through in
 * their original order.
 *
 * @param insts instructions to compact
 * @return new list with every run of such remove instructions replaced by one packed instruction
 */
private static ArrayList<Instruction> createPackedRmvarInstructions(ArrayList<Instruction> insts) {
    ArrayList<Instruction> packed = new ArrayList<>();
    // names of the variables removed by the run currently being collected
    ArrayList<String> pendingVars = new ArrayList<>();
    for (Instruction current : insts) {
        boolean plainRmvar = current instanceof VariableCPInstruction && ((VariableCPInstruction) current).isRemoveVariableNoFile();
        if (plainRmvar) {
            // still inside a run: remember the variable name and move on
            pendingVars.add(((VariableCPInstruction) current).getInput1().getName());
            continue;
        }
        // the run (if any) ends here: emit it as one packed instruction
        if (!pendingVars.isEmpty()) {
            String[] names = pendingVars.toArray(new String[0]);
            packed.add(VariableCPInstruction.prepareRemoveInstruction(names));
            pendingVars.clear();
        }
        packed.add(current);
    }
    // a run that reaches the end of the list still has to be emitted
    if (!pendingVars.isEmpty()) {
        String[] names = pendingVars.toArray(new String[0]);
        packed.add(VariableCPInstruction.prepareRemoveInstruction(names));
    }
    return packed;
}
use of org.apache.sysml.runtime.instructions.Instruction in project incubator-systemml by apache.
the class Dag method setupNodeOutputs.
/**
 * Method to setup output filenames and outputInfos, and to generate related instructions.
 * <p>
 * The returned {@code NodeOutput} collects, depending on the kind of lop, the output
 * variable name and file name together with "pre", "post" and "last" instructions
 * (createvar, write, copy/move, and remove-variable instructions).
 * <ul>
 * <li>Lops that do not execute at {@code ExecLocation.Data} get a freshly generated
 *     label (and, for non-scalars, a freshly generated file name); function calls get
 *     one createvar instruction per function output instead.</li>
 * <li>Scalar Data lops get either an assign-variable instruction or a write
 *     instruction.</li>
 * <li>Non-scalar transient Data lops get a copy instruction (CP, or when
 *     {@code copyTWrite} is set) or a temporary variable plus a move instruction.</li>
 * <li>Non-scalar persistent Data lops get either createvar/write/remove instructions
 *     (MR) or a single write instruction (otherwise).</li>
 * </ul>
 * Side effects visible here: the consumer count of {@code node} is set, and the label
 * and/or file name in the output parameters of {@code node} may be overwritten.
 *
 * @param node low-level operator
 * @param et exec type
 * @param cellModeOverride override mode; passed to {@code getOutputInfo} when computing the output info of {@code node}
 * @param copyTWrite only consulted for non-scalar transient Data lops with a non-CP exec type: if true, a single copy
 *        instruction from the first input's label to this lop's label is generated and the method returns immediately
 * @return node output
 */
private NodeOutput setupNodeOutputs(Lop node, ExecType et, boolean cellModeOverride, boolean copyTWrite) {
    OutputParameters oparams = node.getOutputParameters();
    NodeOutput out = new NodeOutput();
    // the number of consumers is the number of lops reading this lop's output
    node.setConsumerCount(node.getOutputs().size());
    // Compute the output format for this node
    out.setOutInfo(getOutputInfo(node, cellModeOverride));
    // Lops that are not of type Data need generated names; for Data lops (handled
    // in the else branch below) the outputs are explicitly specified
    if (node.getExecLocation() != ExecLocation.Data) {
        if (node.getDataType() == DataType.SCALAR) {
            // scalar result: only a generated variable name, removed at the end
            oparams.setLabel(Lop.SCALAR_VAR_NAME_PREFIX + var_index.getNextID());
            out.setVarName(oparams.getLabel());
            Instruction currInstr = VariableCPInstruction.prepareRemoveInstruction(oparams.getLabel());
            currInstr.setLocation(node);
            out.addLastInstruction(currInstr);
        } else if (// general case
        !(node instanceof FunctionCallCP)) {
            // generate temporary filename and a variable name to hold the
            // output produced by "rootNode"
            oparams.setFile_name(getNextUniqueFilename());
            oparams.setLabel(getNextUniqueVarname(node.getDataType()));
            // generate an instruction that creates a symbol table entry for the new variable
            // String createInst = prepareVariableInstruction("createvar", node);
            // out.addPreInstruction(CPInstructionParser.parseSingleInstruction(createInst));
            int rpb = (int) oparams.getRowsInBlock();
            int cpb = (int) oparams.getColsInBlock();
            Instruction createvarInst = VariableCPInstruction.prepareCreateVariableInstruction(oparams.getLabel(), oparams.getFile_name(), true, node.getDataType(), OutputInfo.outputInfoToString(getOutputInfo(node, false)), new MatrixCharacteristics(oparams.getNumRows(), oparams.getNumCols(), rpb, cpb, oparams.getNnz()), oparams.getUpdateType());
            createvarInst.setLocation(node);
            out.addPreInstruction(createvarInst);
            // temp file as well as the variable has to be deleted at the end
            Instruction currInstr = VariableCPInstruction.prepareRemoveInstruction(oparams.getLabel());
            currInstr.setLocation(node);
            out.addLastInstruction(currInstr);
            // finally, add the generated filename and variable name to the list of outputs
            out.setFileName(oparams.getFile_name());
            out.setVarName(oparams.getLabel());
        } else {
            // If the function call is set with output lops (e.g., multi return builtin),
            // generate a createvar instruction for each function output
            FunctionCallCP fcall = (FunctionCallCP) node;
            if (fcall.getFunctionOutputs() != null) {
                for (Lop fnOut : fcall.getFunctionOutputs()) {
                    OutputParameters fnOutParams = fnOut.getOutputParameters();
                    // OutputInfo oinfo = getOutputInfo((N)fnOut, false);
                    // NOTE(review): the update type is taken from the function call's own
                    // output parameters (oparams), not from fnOutParams - confirm this is intended
                    Instruction createvarInst = VariableCPInstruction.prepareCreateVariableInstruction(fnOutParams.getLabel(), getFilePath() + fnOutParams.getLabel(), true, fnOut.getDataType(), OutputInfo.outputInfoToString(getOutputInfo(fnOut, false)), new MatrixCharacteristics(fnOutParams.getNumRows(), fnOutParams.getNumCols(), (int) fnOutParams.getRowsInBlock(), (int) fnOutParams.getColsInBlock(), fnOutParams.getNnz()), oparams.getUpdateType());
                    // use the location of the function call if it has a begin line,
                    // otherwise fall back to the location of the output lop
                    if (node._beginLine != 0)
                        createvarInst.setLocation(node);
                    else
                        createvarInst.setLocation(fnOut);
                    out.addPreInstruction(createvarInst);
                }
            }
        }
    } else // rootNode is of type Data
    {
        if (node.getDataType() == DataType.SCALAR) {
            // generate assignment operations for final and transient writes
            // (taken when no file name is set and the lop is not a persistent write)
            if (oparams.getFile_name() == null && !(node instanceof Data && ((Data) node).isPersistentWrite())) {
                String io_inst = prepareAssignVarInstruction(node.getInputs().get(0), node);
                CPInstruction currInstr = CPInstructionParser.parseSingleInstruction(io_inst);
                // prefer the lop's own location; fall back to its first input
                if (node._beginLine != 0)
                    currInstr.setLocation(node);
                else if (!node.getInputs().isEmpty())
                    currInstr.setLocation(node.getInputs().get(0));
                out.addLastInstruction(currInstr);
            } else {
                // CP PERSISTENT WRITE SCALARS
                // the write instruction is built from the label of the first input and
                // the label of the lop registered under DataExpression.IO_FILENAME
                Lop fname = ((Data) node).getNamedInputLop(DataExpression.IO_FILENAME);
                String io_inst = node.getInstructions(node.getInputs().get(0).getOutputParameters().getLabel(), fname.getOutputParameters().getLabel());
                CPInstruction currInstr = CPInstructionParser.parseSingleInstruction(io_inst);
                if (node._beginLine != 0)
                    currInstr.setLocation(node);
                else if (!node.getInputs().isEmpty())
                    currInstr.setLocation(node.getInputs().get(0));
                out.addLastInstruction(currInstr);
            }
        } else {
            if (((Data) node).isTransient()) {
                if (et == ExecType.CP) {
                    // If transient matrix write is in CP then its input MUST be executed in CP as well.
                    // get variable and filename associated with the input
                    String inputFileName = node.getInputs().get(0).getOutputParameters().getFile_name();
                    String inputVarName = node.getInputs().get(0).getOutputParameters().getLabel();
                    String constVarName = oparams.getLabel();
                    String constFileName = inputFileName + constVarName;
                    /*
                     * Symbol Table state must change as follows:
                     *
                     * FROM:
                     * mvar1 -> temp21
                     *
                     * TO:
                     * mVar1 -> temp21
                     * tVarH -> temp21
                     */
                    Instruction currInstr = VariableCPInstruction.prepareCopyInstruction(inputVarName, constVarName);
                    currInstr.setLocation(node);
                    out.addLastInstruction(currInstr);
                    // only the file name is recorded on the output in this branch
                    out.setFileName(constFileName);
                } else {
                    if (copyTWrite) {
                        // shortcut: copy the input variable to this lop's label and stop here
                        Instruction currInstr = VariableCPInstruction.prepareCopyInstruction(node.getInputs().get(0).getOutputParameters().getLabel(), oparams.getLabel());
                        currInstr.setLocation(node);
                        out.addLastInstruction(currInstr);
                        return out;
                    }
                    /*
                     * Since the "rootNode" is a transient data node, we first need to generate a
                     * temporary filename as well as a variable name to hold the <i>immediate</i>
                     * output produced by "rootNode". These generated HDFS filename and the
                     * variable name must be changed at the end of an iteration/program block
                     * so that the subsequent iteration/program block can correctly access the
                     * generated data. Therefore, we need to distinguish between the following:
                     *
                     * 1) Temporary file name & variable name: They hold the immediate output
                     * produced by "rootNode". Both names are generated below.
                     *
                     * 2) Constant file name & variable name: They are constant across iterations.
                     * Variable name is given by rootNode's label that is created in the upper layers.
                     * File name is generated by concatenating "temporary file name" and "constant variable name".
                     *
                     * Temporary files must be moved to constant files at the end of the iteration/program block.
                     */
                    // generate temporary filename & var name
                    String tempVarName = oparams.getLabel() + "temp";
                    String tempFileName = getNextUniqueFilename();
                    int rpb = (int) oparams.getRowsInBlock();
                    int cpb = (int) oparams.getColsInBlock();
                    Instruction createvarInst = VariableCPInstruction.prepareCreateVariableInstruction(tempVarName, tempFileName, true, node.getDataType(), OutputInfo.outputInfoToString(out.getOutInfo()), new MatrixCharacteristics(oparams.getNumRows(), oparams.getNumCols(), rpb, cpb, oparams.getNnz()), oparams.getUpdateType());
                    createvarInst.setLocation(node);
                    out.addPreInstruction(createvarInst);
                    String constVarName = oparams.getLabel();
                    String constFileName = tempFileName + constVarName;
                    // the lop's own file name becomes the constant file name under getFilePath()
                    oparams.setFile_name(getFilePath() + constFileName);
                    /*
                     * Since this is a node that denotes a transient read/write, we need to make sure
                     * that the data computed for a given variable in a given iteration is passed on
                     * to the next iteration. This is done by generating miscellaneous instructions
                     * that gets executed at the end of the program block.
                     *
                     * The state of the symbol table must change
                     *
                     * FROM:
                     * tVarA -> temp21tVarA (old copy of temp21)
                     * tVarAtemp -> temp21 (new copy that should override the old copy)
                     *
                     * TO:
                     * tVarA -> temp21tVarA
                     */
                    // Generate a single mvvar instruction (e.g., mvvar tempA A)
                    // instead of two instructions "cpvar tempA A" and "rmvar tempA"
                    Instruction currInstr = VariableCPInstruction.prepareMoveInstruction(tempVarName, constVarName);
                    currInstr.setLocation(node);
                    out.addLastInstruction(currInstr);
                    // finally, add the temporary filename and variable name to the list of outputs
                    out.setFileName(tempFileName);
                    out.setVarName(tempVarName);
                }
            } else // rootNode is not a transient write. It is a persistent write.
            {
                if (et == ExecType.MR) {
                    // MR PERSISTENT WRITE
                    // create a variable to hold the result produced by this "rootNode"
                    oparams.setLabel("pVar" + var_index.getNextID());
                    int rpb = (int) oparams.getRowsInBlock();
                    int cpb = (int) oparams.getColsInBlock();
                    Lop fnameLop = ((Data) node).getNamedInputLop(DataExpression.IO_FILENAME);
                    // a literal file name is used as is; otherwise the label of the file name
                    // lop is wrapped in Lop.VARIABLE_NAME_PLACEHOLDER on both sides
                    String fnameStr = (fnameLop instanceof Data && ((Data) fnameLop).isLiteral()) ? fnameLop.getOutputParameters().getLabel() : Lop.VARIABLE_NAME_PLACEHOLDER + fnameLop.getOutputParameters().getLabel() + Lop.VARIABLE_NAME_PLACEHOLDER;
                    Instruction createvarInst;
                    // CSV and MM: the variable is created on a generated temporary file, and an
                    // additional CP write instruction (post instruction) writes to the target file.
                    // All other formats: the variable is created directly on the target file name.
                    if (oparams.getFormat() == Format.CSV) {
                        String tempFileName = getNextUniqueFilename();
                        String createInst = node.getInstructions(tempFileName);
                        createvarInst = CPInstructionParser.parseSingleInstruction(createInst);
                        // NOTE: no instruction patching because final write from cp instruction
                        String writeInst = node.getInstructions(oparams.getLabel(), fnameLop.getOutputParameters().getLabel());
                        CPInstruction currInstr = CPInstructionParser.parseSingleInstruction(writeInst);
                        currInstr.setLocation(node);
                        out.addPostInstruction(currInstr);
                        // remove the variable (rmfilevar with boolean operand "true")
                        CPInstruction tempInstr = CPInstructionParser.parseSingleInstruction("CP" + Lop.OPERAND_DELIMITOR + "rmfilevar" + Lop.OPERAND_DELIMITOR + oparams.getLabel() + Lop.VALUETYPE_PREFIX + Expression.ValueType.UNKNOWN + Lop.OPERAND_DELIMITOR + "true" + Lop.VALUETYPE_PREFIX + "BOOLEAN");
                        tempInstr.setLocation(node);
                        out.addLastInstruction(tempInstr);
                    } else if (oparams.getFormat() == Format.MM) {
                        createvarInst = VariableCPInstruction.prepareCreateVariableInstruction(oparams.getLabel(), getNextUniqueFilename(), false, node.getDataType(), OutputInfo.outputInfoToString(getOutputInfo(node, false)), new MatrixCharacteristics(oparams.getNumRows(), oparams.getNumCols(), rpb, cpb, oparams.getNnz()), oparams.getUpdateType());
                        // NOTE: no instruction patching because final write from cp instruction
                        String writeInst = node.getInstructions(oparams.getLabel(), fnameLop.getOutputParameters().getLabel());
                        CPInstruction currInstr = CPInstructionParser.parseSingleInstruction(writeInst);
                        currInstr.setLocation(node);
                        out.addPostInstruction(currInstr);
                        // remove the variable (rmfilevar with boolean operand "true")
                        CPInstruction tempInstr = CPInstructionParser.parseSingleInstruction("CP" + Lop.OPERAND_DELIMITOR + "rmfilevar" + Lop.OPERAND_DELIMITOR + oparams.getLabel() + Lop.VALUETYPE_PREFIX + Expression.ValueType.UNKNOWN + Lop.OPERAND_DELIMITOR + "true" + Lop.VALUETYPE_PREFIX + "BOOLEAN");
                        tempInstr.setLocation(node);
                        out.addLastInstruction(tempInstr);
                    } else {
                        createvarInst = VariableCPInstruction.prepareCreateVariableInstruction(oparams.getLabel(), fnameStr, false, node.getDataType(), OutputInfo.outputInfoToString(getOutputInfo(node, false)), new MatrixCharacteristics(oparams.getNumRows(), oparams.getNumCols(), rpb, cpb, oparams.getNnz()), oparams.getUpdateType());
                        // remove the variable (rmfilevar with boolean operand "false";
                        // presumably the flag controls deletion of the underlying file - TODO confirm)
                        CPInstruction currInstr = CPInstructionParser.parseSingleInstruction("CP" + Lop.OPERAND_DELIMITOR + "rmfilevar" + Lop.OPERAND_DELIMITOR + oparams.getLabel() + Lop.VALUETYPE_PREFIX + Expression.ValueType.UNKNOWN + Lop.OPERAND_DELIMITOR + "false" + Lop.VALUETYPE_PREFIX + "BOOLEAN");
                        currInstr.setLocation(node);
                        out.addLastInstruction(currInstr);
                    }
                    createvarInst.setLocation(node);
                    out.addPreInstruction(createvarInst);
                    // finally, add the filename and variable name to the list of outputs
                    out.setFileName(oparams.getFile_name());
                    out.setVarName(oparams.getLabel());
                } else {
                    // CP PERSISTENT WRITE
                    // generate a write instruction that writes matrix to HDFS
                    Lop fname = ((Data) node).getNamedInputLop(DataExpression.IO_FILENAME);
                    String io_inst = node.getInstructions(node.getInputs().get(0).getOutputParameters().getLabel(), fname.getOutputParameters().getLabel());
                    // the instruction is parsed as a Spark instruction if the lop's exec type is SPARK, as a CP instruction otherwise
                    Instruction currInstr = (node.getExecType() == ExecType.SPARK) ? SPInstructionParser.parseSingleInstruction(io_inst) : CPInstructionParser.parseSingleInstruction(io_inst);
                    // prefer the location of the first input when it has a begin line, else the lop's own location
                    currInstr.setLocation((!node.getInputs().isEmpty() && node.getInputs().get(0)._beginLine != 0) ? node.getInputs().get(0) : node);
                    out.addLastInstruction(currInstr);
                }
            }
        }
    }
    return out;
}
use of org.apache.sysml.runtime.instructions.Instruction in project incubator-systemml by apache.
the class Dag method generateMapReduceInstructions.
/**
* Method to generate MapReduce job instructions from a given set of nodes.
*
* @param execNodes list of exec nodes
* @param inst list of instructions
* @param writeinst list of write instructions
* @param deleteinst list of delete instructions
* @param rmvarinst list of rmvar instructions
* @param jt job type
*/
private void generateMapReduceInstructions(ArrayList<Lop> execNodes, ArrayList<Instruction> inst, ArrayList<Instruction> writeinst, ArrayList<Instruction> deleteinst, ArrayList<Instruction> rmvarinst, JobType jt) {
ArrayList<Byte> resultIndices = new ArrayList<>();
ArrayList<String> inputs = new ArrayList<>();
ArrayList<String> outputs = new ArrayList<>();
ArrayList<InputInfo> inputInfos = new ArrayList<>();
ArrayList<OutputInfo> outputInfos = new ArrayList<>();
ArrayList<Long> numRows = new ArrayList<>();
ArrayList<Long> numCols = new ArrayList<>();
ArrayList<Long> numRowsPerBlock = new ArrayList<>();
ArrayList<Long> numColsPerBlock = new ArrayList<>();
ArrayList<String> mapperInstructions = new ArrayList<>();
ArrayList<String> randInstructions = new ArrayList<>();
ArrayList<String> recordReaderInstructions = new ArrayList<>();
int numReducers = 0;
int replication = 1;
ArrayList<String> inputLabels = new ArrayList<>();
ArrayList<String> outputLabels = new ArrayList<>();
ArrayList<Instruction> renameInstructions = new ArrayList<>();
ArrayList<Instruction> variableInstructions = new ArrayList<>();
ArrayList<Instruction> postInstructions = new ArrayList<>();
ArrayList<Integer> MRJobLineNumbers = null;
if (DMLScript.ENABLE_DEBUG_MODE) {
MRJobLineNumbers = new ArrayList<>();
}
ArrayList<Lop> inputLops = new ArrayList<>();
boolean cellModeOverride = false;
/* Find the nodes that produce an output */
ArrayList<Lop> rootNodes = new ArrayList<>();
getOutputNodes(execNodes, rootNodes, jt);
if (LOG.isTraceEnabled())
LOG.trace("# of root nodes = " + rootNodes.size());
/* Remove transient writes that are simple copy of transient reads */
if (jt == JobType.GMR || jt == JobType.GMRCELL) {
ArrayList<Lop> markedNodes = new ArrayList<>();
// only keep data nodes that are results of some computation.
for (Lop rnode : rootNodes) {
if (rnode.getExecLocation() == ExecLocation.Data && ((Data) rnode).isTransient() && ((Data) rnode).getOperationType() == OperationTypes.WRITE && ((Data) rnode).getDataType() == DataType.MATRIX) {
// no computation, just a copy
if (rnode.getInputs().get(0).getExecLocation() == ExecLocation.Data && ((Data) rnode.getInputs().get(0)).isTransient() && rnode.getOutputParameters().getLabel().equals(rnode.getInputs().get(0).getOutputParameters().getLabel())) {
markedNodes.add(rnode);
}
}
}
// delete marked nodes
rootNodes.removeAll(markedNodes);
markedNodes.clear();
if (rootNodes.isEmpty())
return;
}
// structure that maps node to their indices that will be used in the instructions
HashMap<Lop, Integer> nodeIndexMapping = new HashMap<>();
for (Lop rnode : rootNodes) {
getInputPathsAndParameters(rnode, execNodes, inputs, inputInfos, numRows, numCols, numRowsPerBlock, numColsPerBlock, nodeIndexMapping, inputLabels, inputLops, MRJobLineNumbers);
}
// In case of RAND job, instructions are defined in the input file
if (jt == JobType.DATAGEN)
randInstructions = inputs;
int[] start_index = new int[1];
start_index[0] = inputs.size();
// currently, recordreader instructions are allowed only in GMR jobs
if (jt == JobType.GMR || jt == JobType.GMRCELL) {
for (Lop rnode : rootNodes) {
getRecordReaderInstructions(rnode, execNodes, inputs, recordReaderInstructions, nodeIndexMapping, start_index, inputLabels, inputLops, MRJobLineNumbers);
if (recordReaderInstructions.size() > 1)
throw new LopsException("MapReduce job can only have a single recordreader instruction: " + recordReaderInstructions.toString());
}
}
//
if (jt != JobType.REBLOCK && jt != JobType.CSV_REBLOCK && jt != JobType.DATAGEN) {
for (int i = 0; i < inputInfos.size(); i++) if (inputInfos.get(i) == InputInfo.BinaryCellInputInfo || inputInfos.get(i) == InputInfo.TextCellInputInfo)
cellModeOverride = true;
}
if (!recordReaderInstructions.isEmpty() || jt == JobType.GROUPED_AGG)
cellModeOverride = true;
for (int i = 0; i < rootNodes.size(); i++) {
getMapperInstructions(rootNodes.get(i), execNodes, inputs, mapperInstructions, nodeIndexMapping, start_index, inputLabels, inputLops, MRJobLineNumbers);
}
if (LOG.isTraceEnabled()) {
LOG.trace(" Input strings: " + inputs.toString());
if (jt == JobType.DATAGEN)
LOG.trace(" Rand instructions: " + getCSVString(randInstructions));
if (jt == JobType.GMR)
LOG.trace(" RecordReader instructions: " + getCSVString(recordReaderInstructions));
LOG.trace(" Mapper instructions: " + getCSVString(mapperInstructions));
}
/* Get Shuffle and Reducer Instructions */
ArrayList<String> shuffleInstructions = new ArrayList<>();
ArrayList<String> aggInstructionsReducer = new ArrayList<>();
ArrayList<String> otherInstructionsReducer = new ArrayList<>();
for (Lop rn : rootNodes) {
int resultIndex = getAggAndOtherInstructions(rn, execNodes, shuffleInstructions, aggInstructionsReducer, otherInstructionsReducer, nodeIndexMapping, start_index, inputLabels, inputLops, MRJobLineNumbers);
if (resultIndex == -1)
throw new LopsException("Unexpected error in piggybacking!");
if (rn.getExecLocation() == ExecLocation.Data && ((Data) rn).getOperationType() == Data.OperationTypes.WRITE && ((Data) rn).isTransient() && rootNodes.contains(rn.getInputs().get(0))) {
// Both rn (a transient write) and its input are root nodes.
// Instead of creating two copies of the data, simply generate a cpvar instruction
NodeOutput out = setupNodeOutputs(rn, ExecType.MR, cellModeOverride, true);
writeinst.addAll(out.getLastInstructions());
} else {
resultIndices.add(Byte.valueOf((byte) resultIndex));
// setup output filenames and outputInfos and generate related instructions
NodeOutput out = setupNodeOutputs(rn, ExecType.MR, cellModeOverride, false);
outputLabels.add(out.getVarName());
outputs.add(out.getFileName());
outputInfos.add(out.getOutInfo());
if (LOG.isTraceEnabled()) {
LOG.trace(" Output Info: " + out.getFileName() + ";" + OutputInfo.outputInfoToString(out.getOutInfo()) + ";" + out.getVarName());
}
renameInstructions.addAll(out.getLastInstructions());
variableInstructions.addAll(out.getPreInstructions());
postInstructions.addAll(out.getPostInstructions());
}
}
/* Convert the collected result indices into a primitive byte array */
byte[] resultIndicesByte = new byte[resultIndices.size()];
for (int i = 0; i < resultIndicesByte.length; i++) {
resultIndicesByte[i] = resultIndices.get(i).byteValue();
}
if (LOG.isTraceEnabled()) {
LOG.trace(" Shuffle Instructions: " + getCSVString(shuffleInstructions));
LOG.trace(" Aggregate Instructions: " + getCSVString(aggInstructionsReducer));
LOG.trace(" Other instructions =" + getCSVString(otherInstructionsReducer));
LOG.trace(" Output strings: " + outputs.toString());
LOG.trace(" ResultIndices = " + resultIndices.toString());
}
/* Prepare the MapReduce job instruction */
MRJobInstruction mr = new MRJobInstruction(jt);
// check if this is a map-only job. If not, set the number of reducers
if (!shuffleInstructions.isEmpty() || !aggInstructionsReducer.isEmpty() || !otherInstructionsReducer.isEmpty())
numReducers = total_reducers;
// set inputs, outputs, and other properties for the job
mr.setInputOutputLabels(inputLabels.toArray(new String[0]), outputLabels.toArray(new String[0]));
mr.setOutputs(resultIndicesByte);
mr.setDimsUnknownFilePrefix(getFilePath());
mr.setNumberOfReducers(numReducers);
mr.setReplication(replication);
// set instructions for recordReader and mapper
mr.setRecordReaderInstructions(getCSVString(recordReaderInstructions));
mr.setMapperInstructions(getCSVString(mapperInstructions));
// compute and set mapper memory requirements (for consistency of runtime piggybacking)
if (jt == JobType.GMR) {
double mem = 0;
for (Lop n : execNodes) mem += computeFootprintInMapper(n);
mr.setMemoryRequirements(mem);
}
if (jt == JobType.DATAGEN)
mr.setRandInstructions(getCSVString(randInstructions));
// set shuffle instructions
mr.setShuffleInstructions(getCSVString(shuffleInstructions));
// set reducer instruction
mr.setAggregateInstructionsInReducer(getCSVString(aggInstructionsReducer));
mr.setOtherInstructionsInReducer(getCSVString(otherInstructionsReducer));
if (DMLScript.ENABLE_DEBUG_MODE) {
// set line number information for each MR instruction
mr.setMRJobInstructionsLineNumbers(MRJobLineNumbers);
}
/* Add the prepared instructions to output set */
inst.addAll(variableInstructions);
inst.add(mr);
inst.addAll(postInstructions);
deleteinst.addAll(renameInstructions);
for (Lop l : inputLops) {
if (DMLScript.ENABLE_DEBUG_MODE) {
processConsumers(l, rmvarinst, deleteinst, l);
} else {
processConsumers(l, rmvarinst, deleteinst, null);
}
}
}
Aggregations