use of org.apache.sysml.lops.OutputParameters in project incubator-systemml by apache.
the class Dag method getOutputInfo.
/**
* Method that determines the output format for a given node.
*
* @param node low-level operator
* @param cellModeOverride override mode
* @return output info
* @throws LopsException if LopsException occurs
*/
private static OutputInfo getOutputInfo(Lop node, boolean cellModeOverride) throws LopsException {
if ((node.getDataType() == DataType.SCALAR && node.getExecType() == ExecType.CP) || node instanceof FunctionCallCP)
return null;
OutputInfo oinfo = null;
OutputParameters oparams = node.getOutputParameters();
if (oparams.isBlocked()) {
if (!cellModeOverride)
oinfo = OutputInfo.BinaryBlockOutputInfo;
else {
// output format is overridden, for example, due to recordReaderInstructions in the job
oinfo = OutputInfo.BinaryCellOutputInfo;
// which stores the outputInfo.
try {
oparams.setDimensions(oparams.getNumRows(), oparams.getNumCols(), -1, -1, oparams.getNnz(), oparams.getUpdateType());
} catch (HopsException e) {
throw new LopsException(node.printErrorLocation() + "error in getOutputInfo in Dag ", e);
}
}
} else {
if (oparams.getFormat() == Format.TEXT || oparams.getFormat() == Format.MM)
oinfo = OutputInfo.TextCellOutputInfo;
else if (oparams.getFormat() == Format.CSV) {
oinfo = OutputInfo.CSVOutputInfo;
} else {
oinfo = OutputInfo.BinaryCellOutputInfo;
}
}
/* Instead of following hardcoding, one must get this information from Lops */
if (node.getType() == Type.SortKeys && node.getExecType() == ExecType.MR) {
if (((SortKeys) node).getOpType() == SortKeys.OperationTypes.Indexes)
oinfo = OutputInfo.BinaryBlockOutputInfo;
else
oinfo = OutputInfo.OutputInfoForSortOutput;
} else if (node.getType() == Type.CombineBinary) {
// Output format of CombineBinary (CB) depends on how the output is consumed
CombineBinary combine = (CombineBinary) node;
if (combine.getOperation() == org.apache.sysml.lops.CombineBinary.OperationTypes.PreSort) {
oinfo = OutputInfo.OutputInfoForSortInput;
} else if (combine.getOperation() == org.apache.sysml.lops.CombineBinary.OperationTypes.PreCentralMoment || combine.getOperation() == org.apache.sysml.lops.CombineBinary.OperationTypes.PreCovUnweighted || combine.getOperation() == org.apache.sysml.lops.CombineBinary.OperationTypes.PreGroupedAggUnweighted) {
oinfo = OutputInfo.WeightedPairOutputInfo;
}
} else if (node.getType() == Type.CombineTernary) {
oinfo = OutputInfo.WeightedPairOutputInfo;
} else if (node.getType() == Type.CentralMoment || node.getType() == Type.CoVariance) {
// CMMR always operate in "cell mode",
// and the output is always in cell format
oinfo = OutputInfo.BinaryCellOutputInfo;
}
return oinfo;
}
use of org.apache.sysml.lops.OutputParameters in project incubator-systemml by apache.
the class Dag method computeFootprintInMapper.
/**
* Computes the memory footprint required to execute <code>node</code> in the mapper.
* It is used only for those nodes that use inputs from distributed cache. The returned
* value is utilized in limiting the number of instructions piggybacked onto a single GMR mapper.
*
* @param node low-level operator
* @return memory footprint
*/
private static double computeFootprintInMapper(Lop node) {
// Memory limits must be checked only for nodes that use distributed cache
if (!node.usesDistributedCache())
// default behavior
return 0.0;
OutputParameters in1dims = node.getInputs().get(0).getOutputParameters();
OutputParameters in2dims = node.getInputs().get(1).getOutputParameters();
double footprint = 0;
if (node instanceof MapMult) {
int dcInputIndex = node.distributedCacheInputIndex()[0];
footprint = AggBinaryOp.getMapmmMemEstimate(in1dims.getNumRows(), in1dims.getNumCols(), in1dims.getRowsInBlock(), in1dims.getColsInBlock(), in1dims.getNnz(), in2dims.getNumRows(), in2dims.getNumCols(), in2dims.getRowsInBlock(), in2dims.getColsInBlock(), in2dims.getNnz(), dcInputIndex, false);
} else if (node instanceof PMMJ) {
int dcInputIndex = node.distributedCacheInputIndex()[0];
footprint = AggBinaryOp.getMapmmMemEstimate(in1dims.getNumRows(), 1, in1dims.getRowsInBlock(), in1dims.getColsInBlock(), in1dims.getNnz(), in2dims.getNumRows(), in2dims.getNumCols(), in2dims.getRowsInBlock(), in2dims.getColsInBlock(), in2dims.getNnz(), dcInputIndex, true);
} else if (node instanceof AppendM) {
footprint = BinaryOp.footprintInMapper(in1dims.getNumRows(), in1dims.getNumCols(), in2dims.getNumRows(), in2dims.getNumCols(), in1dims.getRowsInBlock(), in1dims.getColsInBlock());
} else if (node instanceof BinaryM) {
footprint = BinaryOp.footprintInMapper(in1dims.getNumRows(), in1dims.getNumCols(), in2dims.getNumRows(), in2dims.getNumCols(), in1dims.getRowsInBlock(), in1dims.getColsInBlock());
} else {
// default behavior
return 0.0;
}
return footprint;
}
use of org.apache.sysml.lops.OutputParameters in project incubator-systemml by apache.
the class Dag method setupNodeOutputs.
/**
* Method to setup output filenames and outputInfos, and to generate related instructions
*
* @param node low-level operator
* @param et exec type
* @param cellModeOverride override mode
* @param copyTWrite ?
* @return node output
* @throws DMLRuntimeException if DMLRuntimeException occurs
* @throws LopsException if LopsException occurs
*/
private NodeOutput setupNodeOutputs(Lop node, ExecType et, boolean cellModeOverride, boolean copyTWrite) throws DMLRuntimeException, LopsException {
OutputParameters oparams = node.getOutputParameters();
NodeOutput out = new NodeOutput();
node.setConsumerCount(node.getOutputs().size());
// Compute the output format for this node
out.setOutInfo(getOutputInfo(node, cellModeOverride));
// since outputs are explicitly specified
if (node.getExecLocation() != ExecLocation.Data) {
if (node.getDataType() == DataType.SCALAR) {
oparams.setLabel(Lop.SCALAR_VAR_NAME_PREFIX + var_index.getNextID());
out.setVarName(oparams.getLabel());
Instruction currInstr = VariableCPInstruction.prepareRemoveInstruction(oparams.getLabel());
currInstr.setLocation(node);
out.addLastInstruction(currInstr);
} else if (node instanceof ParameterizedBuiltin && ((ParameterizedBuiltin) node).getOp() == org.apache.sysml.lops.ParameterizedBuiltin.OperationTypes.TRANSFORM) {
ParameterizedBuiltin pbi = (ParameterizedBuiltin) node;
Lop input = pbi.getNamedInput(ParameterizedBuiltinFunctionExpression.TF_FN_PARAM_DATA);
if (input.getDataType() == DataType.FRAME) {
// Output of transform is in CSV format, which gets subsequently reblocked
// TODO: change it to output binaryblock
Data dataInput = (Data) input;
oparams.setFile_name(getNextUniqueFilename());
oparams.setLabel(getNextUniqueVarname(DataType.MATRIX));
// generate an instruction that creates a symbol table entry for the new variable in CSV format
Data delimLop = (Data) dataInput.getNamedInputLop(DataExpression.DELIM_DELIMITER, DataExpression.DEFAULT_DELIM_DELIMITER);
Instruction createvarInst = VariableCPInstruction.prepareCreateVariableInstruction(oparams.getLabel(), oparams.getFile_name(), true, DataType.MATRIX, OutputInfo.outputInfoToString(OutputInfo.CSVOutputInfo), new MatrixCharacteristics(oparams.getNumRows(), oparams.getNumCols(), -1, -1, oparams.getNnz()), oparams.getUpdateType(), false, delimLop.getStringValue(), true);
createvarInst.setLocation(node);
out.addPreInstruction(createvarInst);
// temp file as well as the variable has to be deleted at the end
Instruction currInstr = VariableCPInstruction.prepareRemoveInstruction(oparams.getLabel());
currInstr.setLocation(node);
out.addLastInstruction(currInstr);
// finally, add the generated filename and variable name to the list of outputs
out.setFileName(oparams.getFile_name());
out.setVarName(oparams.getLabel());
} else {
throw new LopsException("Input to transform() has an invalid type: " + input.getDataType() + ", it must be FRAME.");
}
} else if (//general case
!(node instanceof FunctionCallCP)) {
// generate temporary filename and a variable name to hold the
// output produced by "rootNode"
oparams.setFile_name(getNextUniqueFilename());
oparams.setLabel(getNextUniqueVarname(node.getDataType()));
// generate an instruction that creates a symbol table entry for the new variable
//String createInst = prepareVariableInstruction("createvar", node);
//out.addPreInstruction(CPInstructionParser.parseSingleInstruction(createInst));
int rpb = (int) oparams.getRowsInBlock();
int cpb = (int) oparams.getColsInBlock();
Instruction createvarInst = VariableCPInstruction.prepareCreateVariableInstruction(oparams.getLabel(), oparams.getFile_name(), true, node.getDataType(), OutputInfo.outputInfoToString(getOutputInfo(node, false)), new MatrixCharacteristics(oparams.getNumRows(), oparams.getNumCols(), rpb, cpb, oparams.getNnz()), oparams.getUpdateType());
createvarInst.setLocation(node);
out.addPreInstruction(createvarInst);
// temp file as well as the variable has to be deleted at the end
Instruction currInstr = VariableCPInstruction.prepareRemoveInstruction(oparams.getLabel());
currInstr.setLocation(node);
out.addLastInstruction(currInstr);
// finally, add the generated filename and variable name to the list of outputs
out.setFileName(oparams.getFile_name());
out.setVarName(oparams.getLabel());
} else {
// If the function call is set with output lops (e.g., multi return builtin),
// generate a createvar instruction for each function output
FunctionCallCP fcall = (FunctionCallCP) node;
if (fcall.getFunctionOutputs() != null) {
for (Lop fnOut : fcall.getFunctionOutputs()) {
OutputParameters fnOutParams = fnOut.getOutputParameters();
//OutputInfo oinfo = getOutputInfo((N)fnOut, false);
Instruction createvarInst = VariableCPInstruction.prepareCreateVariableInstruction(fnOutParams.getLabel(), getFilePath() + fnOutParams.getLabel(), true, fnOut.getDataType(), OutputInfo.outputInfoToString(getOutputInfo(fnOut, false)), new MatrixCharacteristics(fnOutParams.getNumRows(), fnOutParams.getNumCols(), (int) fnOutParams.getRowsInBlock(), (int) fnOutParams.getColsInBlock(), fnOutParams.getNnz()), oparams.getUpdateType());
if (node._beginLine != 0)
createvarInst.setLocation(node);
else
createvarInst.setLocation(fnOut);
out.addPreInstruction(createvarInst);
}
}
}
} else // rootNode is of type Data
{
if (node.getDataType() == DataType.SCALAR) {
// generate assignment operations for final and transient writes
if (oparams.getFile_name() == null && !(node instanceof Data && ((Data) node).isPersistentWrite())) {
String io_inst = prepareAssignVarInstruction(node.getInputs().get(0), node);
CPInstruction currInstr = CPInstructionParser.parseSingleInstruction(io_inst);
if (node._beginLine != 0)
currInstr.setLocation(node);
else if (!node.getInputs().isEmpty())
currInstr.setLocation(node.getInputs().get(0));
out.addLastInstruction(currInstr);
} else {
//CP PERSISTENT WRITE SCALARS
Lop fname = ((Data) node).getNamedInputLop(DataExpression.IO_FILENAME);
String io_inst = node.getInstructions(node.getInputs().get(0).getOutputParameters().getLabel(), fname.getOutputParameters().getLabel());
CPInstruction currInstr = CPInstructionParser.parseSingleInstruction(io_inst);
if (node._beginLine != 0)
currInstr.setLocation(node);
else if (!node.getInputs().isEmpty())
currInstr.setLocation(node.getInputs().get(0));
out.addLastInstruction(currInstr);
}
} else {
if (((Data) node).isTransient()) {
if (et == ExecType.CP) {
// If transient matrix write is in CP then its input MUST be executed in CP as well.
// get variable and filename associated with the input
String inputFileName = node.getInputs().get(0).getOutputParameters().getFile_name();
String inputVarName = node.getInputs().get(0).getOutputParameters().getLabel();
String constVarName = oparams.getLabel();
String constFileName = inputFileName + constVarName;
/*
* Symbol Table state must change as follows:
*
* FROM:
* mvar1 -> temp21
*
* TO:
* mVar1 -> temp21
* tVarH -> temp21
*/
Instruction currInstr = VariableCPInstruction.prepareCopyInstruction(inputVarName, constVarName);
currInstr.setLocation(node);
out.addLastInstruction(currInstr);
out.setFileName(constFileName);
} else {
if (copyTWrite) {
Instruction currInstr = VariableCPInstruction.prepareCopyInstruction(node.getInputs().get(0).getOutputParameters().getLabel(), oparams.getLabel());
currInstr.setLocation(node);
out.addLastInstruction(currInstr);
return out;
}
/*
* Since the "rootNode" is a transient data node, we first need to generate a
* temporary filename as well as a variable name to hold the <i>immediate</i>
* output produced by "rootNode". These generated HDFS filename and the
* variable name must be changed at the end of an iteration/program block
* so that the subsequent iteration/program block can correctly access the
* generated data. Therefore, we need to distinguish between the following:
*
* 1) Temporary file name & variable name: They hold the immediate output
* produced by "rootNode". Both names are generated below.
*
* 2) Constant file name & variable name: They are constant across iterations.
* Variable name is given by rootNode's label that is created in the upper layers.
* File name is generated by concatenating "temporary file name" and "constant variable name".
*
* Temporary files must be moved to constant files at the end of the iteration/program block.
*/
// generate temporary filename & var name
String tempVarName = oparams.getLabel() + "temp";
String tempFileName = getNextUniqueFilename();
//String createInst = prepareVariableInstruction("createvar", tempVarName, node.getDataType(), node.getValueType(), tempFileName, oparams, out.getOutInfo());
//out.addPreInstruction(CPInstructionParser.parseSingleInstruction(createInst));
int rpb = (int) oparams.getRowsInBlock();
int cpb = (int) oparams.getColsInBlock();
Instruction createvarInst = VariableCPInstruction.prepareCreateVariableInstruction(tempVarName, tempFileName, true, node.getDataType(), OutputInfo.outputInfoToString(out.getOutInfo()), new MatrixCharacteristics(oparams.getNumRows(), oparams.getNumCols(), rpb, cpb, oparams.getNnz()), oparams.getUpdateType());
createvarInst.setLocation(node);
out.addPreInstruction(createvarInst);
String constVarName = oparams.getLabel();
String constFileName = tempFileName + constVarName;
oparams.setFile_name(getFilePath() + constFileName);
/*
* Since this is a node that denotes a transient read/write, we need to make sure
* that the data computed for a given variable in a given iteration is passed on
* to the next iteration. This is done by generating miscellaneous instructions
* that gets executed at the end of the program block.
*
* The state of the symbol table must change
*
* FROM:
* tVarA -> temp21tVarA (old copy of temp21)
* tVarAtemp -> temp21 (new copy that should override the old copy)
*
* TO:
* tVarA -> temp21tVarA
*/
// rename the temp variable to constant variable (e.g., cpvar tVarAtemp tVarA)
/*Instruction currInstr = VariableCPInstruction.prepareCopyInstruction(tempVarName, constVarName);
if(DMLScript.ENABLE_DEBUG_MODE) {
currInstr.setLineNum(node._beginLine);
}
out.addLastInstruction(currInstr);
Instruction tempInstr = VariableCPInstruction.prepareRemoveInstruction(tempVarName);
if(DMLScript.ENABLE_DEBUG_MODE) {
tempInstr.setLineNum(node._beginLine);
}
out.addLastInstruction(tempInstr);*/
// Generate a single mvvar instruction (e.g., mvvar tempA A)
// instead of two instructions "cpvar tempA A" and "rmvar tempA"
Instruction currInstr = VariableCPInstruction.prepareMoveInstruction(tempVarName, constVarName);
currInstr.setLocation(node);
out.addLastInstruction(currInstr);
// finally, add the temporary filename and variable name to the list of outputs
out.setFileName(tempFileName);
out.setVarName(tempVarName);
}
} else // rootNode is not a transient write. It is a persistent write.
{
if (et == ExecType.MR) {
//MR PERSISTENT WRITE
// create a variable to hold the result produced by this "rootNode"
oparams.setLabel("pVar" + var_index.getNextID());
//String createInst = prepareVariableInstruction("createvar", node);
//out.addPreInstruction(CPInstructionParser.parseSingleInstruction(createInst));
int rpb = (int) oparams.getRowsInBlock();
int cpb = (int) oparams.getColsInBlock();
Lop fnameLop = ((Data) node).getNamedInputLop(DataExpression.IO_FILENAME);
String fnameStr = (fnameLop instanceof Data && ((Data) fnameLop).isLiteral()) ? fnameLop.getOutputParameters().getLabel() : Lop.VARIABLE_NAME_PLACEHOLDER + fnameLop.getOutputParameters().getLabel() + Lop.VARIABLE_NAME_PLACEHOLDER;
Instruction createvarInst;
// part MM format file on hdfs.
if (oparams.getFormat() == Format.CSV) {
String tempFileName = getNextUniqueFilename();
String createInst = node.getInstructions(tempFileName);
createvarInst = CPInstructionParser.parseSingleInstruction(createInst);
//NOTE: no instruction patching because final write from cp instruction
String writeInst = node.getInstructions(oparams.getLabel(), fnameLop.getOutputParameters().getLabel());
CPInstruction currInstr = CPInstructionParser.parseSingleInstruction(writeInst);
currInstr.setLocation(node);
out.addPostInstruction(currInstr);
// remove the variable
CPInstruction tempInstr = CPInstructionParser.parseSingleInstruction("CP" + Lop.OPERAND_DELIMITOR + "rmfilevar" + Lop.OPERAND_DELIMITOR + oparams.getLabel() + Lop.VALUETYPE_PREFIX + Expression.ValueType.UNKNOWN + Lop.OPERAND_DELIMITOR + "true" + Lop.VALUETYPE_PREFIX + "BOOLEAN");
tempInstr.setLocation(node);
out.addLastInstruction(tempInstr);
} else if (oparams.getFormat() == Format.MM) {
createvarInst = VariableCPInstruction.prepareCreateVariableInstruction(oparams.getLabel(), getNextUniqueFilename(), false, node.getDataType(), OutputInfo.outputInfoToString(getOutputInfo(node, false)), new MatrixCharacteristics(oparams.getNumRows(), oparams.getNumCols(), rpb, cpb, oparams.getNnz()), oparams.getUpdateType());
//NOTE: no instruction patching because final write from cp instruction
String writeInst = node.getInstructions(oparams.getLabel(), fnameLop.getOutputParameters().getLabel());
CPInstruction currInstr = CPInstructionParser.parseSingleInstruction(writeInst);
currInstr.setLocation(node);
out.addPostInstruction(currInstr);
// remove the variable
CPInstruction tempInstr = CPInstructionParser.parseSingleInstruction("CP" + Lop.OPERAND_DELIMITOR + "rmfilevar" + Lop.OPERAND_DELIMITOR + oparams.getLabel() + Lop.VALUETYPE_PREFIX + Expression.ValueType.UNKNOWN + Lop.OPERAND_DELIMITOR + "true" + Lop.VALUETYPE_PREFIX + "BOOLEAN");
tempInstr.setLocation(node);
out.addLastInstruction(tempInstr);
} else {
createvarInst = VariableCPInstruction.prepareCreateVariableInstruction(oparams.getLabel(), fnameStr, false, node.getDataType(), OutputInfo.outputInfoToString(getOutputInfo(node, false)), new MatrixCharacteristics(oparams.getNumRows(), oparams.getNumCols(), rpb, cpb, oparams.getNnz()), oparams.getUpdateType());
// remove the variable
CPInstruction currInstr = CPInstructionParser.parseSingleInstruction("CP" + Lop.OPERAND_DELIMITOR + "rmfilevar" + Lop.OPERAND_DELIMITOR + oparams.getLabel() + Lop.VALUETYPE_PREFIX + Expression.ValueType.UNKNOWN + Lop.OPERAND_DELIMITOR + "false" + Lop.VALUETYPE_PREFIX + "BOOLEAN");
currInstr.setLocation(node);
out.addLastInstruction(currInstr);
}
createvarInst.setLocation(node);
out.addPreInstruction(createvarInst);
// finally, add the filename and variable name to the list of outputs
out.setFileName(oparams.getFile_name());
out.setVarName(oparams.getLabel());
} else {
//CP PERSISTENT WRITE
// generate a write instruction that writes matrix to HDFS
Lop fname = ((Data) node).getNamedInputLop(DataExpression.IO_FILENAME);
Instruction currInstr = null;
Lop inputLop = node.getInputs().get(0);
// Move the temporary file on HDFS to required persistent location, insteadof copying.
if (inputLop.getExecLocation() == ExecLocation.Data && inputLop.getOutputs().size() == 1 && ((Data) inputLop).isTransient() && ((Data) inputLop).getOutputParameters().isBlocked() && node.getOutputParameters().isBlocked()) {
// transient read feeding into persistent write in blocked representation
// simply, move the file
//prepare filename (literal or variable in order to support dynamic write)
String fnameStr = (fname instanceof Data && ((Data) fname).isLiteral()) ? fname.getOutputParameters().getLabel() : Lop.VARIABLE_NAME_PLACEHOLDER + fname.getOutputParameters().getLabel() + Lop.VARIABLE_NAME_PLACEHOLDER;
currInstr = (CPInstruction) VariableCPInstruction.prepareMoveInstruction(inputLop.getOutputParameters().getLabel(), fnameStr, "binaryblock");
} else {
String io_inst = node.getInstructions(node.getInputs().get(0).getOutputParameters().getLabel(), fname.getOutputParameters().getLabel());
if (node.getExecType() == ExecType.SPARK)
// This will throw an exception if the exectype of hop is set incorrectly
// Note: the exec type and exec location of lops needs to be set to SPARK and ControlProgram respectively
currInstr = SPInstructionParser.parseSingleInstruction(io_inst);
else
currInstr = CPInstructionParser.parseSingleInstruction(io_inst);
}
if (!node.getInputs().isEmpty() && node.getInputs().get(0)._beginLine != 0)
currInstr.setLocation(node.getInputs().get(0));
else
currInstr.setLocation(node);
out.addLastInstruction(currInstr);
}
}
}
}
return out;
}
Aggregations