Use of org.apache.sysml.lops.LopsException in project incubator-systemml by apache.
In the class Dag, method addNodeByJobType.
/*
* Add a node, and its relevant children, to the job-specific node vectors.
*/
private void addNodeByJobType(Lop node, ArrayList<ArrayList<Lop>> arr, ArrayList<Lop> execNodes, boolean eliminate) {
if (!eliminate) {
// Check if this lop defines a MR job.
if (node.definesMRJob()) {
// find the corresponding JobType
JobType jt = JobType.findJobTypeFromLop(node);
if (jt == null) {
throw new LopsException(node.printErrorLocation() + "No matching JobType is found for a the lop type: " + node.getType() + " \n");
}
if (jt == JobType.GMR) {
if (node.hasNonBlockedInputs()) {
int gmrcell_index = JobType.GMRCELL.getId();
arr.get(gmrcell_index).add(node);
int from = arr.get(gmrcell_index).size();
addChildren(node, arr.get(gmrcell_index), execNodes);
int to = arr.get(gmrcell_index).size();
// check against GMR only, not against GMRCELL
if (!isCompatible(arr.get(gmrcell_index), JobType.GMR, from, to))
throw new LopsException(node.printErrorLocation() + "Error during compatibility check \n");
} else {
// if this node has a child in the DATAGEN (RAND) job vector, add it to the
// RAND job; otherwise, add it to a GMR job
if (hasChildNode(node, arr.get(JobType.DATAGEN.getId()))) {
arr.get(JobType.DATAGEN.getId()).add(node);
// we should NOT call 'addChildren' because the appropriate
// child nodes have already been added to the RAND job
} else {
int gmr_index = JobType.GMR.getId();
arr.get(gmr_index).add(node);
int from = arr.get(gmr_index).size();
addChildren(node, arr.get(gmr_index), execNodes);
int to = arr.get(gmr_index).size();
if (!isCompatible(arr.get(gmr_index), JobType.GMR, from, to))
throw new LopsException(node.printErrorLocation() + "Error during compatibility check \n");
}
}
} else {
int index = jt.getId();
arr.get(index).add(node);
int from = arr.get(index).size();
addChildren(node, arr.get(index), execNodes);
int to = arr.get(index).size();
// check if all added nodes are compatible with current job
if (!isCompatible(arr.get(index), jt, from, to)) {
throw new LopsException("Unexpected error in addNodeByType.");
}
}
return;
}
}
if (eliminate) {
// Note that eliminate flag is set only for 'group' lops
if (node.hasNonBlockedInputs())
arr.get(JobType.GMRCELL.getId()).add(node);
else
arr.get(JobType.GMR.getId()).add(node);
return;
}
/*
* If this lop does not define a job, check if it uses the output of any
* specialized job; i.e., if this lop has a child node in any of the
* job-specific vectors, then add it to that vector. Note: this lop must
* be added to ONLY ONE of the job-specific vectors.
*/
int numAdded = 0;
for (JobType j : JobType.values()) {
if (j.getId() > 0 && hasDirectChildNode(node, arr.get(j.getId()))) {
if (isCompatible(node, j)) {
arr.get(j.getId()).add(node);
numAdded += 1;
}
}
}
if (numAdded > 1) {
throw new LopsException("Unexpected error in addNodeByJobType(): A given lop can ONLY be added to a single job vector (numAdded = " + numAdded + ").");
}
}
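For orientation, the sketch below shows, in simplified and hypothetical form, how job-specific node vectors of the kind this method expects might be set up and filled: one list per JobType, indexed by JobType.getId(), with every executable lop routed through addNodeByJobType. The driver loop, the sizing by JobType.values().length, and the variable names are assumptions for illustration, not the actual orchestration code in Dag.

// Hypothetical driver sketch (not the actual Dag orchestration code):
// allocate one node vector per JobType -- assuming ids are contiguous,
// non-negative, and bounded by JobType.values().length -- and route every
// executable lop through addNodeByJobType.
// (Handling of the 'eliminate' flag for group lops is omitted here.)
ArrayList<ArrayList<Lop>> jobNodes = new ArrayList<>();
for (int i = 0; i < JobType.values().length; i++)
    jobNodes.add(new ArrayList<Lop>());
for (Lop lop : execNodes)
    addNodeByJobType(lop, jobNodes, execNodes, false);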
Use of org.apache.sysml.lops.LopsException in project incubator-systemml by apache.
In the class Dag, method getAggAndOtherInstructions.
/**
* Method to populate aggregate and other instructions in the reducer.
*
* @param node low-level operator
* @param execNodes list of exec nodes
* @param shuffleInstructions list of shuffle instructions
* @param aggInstructionsReducer list of aggregate instructions for the reducer
* @param otherInstructionsReducer list of other (non-aggregate) instructions for the reducer
* @param nodeIndexMapping node index mapping
* @param start_index start index
* @param inputLabels list of input labels
* @param inputLops list of input lops
* @param MRJobLineNumbers MR job line numbers
* @return index assigned to the node's output, or -1 if no instruction was generated for it
*/
private int getAggAndOtherInstructions(Lop node, ArrayList<Lop> execNodes, ArrayList<String> shuffleInstructions, ArrayList<String> aggInstructionsReducer, ArrayList<String> otherInstructionsReducer, HashMap<Lop, Integer> nodeIndexMapping, int[] start_index, ArrayList<String> inputLabels, ArrayList<Lop> inputLops, ArrayList<Integer> MRJobLineNumbers) {
int ret_val = -1;
if (nodeIndexMapping.containsKey(node))
return nodeIndexMapping.get(node);
if (!execNodes.contains(node))
return ret_val;
ArrayList<Integer> inputIndices = new ArrayList<>();
// For a WRITE data lop, recurse only on its first input (the data being written); otherwise, recurse on all inputs.
if (node.getType() == Lop.Type.Data && ((Data) node).getOperationType() == Data.OperationTypes.WRITE) {
ret_val = getAggAndOtherInstructions(node.getInputs().get(0), execNodes, shuffleInstructions, aggInstructionsReducer, otherInstructionsReducer, nodeIndexMapping, start_index, inputLabels, inputLops, MRJobLineNumbers);
inputIndices.add(ret_val);
} else {
for (Lop cnode : node.getInputs()) {
ret_val = getAggAndOtherInstructions(cnode, execNodes, shuffleInstructions, aggInstructionsReducer, otherInstructionsReducer, nodeIndexMapping, start_index, inputLabels, inputLops, MRJobLineNumbers);
inputIndices.add(ret_val);
}
}
if (node.getExecLocation() == ExecLocation.Data) {
if (((Data) node).getFileFormatType() == FileFormatTypes.CSV) {
// Generate write instruction, which goes into CSV_WRITE Job
int output_index = start_index[0];
shuffleInstructions.add(node.getInstructions(inputIndices.get(0), output_index));
if (DMLScript.ENABLE_DEBUG_MODE) {
MRJobLineNumbers.add(node._beginLine);
}
nodeIndexMapping.put(node, output_index);
start_index[0]++;
return output_index;
} else {
return ret_val;
}
}
if (node.getExecLocation() == ExecLocation.MapAndReduce) {
/* Generate a shuffle instruction for "node", and return the index associated with the produced output */
boolean instGenerated = true;
int output_index = start_index[0];
switch(node.getType()) {
/* Lop types that take a single input */
case ReBlock:
case CSVReBlock:
case SortKeys:
case CentralMoment:
case CoVariance:
case GroupedAgg:
case DataPartition:
shuffleInstructions.add(node.getInstructions(inputIndices.get(0), output_index));
if (DMLScript.ENABLE_DEBUG_MODE) {
MRJobLineNumbers.add(node._beginLine);
}
break;
case ParameterizedBuiltin:
break;
/* Lop types that take two inputs */
case MMCJ:
case MMRJ:
case CombineBinary:
shuffleInstructions.add(node.getInstructions(inputIndices.get(0), inputIndices.get(1), output_index));
if (DMLScript.ENABLE_DEBUG_MODE) {
MRJobLineNumbers.add(node._beginLine);
}
break;
/* Lop types that take three inputs */
case CombineTernary:
shuffleInstructions.add(node.getInstructions(inputIndices.get(0), inputIndices.get(1), inputIndices.get(2), output_index));
if (DMLScript.ENABLE_DEBUG_MODE) {
MRJobLineNumbers.add(node._beginLine);
}
break;
default:
instGenerated = false;
break;
}
if (instGenerated) {
nodeIndexMapping.put(node, output_index);
start_index[0]++;
return output_index;
} else {
return inputIndices.get(0);
}
}
/* Get instructions for aligned reduce and other lops below the reduce. */
if (node.getExecLocation() == ExecLocation.Reduce || node.getExecLocation() == ExecLocation.MapOrReduce || hasChildNode(node, execNodes, ExecLocation.MapAndReduce)) {
if (inputIndices.size() == 1) {
int output_index = start_index[0];
start_index[0]++;
if (node.getType() == Type.Aggregate) {
aggInstructionsReducer.add(node.getInstructions(inputIndices.get(0), output_index));
} else {
otherInstructionsReducer.add(node.getInstructions(inputIndices.get(0), output_index));
}
if (DMLScript.ENABLE_DEBUG_MODE) {
MRJobLineNumbers.add(node._beginLine);
}
nodeIndexMapping.put(node, output_index);
return output_index;
} else if (inputIndices.size() == 2) {
int output_index = start_index[0];
start_index[0]++;
otherInstructionsReducer.add(node.getInstructions(inputIndices.get(0), inputIndices.get(1), output_index));
if (DMLScript.ENABLE_DEBUG_MODE) {
MRJobLineNumbers.add(node._beginLine);
}
nodeIndexMapping.put(node, output_index);
if (node instanceof Unary && node.getInputs().size() > 1) {
int index = 0;
for (int i = 0; i < node.getInputs().size(); i++) {
if (node.getInputs().get(i).getDataType() == DataType.SCALAR) {
index = i;
break;
}
}
if (node.getInputs().get(index).getExecLocation() == ExecLocation.Data && !((Data) (node.getInputs().get(index))).isLiteral()) {
inputLabels.add(node.getInputs().get(index).getOutputParameters().getLabel());
inputLops.add(node.getInputs().get(index));
}
if (node.getInputs().get(index).getExecLocation() != ExecLocation.Data) {
inputLabels.add(node.getInputs().get(index).getOutputParameters().getLabel());
inputLops.add(node.getInputs().get(index));
}
}
return output_index;
} else if (inputIndices.size() == 3 || node.getType() == Type.Ctable) {
int output_index = start_index[0];
start_index[0]++;
// Ctable, ParameterizedBuiltin, and the remaining three-input lops all emit the
// same ternary-input instruction (in case of CTABLE_TRANSFORM_SCALAR_WEIGHT,
// inputIndices.get(2) would be -1).
otherInstructionsReducer.add(node.getInstructions(inputIndices.get(0), inputIndices.get(1), inputIndices.get(2), output_index));
if (DMLScript.ENABLE_DEBUG_MODE) {
MRJobLineNumbers.add(node._beginLine);
}
nodeIndexMapping.put(node, output_index);
return output_index;
} else if (inputIndices.size() == 4 || inputIndices.size() == 5) {
int output_index = start_index[0];
start_index[0]++;
if (inputIndices.size() == 4)
otherInstructionsReducer.add(node.getInstructions(inputIndices.get(0), inputIndices.get(1), inputIndices.get(2), inputIndices.get(3), output_index));
else
otherInstructionsReducer.add(node.getInstructions(inputIndices.get(0), inputIndices.get(1), inputIndices.get(2), inputIndices.get(3), inputIndices.get(4), output_index));
if (DMLScript.ENABLE_DEBUG_MODE) {
MRJobLineNumbers.add(node._beginLine);
}
nodeIndexMapping.put(node, output_index);
return output_index;
} else
throw new LopsException("Invalid number of inputs to a lop: " + inputIndices.size());
}
return -1;
}
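Two conventions in the recursion above are easy to miss: nodeIndexMapping memoizes the output index already assigned to a lop, so a node reached through several parents is emitted only once, and start_index is a one-element int array so the counter survives across recursive calls. Below is a minimal, self-contained sketch of the same idiom, using generic names rather than the SystemML API.

import java.util.HashMap;

class IndexAssignmentSketch {
    // Same idiom as nodeIndexMapping / start_index above: memoize the index
    // assigned to a node and bump a counter that all recursive calls share
    // by wrapping it in a one-element array.
    static int assignIndex(Object node, HashMap<Object, Integer> indexMapping, int[] startIndex) {
        Integer existing = indexMapping.get(node);
        if (existing != null)
            return existing;               // node already reached via another parent
        int outputIndex = startIndex[0]++; // fresh index; the counter outlives this call
        indexMapping.put(node, outputIndex);
        return outputIndex;
    }

    public static void main(String[] args) {
        HashMap<Object, Integer> mapping = new HashMap<>();
        int[] counter = { 0 };
        System.out.println(assignIndex("A", mapping, counter)); // 0
        System.out.println(assignIndex("B", mapping, counter)); // 1
        System.out.println(assignIndex("A", mapping, counter)); // 0 (memoized)
    }
}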
Use of org.apache.sysml.lops.LopsException in project incubator-systemml by apache.
In the class Dag, method getInputPathsAndParameters.
// Method to populate the inputs; it also populates the node index mapping.
private static void getInputPathsAndParameters(Lop node, ArrayList<Lop> execNodes, ArrayList<String> inputStrings, ArrayList<InputInfo> inputInfos, ArrayList<Long> numRows, ArrayList<Long> numCols, ArrayList<Long> numRowsPerBlock, ArrayList<Long> numColsPerBlock, HashMap<Lop, Integer> nodeIndexMapping, ArrayList<String> inputLabels, ArrayList<Lop> inputLops, ArrayList<Integer> MRJobLineNumbers) {
// treat rand as an input.
if (node.getType() == Type.DataGen && execNodes.contains(node) && !nodeIndexMapping.containsKey(node)) {
numRows.add(node.getOutputParameters().getNumRows());
numCols.add(node.getOutputParameters().getNumCols());
numRowsPerBlock.add(node.getOutputParameters().getRowsInBlock());
numColsPerBlock.add(node.getOutputParameters().getColsInBlock());
inputStrings.add(node.getInstructions(inputStrings.size(), inputStrings.size()));
if (DMLScript.ENABLE_DEBUG_MODE) {
MRJobLineNumbers.add(node._beginLine);
}
inputInfos.add(InputInfo.TextCellInputInfo);
nodeIndexMapping.put(node, inputStrings.size() - 1);
return;
}
// get input file names
if ((!execNodes.contains(node) && !nodeIndexMapping.containsKey(node) && !(node.getExecLocation() == ExecLocation.Data) && !(node.getExecLocation() == ExecLocation.ControlProgram && node.getDataType() == DataType.SCALAR))
|| (!execNodes.contains(node) && node.getExecLocation() == ExecLocation.Data && ((Data) node).getOperationType() == Data.OperationTypes.READ && ((Data) node).getDataType() != DataType.SCALAR && !nodeIndexMapping.containsKey(node))) {
if (node.getOutputParameters().getFile_name() != null) {
inputStrings.add(node.getOutputParameters().getFile_name());
} else {
// use label name
inputStrings.add(Lop.VARIABLE_NAME_PLACEHOLDER + node.getOutputParameters().getLabel() + Lop.VARIABLE_NAME_PLACEHOLDER);
}
inputLabels.add(node.getOutputParameters().getLabel());
inputLops.add(node);
numRows.add(node.getOutputParameters().getNumRows());
numCols.add(node.getOutputParameters().getNumCols());
numRowsPerBlock.add(node.getOutputParameters().getRowsInBlock());
numColsPerBlock.add(node.getOutputParameters().getColsInBlock());
InputInfo nodeInputInfo = null;
// Check if file format type is binary or text and update infos
if (node.getOutputParameters().isBlocked()) {
if (node.getOutputParameters().getFormat() == Format.BINARY)
nodeInputInfo = InputInfo.BinaryBlockInputInfo;
else
throw new LopsException("Invalid format (" + node.getOutputParameters().getFormat() + ") encountered for a node/lop (ID=" + node.getID() + ") with blocked output.");
} else {
if (node.getOutputParameters().getFormat() == Format.TEXT)
nodeInputInfo = InputInfo.TextCellInputInfo;
else
nodeInputInfo = InputInfo.BinaryCellInputInfo;
}
// the information on key/value classes
if (node.getType() == Type.SortKeys) {
// SortKeys is the input to some other lop (say, L).
// The InputInfo of L is the outputInfo of SortKeys, which is
// (PickFromCompactInputFormat, DoubleWritable, IntWritable)
nodeInputInfo = new InputInfo(PickFromCompactInputFormat.class, DoubleWritable.class, IntWritable.class);
} else if (node.getType() == Type.CombineBinary) {
// CombineBinary is the input to some other lop (say, L)
// The InputInfo of L is the outputInfo of CombineBinary,
// and the outputInfo of CombineBinary depends on the operation!
CombineBinary combine = (CombineBinary) node;
if (combine.getOperation() == org.apache.sysml.lops.CombineBinary.OperationTypes.PreSort) {
nodeInputInfo = new InputInfo(SequenceFileInputFormat.class, DoubleWritable.class, IntWritable.class);
} else if (combine.getOperation() == org.apache.sysml.lops.CombineBinary.OperationTypes.PreCentralMoment || combine.getOperation() == org.apache.sysml.lops.CombineBinary.OperationTypes.PreCovUnweighted || combine.getOperation() == org.apache.sysml.lops.CombineBinary.OperationTypes.PreGroupedAggUnweighted) {
nodeInputInfo = InputInfo.WeightedPairInputInfo;
}
} else if (node.getType() == Type.CombineTernary) {
nodeInputInfo = InputInfo.WeightedPairInputInfo;
}
inputInfos.add(nodeInputInfo);
nodeIndexMapping.put(node, inputStrings.size() - 1);
return;
}
// if exec nodes does not contain node at this point, return.
if (!execNodes.contains(node))
return;
// process children recursively
for (Lop lop : node.getInputs()) {
getInputPathsAndParameters(lop, execNodes, inputStrings, inputInfos, numRows, numCols, numRowsPerBlock, numColsPerBlock, nodeIndexMapping, inputLabels, inputLops, MRJobLineNumbers);
}
}
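A hedged sketch of how a caller might prepare the parallel lists this method fills in lock-step (entry i in each list describes the i-th MR-job input); the local names and the per-root loop are illustrative assumptions, not the actual call site in Dag.

// Illustrative call-site sketch; the real Dag code may populate these differently.
ArrayList<String> inputStrings = new ArrayList<>();
ArrayList<InputInfo> inputInfos = new ArrayList<>();
ArrayList<Long> numRows = new ArrayList<>(), numCols = new ArrayList<>();
ArrayList<Long> numRowsPerBlock = new ArrayList<>(), numColsPerBlock = new ArrayList<>();
HashMap<Lop, Integer> nodeIndexMapping = new HashMap<>();
ArrayList<String> inputLabels = new ArrayList<>();
ArrayList<Lop> inputLops = new ArrayList<>();
ArrayList<Integer> mrJobLineNumbers = new ArrayList<>();
for (Lop root : execNodes)   // assumed: invoked once per root lop of the job
    getInputPathsAndParameters(root, execNodes, inputStrings, inputInfos, numRows, numCols,
        numRowsPerBlock, numColsPerBlock, nodeIndexMapping, inputLabels, inputLops, mrJobLineNumbers);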
Use of org.apache.sysml.lops.LopsException in project systemml by apache.
In the class Dag, method getRecordReaderInstructions.
/**
* Method to get record reader instructions for an MR job.
*
* @param node low-level operator
* @param execNodes list of exec nodes
* @param inputStrings list of input strings
* @param recordReaderInstructions list of record reader instructions
* @param nodeIndexMapping node index mapping
* @param start_index start index
* @param inputLabels list of input labels
* @param inputLops list of input lops
* @param MRJobLineNumbers MR job line numbers
* @return output index assigned to the record reader instruction, or -1 if none was generated
*/
private static int getRecordReaderInstructions(Lop node, ArrayList<Lop> execNodes, ArrayList<String> inputStrings, ArrayList<String> recordReaderInstructions, HashMap<Lop, Integer> nodeIndexMapping, int[] start_index, ArrayList<String> inputLabels, ArrayList<Lop> inputLops, ArrayList<Integer> MRJobLineNumbers) {
// if input source, return index
if (nodeIndexMapping.containsKey(node))
return nodeIndexMapping.get(node);
// not input source and not in exec nodes, then return.
if (!execNodes.contains(node))
return -1;
ArrayList<Integer> inputIndices = new ArrayList<>();
int max_input_index = -1;
// get mapper instructions
for (int i = 0; i < node.getInputs().size(); i++) {
// recurse
Lop childNode = node.getInputs().get(i);
int ret_val = getRecordReaderInstructions(childNode, execNodes, inputStrings, recordReaderInstructions, nodeIndexMapping, start_index, inputLabels, inputLops, MRJobLineNumbers);
inputIndices.add(ret_val);
if (ret_val > max_input_index) {
max_input_index = ret_val;
// child_for_max_input_index = childNode;
}
}
// instructions
if ((node.getExecLocation() == ExecLocation.RecordReader)) {
// the current indexing scheme cannot reuse max_input_index here,
// so a RecordReader lop always gets a fresh output index
int output_index = start_index[0];
start_index[0]++;
nodeIndexMapping.put(node, output_index);
// only the RangePick lop can contribute to labels
if (node.getType() == Type.PickValues) {
PickByCount pbc = (PickByCount) node;
if (pbc.getOperationType() == PickByCount.OperationTypes.RANGEPICK) {
// the second input is always a scalar
int scalarIndex = 1;
// if data lop not a literal -- add label
if (node.getInputs().get(scalarIndex).getExecLocation() == ExecLocation.Data && !((Data) (node.getInputs().get(scalarIndex))).isLiteral()) {
inputLabels.add(node.getInputs().get(scalarIndex).getOutputParameters().getLabel());
inputLops.add(node.getInputs().get(scalarIndex));
}
// if not data lop, then this is an intermediate variable.
if (node.getInputs().get(scalarIndex).getExecLocation() != ExecLocation.Data) {
inputLabels.add(node.getInputs().get(scalarIndex).getOutputParameters().getLabel());
inputLops.add(node.getInputs().get(scalarIndex));
}
}
}
// get recordreader instruction.
if (node.getInputs().size() == 2) {
recordReaderInstructions.add(node.getInstructions(inputIndices.get(0), inputIndices.get(1), output_index));
if (DMLScript.ENABLE_DEBUG_MODE) {
MRJobLineNumbers.add(node._beginLine);
}
} else
throw new LopsException("Unexpected number of inputs while generating a RecordReader Instruction");
return output_index;
}
return -1;
}
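The return convention mirrors the other generators above: the method hands back the output index assigned to the record-reader lop, or -1 when nothing was generated for the subtree. A small, hypothetical caller sketch follows; the variable names and the initialization of start_index are assumptions for illustration.

// Illustrative only: treat -1 as "no record-reader instruction for this subtree".
int[] startIndex = { inputStrings.size() };   // assumed: intermediate indices continue after the input indices
int rrIndex = getRecordReaderInstructions(rootLop, execNodes, inputStrings,
    recordReaderInstructions, nodeIndexMapping, startIndex, inputLabels, inputLops, mrJobLineNumbers);
if (rrIndex >= 0) {
    // recordReaderInstructions now ends with the instruction whose output carries index rrIndex
}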