use of org.apache.sysml.lops.Data in project incubator-systemml by apache.
the class ParameterizedBuiltinOp method constructLopsGroupedAggregate.
/**
 * Builds the lops for a grouped aggregate and registers the root lop on this
 * hop via {@code setLops}.
 *
 * <p>For {@code ExecType.MR} one of three plans is built: a weighted grouped
 * aggregate over an append chain of target/groups/weights, a map-side grouped
 * aggregate ({@code GroupedAggregateM}) followed by a sort group and a sum
 * aggregate, or the general grouped aggregate over the appended target/groups
 * input. For all other execution types a single {@code GroupedAggregate} or
 * {@code GroupedAggregateM} lop is built.
 *
 * @param inputlops named input lops; modified in place (entries are replaced
 *        or removed when inputs are combined or partitioned)
 * @param et execution type to build the lops for
 * @throws HopsException declared; propagated from the hop/lop construction calls
 * @throws LopsException declared; propagated from the lop construction calls
 */
private void constructLopsGroupedAggregate(HashMap<String, Lop> inputlops, ExecType et) throws HopsException, LopsException {
    //reset reblock requirement (see MR aggregate / construct lops)
    setRequiresReblock(false);

    //determine output dimensions (-1 means unknown)
    long outputDim1 = -1, outputDim2 = -1;
    Lop numGroups = inputlops.get(Statement.GAGG_NUM_GROUPS);
    if (!dimsKnown() && numGroups instanceof Data && ((Data) numGroups).isLiteral()) {
        long ngroups = ((Data) numGroups).getLongValue();
        Lop input = inputlops.get(GroupedAggregate.COMBINEDINPUT);
        // The combined input is only registered further below (MR paths), so it
        // may be absent here; in that case the output dimensions stay unknown.
        if (input != null) {
            long inDim1 = input.getOutputParameters().getNumRows();
            long inDim2 = input.getOutputParameters().getNumCols();
            boolean rowwise = (inDim1 == 1 && inDim2 > 1);
            if (rowwise) {
                //vector
                outputDim1 = ngroups;
                outputDim2 = 1;
            } else {
                //vector or matrix
                outputDim1 = inDim2;
                outputDim2 = ngroups;
            }
        }
    }

    //construct lops
    if (et == ExecType.MR) {
        Lop grp_agg = null;
        // construct necessary lops: combineBinary/combineTertiary and groupedAgg
        boolean isWeighted = (_paramIndexMap.get(Statement.GAGG_WEIGHTS) != null);
        if (isWeighted) {
            Lop append = BinaryOp.constructAppendLopChain(
                getInput().get(_paramIndexMap.get(Statement.GAGG_TARGET)),
                getInput().get(_paramIndexMap.get(Statement.GAGG_GROUPS)),
                getInput().get(_paramIndexMap.get(Statement.GAGG_WEIGHTS)),
                DataType.MATRIX, getValueType(), true,
                getInput().get(_paramIndexMap.get(Statement.GAGG_TARGET)));
            // add the combine lop to parameter list, with a new name "combinedinput"
            inputlops.put(GroupedAggregate.COMBINEDINPUT, append);
            inputlops.remove(Statement.GAGG_TARGET);
            inputlops.remove(Statement.GAGG_GROUPS);
            inputlops.remove(Statement.GAGG_WEIGHTS);
            grp_agg = new GroupedAggregate(inputlops, isWeighted, getDataType(), getValueType());
            grp_agg.getOutputParameters().setDimensions(outputDim1, outputDim2, getRowsInBlock(), getColsInBlock(), -1);
            setRequiresReblock(true);
        } else {
            Hop target = getInput().get(_paramIndexMap.get(Statement.GAGG_TARGET));
            Hop groups = getInput().get(_paramIndexMap.get(Statement.GAGG_GROUPS));
            Lop append = null;

            //physical operator selection
            double groupsSizeP = OptimizerUtils.estimatePartitionedSizeExactSparsity(groups.getDim1(), groups.getDim2(), groups.getRowsInBlock(), groups.getColsInBlock(), groups.getNnz());
            if (//mapgroupedagg
            groupsSizeP < OptimizerUtils.getRemoteMemBudgetMap(true)
                && getInput().get(_paramIndexMap.get(Statement.GAGG_FN)) instanceof LiteralOp
                && ((LiteralOp) getInput().get(_paramIndexMap.get(Statement.GAGG_FN))).getStringValue().equals("sum")
                && inputlops.get(Statement.GAGG_NUM_GROUPS) != null) {
                //pre partitioning
                boolean needPart = (groups.dimsKnown() && groups.getDim1() * groups.getDim2() > DistributedCacheInput.PARTITION_SIZE);
                if (needPart) {
                    //operator selection
                    ExecType etPart = (OptimizerUtils.estimateSizeExactSparsity(groups.getDim1(), groups.getDim2(), 1.0) < OptimizerUtils.getLocalMemBudget()) ? ExecType.CP : ExecType.MR;
                    Lop dcinput = new DataPartition(groups.constructLops(), DataType.MATRIX, ValueType.DOUBLE, etPart, PDataPartitionFormat.ROW_BLOCK_WISE_N);
                    dcinput.getOutputParameters().setDimensions(groups.getDim1(), groups.getDim2(), target.getRowsInBlock(), target.getColsInBlock(), groups.getNnz());
                    setLineNumbers(dcinput);
                    inputlops.put(Statement.GAGG_GROUPS, dcinput);
                }
                Lop grp_agg_m = new GroupedAggregateM(inputlops, getDataType(), getValueType(), needPart, ExecType.MR);
                grp_agg_m.getOutputParameters().setDimensions(outputDim1, outputDim2, target.getRowsInBlock(), target.getColsInBlock(), -1);
                setLineNumbers(grp_agg_m);

                //post aggregation
                Group grp = new Group(grp_agg_m, Group.OperationTypes.Sort, getDataType(), getValueType());
                grp.getOutputParameters().setDimensions(outputDim1, outputDim2, target.getRowsInBlock(), target.getColsInBlock(), -1);
                setLineNumbers(grp);
                Aggregate agg1 = new Aggregate(grp, HopsAgg2Lops.get(AggOp.SUM), getDataType(), getValueType(), ExecType.MR);
                agg1.setupCorrectionLocation(CorrectionLocationType.NONE);
                agg1.getOutputParameters().setDimensions(outputDim1, outputDim2, target.getRowsInBlock(), target.getColsInBlock(), -1);
                grp_agg = agg1;
                //note: no reblock required
            } else {
                //general case: groupedagg
                if (// multi-column-block result matrix
                target.getDim2() >= target.getColsInBlock()
                    // unknown
                    || target.getDim2() <= 0) {
                    long m1_dim1 = target.getDim1();
                    long m1_dim2 = target.getDim2();
                    long m2_dim1 = groups.getDim1();
                    long m2_dim2 = groups.getDim2();
                    long m3_dim1 = m1_dim1;
                    long m3_dim2 = ((m1_dim2 > 0 && m2_dim2 > 0) ? (m1_dim2 + m2_dim2) : -1);
                    long m3_nnz = (target.getNnz() > 0 && groups.getNnz() > 0) ? (target.getNnz() + groups.getNnz()) : -1;
                    long brlen = target.getRowsInBlock();
                    long bclen = target.getColsInBlock();

                    Lop offset = createOffsetLop(target, true);
                    Lop rep = new RepMat(groups.constructLops(), offset, true, groups.getDataType(), groups.getValueType());
                    setOutputDimensions(rep);
                    setLineNumbers(rep);

                    Group group1 = new Group(target.constructLops(), Group.OperationTypes.Sort, DataType.MATRIX, target.getValueType());
                    group1.getOutputParameters().setDimensions(m1_dim1, m1_dim2, brlen, bclen, target.getNnz());
                    setLineNumbers(group1);

                    Group group2 = new Group(rep, Group.OperationTypes.Sort, DataType.MATRIX, groups.getValueType());
                    // dimensions of the groups side belong on group2; setting them on
                    // group1 would overwrite the target dimensions assigned above
                    group2.getOutputParameters().setDimensions(m2_dim1, m2_dim2, brlen, bclen, groups.getNnz());
                    setLineNumbers(group2);

                    append = new AppendR(group1, group2, DataType.MATRIX, ValueType.DOUBLE, true, ExecType.MR);
                    append.getOutputParameters().setDimensions(m3_dim1, m3_dim2, brlen, bclen, m3_nnz);
                    setLineNumbers(append);
                } else {
                    //single-column-block vector or matrix
                    append = BinaryOp.constructMRAppendLop(target, groups, DataType.MATRIX, getValueType(), true, target);
                }
                // add the combine lop to parameter list, with a new name "combinedinput"
                inputlops.put(GroupedAggregate.COMBINEDINPUT, append);
                inputlops.remove(Statement.GAGG_TARGET);
                inputlops.remove(Statement.GAGG_GROUPS);
                grp_agg = new GroupedAggregate(inputlops, isWeighted, getDataType(), getValueType());
                grp_agg.getOutputParameters().setDimensions(outputDim1, outputDim2, getRowsInBlock(), getColsInBlock(), -1);
                setRequiresReblock(true);
            }
        }
        setLineNumbers(grp_agg);
        setLops(grp_agg);
    } else {
        //CP/Spark
        Lop grp_agg = null;
        if (et == ExecType.CP) {
            int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
            grp_agg = new GroupedAggregate(inputlops, getDataType(), getValueType(), et, k);
            grp_agg.getOutputParameters().setDimensions(outputDim1, outputDim2, getRowsInBlock(), getColsInBlock(), -1);
        } else if (et == ExecType.SPARK) {
            //physical operator selection
            Hop groups = getInput().get(_paramIndexMap.get(Statement.GAGG_GROUPS));
            boolean broadcastGroups = (_paramIndexMap.get(Statement.GAGG_WEIGHTS) == null
                && OptimizerUtils.checkSparkBroadcastMemoryBudget(groups.getDim1(), groups.getDim2(), groups.getRowsInBlock(), groups.getColsInBlock(), groups.getNnz()));
            if (//mapgroupedagg
            broadcastGroups
                && getInput().get(_paramIndexMap.get(Statement.GAGG_FN)) instanceof LiteralOp
                && ((LiteralOp) getInput().get(_paramIndexMap.get(Statement.GAGG_FN))).getStringValue().equals("sum")
                && inputlops.get(Statement.GAGG_NUM_GROUPS) != null) {
                Hop target = getInput().get(_paramIndexMap.get(Statement.GAGG_TARGET));
                grp_agg = new GroupedAggregateM(inputlops, getDataType(), getValueType(), true, ExecType.SPARK);
                grp_agg.getOutputParameters().setDimensions(outputDim1, outputDim2, target.getRowsInBlock(), target.getColsInBlock(), -1);
                //no reblock required (directly output binary block)
            } else {
                //groupedagg (w/ or w/o broadcast)
                grp_agg = new GroupedAggregate(inputlops, getDataType(), getValueType(), et, broadcastGroups);
                grp_agg.getOutputParameters().setDimensions(outputDim1, outputDim2, -1, -1, -1);
                setRequiresReblock(true);
            }
        }
        setLineNumbers(grp_agg);
        setLops(grp_agg);
    }
}
use of org.apache.sysml.lops.Data in project incubator-systemml by apache.
the class DataOp method constructLops.
/**
 * Creates (or returns the previously created) lop for this data operation.
 *
 * <p>The lops of all named input parameters are constructed first; then a
 * {@code Data} lop matching the data operation type is built, registered via
 * {@code setLops}, and the data flow properties are applied.
 *
 * @return the lops registered for this hop after construction
 * @throws HopsException declared; propagated from input lop construction
 * @throws LopsException if the data operation type is not supported
 */
@Override
public Lop constructLops() throws HopsException, LopsException {
    // lops already exist from an earlier call: hand them out unchanged
    if (getLops() != null) {
        return getLops();
    }

    ExecType execType = optFindExecType();
    Lop result = null;

    // build the lops of every named input parameter
    HashMap<String, Lop> paramLops = new HashMap<String, Lop>();
    for (Entry<String, Integer> param : _paramIndexMap.entrySet()) {
        paramLops.put(param.getKey(), getInput().get(param.getValue()).constructLops());
    }

    // build the data lop according to the operation type
    switch (_dataop) {
        case TRANSIENTREAD: {
            result = new Data(HopsData2Lops.get(_dataop), null, paramLops, getName(), null,
                getDataType(), getValueType(), true, getInputFormatType());
            setOutputDimensions(result);
            break;
        }
        case PERSISTENTREAD: {
            result = new Data(HopsData2Lops.get(_dataop), null, paramLops, getName(), null,
                getDataType(), getValueType(), false, getInputFormatType());
            // persistent reads carry the block sizes of the input, not of this hop
            result.getOutputParameters().setDimensions(getDim1(), getDim2(), _inRowsInBlock, _inColsInBlock, getNnz(), getUpdateType());
            break;
        }
        case PERSISTENTWRITE: {
            Data writeLop = new Data(HopsData2Lops.get(_dataop), getInput().get(0).constructLops(), paramLops, getName(), null,
                getDataType(), getValueType(), false, getInputFormatType());
            writeLop.setExecType(execType);
            result = writeLop;
            setOutputDimensions(result);
            break;
        }
        case TRANSIENTWRITE: {
            result = new Data(HopsData2Lops.get(_dataop), getInput().get(0).constructLops(), paramLops, getName(), null,
                getDataType(), getValueType(), true, getInputFormatType());
            setOutputDimensions(result);
            break;
        }
        case FUNCTIONOUTPUT: {
            Data funOutLop = new Data(HopsData2Lops.get(_dataop), getInput().get(0).constructLops(), paramLops, getName(), null,
                getDataType(), getValueType(), true, getInputFormatType());
            funOutLop.setExecType(execType);
            result = funOutLop;
            setOutputDimensions(result);
            break;
        }
        default:
            throw new LopsException("Invalid operation type for Data LOP: " + _dataop);
    }

    setLineNumbers(result);
    setLops(result);

    //add reblock/checkpoint lops if necessary
    constructAndSetLopsDataFlowProperties();

    return getLops();
}
use of org.apache.sysml.lops.Data in project incubator-systemml by apache.
the class Dag method getInputPathsAndParameters.
// Collects the inputs of a job by walking the lop DAG rooted at the given node.
// For every input found, one entry is appended to each of the parallel output
// lists (paths, input infos, dimensions, block sizes) and the position of the
// input is recorded in nodeIndexMapping.
private static void getInputPathsAndParameters(Lop node, ArrayList<Lop> execNodes, ArrayList<String> inputStrings, ArrayList<InputInfo> inputInfos, ArrayList<Long> numRows, ArrayList<Long> numCols, ArrayList<Long> numRowsPerBlock, ArrayList<Long> numColsPerBlock, HashMap<Lop, Integer> nodeIndexMapping, ArrayList<String> inputLabels, ArrayList<Lop> inputLops, ArrayList<Integer> MRJobLineNumbers) throws LopsException {
    final boolean inJob = execNodes.contains(node);
    final boolean alreadyMapped = nodeIndexMapping.containsKey(node);

    // treat rand as an input.
    if (node.getType() == Type.DataGen && inJob && !alreadyMapped) {
        numRows.add(node.getOutputParameters().getNumRows());
        numCols.add(node.getOutputParameters().getNumCols());
        numRowsPerBlock.add(node.getOutputParameters().getRowsInBlock());
        numColsPerBlock.add(node.getOutputParameters().getColsInBlock());
        // the instruction is generated against the slot it is about to occupy
        final int slot = inputStrings.size();
        inputStrings.add(node.getInstructions(slot, slot));
        if (DMLScript.ENABLE_DEBUG_MODE) {
            MRJobLineNumbers.add(node._beginLine);
        }
        inputInfos.add(InputInfo.TextCellInputInfo);
        nodeIndexMapping.put(node, slot);
        return;
    }

    // Decide whether the node is an input of the job: it must be outside the
    // job and not yet mapped, and either a non-scalar data READ or any other
    // lop that is not a scalar produced in the control program.
    boolean isJobInput = false;
    if (!inJob && !alreadyMapped) {
        if (node.getExecLocation() == ExecLocation.Data) {
            Data dataNode = (Data) node;
            isJobInput = dataNode.getOperationType() == Data.OperationTypes.READ
                && dataNode.getDataType() != DataType.SCALAR;
        } else {
            boolean cpScalar = node.getExecLocation() == ExecLocation.ControlProgram
                && node.getDataType() == DataType.SCALAR;
            isJobInput = !cpScalar;
        }
    }

    // get input file names
    if (isJobInput) {
        String fileName = node.getOutputParameters().getFile_name();
        if (fileName == null) {
            // use label name
            fileName = Lop.VARIABLE_NAME_PLACEHOLDER + node.getOutputParameters().getLabel() + Lop.VARIABLE_NAME_PLACEHOLDER;
        }
        inputStrings.add(fileName);
        inputLabels.add(node.getOutputParameters().getLabel());
        inputLops.add(node);
        numRows.add(node.getOutputParameters().getNumRows());
        numCols.add(node.getOutputParameters().getNumCols());
        numRowsPerBlock.add(node.getOutputParameters().getRowsInBlock());
        numColsPerBlock.add(node.getOutputParameters().getColsInBlock());

        // Check if file format type is binary or text and update infos
        InputInfo info;
        if (node.getOutputParameters().isBlocked()) {
            if (node.getOutputParameters().getFormat() != Format.BINARY) {
                throw new LopsException("Invalid format (" + node.getOutputParameters().getFormat() + ") encountered for a node/lop (ID=" + node.getID() + ") with blocked output.");
            }
            info = InputInfo.BinaryBlockInputInfo;
        } else {
            info = (node.getOutputParameters().getFormat() == Format.TEXT)
                ? InputInfo.TextCellInputInfo
                : InputInfo.BinaryCellInputInfo;
        }

        // the information on key/value classes
        if (node.getType() == Type.SortKeys) {
            // SortKeys is the input to some other lop (say, L)
            // InputInfo of L is the outputInfo of SortKeys, which is
            // (compactformat, doubleWriteable, IntWritable)
            info = new InputInfo(PickFromCompactInputFormat.class, DoubleWritable.class, IntWritable.class);
        } else if (node.getType() == Type.CombineBinary) {
            // CombineBinary is the input to some other lop (say, L)
            // InputInfo of L is the outputInfo of CombineBinary
            // And, the outputInfo of CombineBinary depends on the operation!
            org.apache.sysml.lops.CombineBinary.OperationTypes combineOp = ((CombineBinary) node).getOperation();
            if (combineOp == org.apache.sysml.lops.CombineBinary.OperationTypes.PreSort) {
                info = new InputInfo(SequenceFileInputFormat.class, DoubleWritable.class, IntWritable.class);
            } else if (combineOp == org.apache.sysml.lops.CombineBinary.OperationTypes.PreCentralMoment
                || combineOp == org.apache.sysml.lops.CombineBinary.OperationTypes.PreCovUnweighted
                || combineOp == org.apache.sysml.lops.CombineBinary.OperationTypes.PreGroupedAggUnweighted) {
                info = InputInfo.WeightedPairInputInfo;
            }
        } else if (node.getType() == Type.CombineTernary) {
            info = InputInfo.WeightedPairInputInfo;
        }

        inputInfos.add(info);
        nodeIndexMapping.put(node, inputStrings.size() - 1);
        return;
    }

    // only nodes that belong to the job have their children processed
    if (inJob) {
        for (Lop child : node.getInputs()) {
            getInputPathsAndParameters(child, execNodes, inputStrings, inputInfos, numRows, numCols, numRowsPerBlock, numColsPerBlock, nodeIndexMapping, inputLabels, inputLops, MRJobLineNumbers);
        }
    }
}
use of org.apache.sysml.lops.Data in project incubator-systemml by apache.
the class Dag method getAggAndOtherInstructions.
/**
 * Method to populate shuffle, aggregate and other instructions in reducer
 * for the sub-DAG rooted at {@code node}.
 *
 * <p>The inputs of {@code node} are processed recursively first; the indices
 * they resolve to are then used to generate the instruction of {@code node}
 * itself. Every generated instruction takes the next free index from
 * {@code start_index[0]} and is recorded in {@code nodeIndexMapping}. In debug
 * mode exactly one line number is appended to {@code MRJobLineNumbers} per
 * generated instruction.
 *
 * @param node low-level operator
 * @param execNodes list of exec nodes
 * @param shuffleInstructions list of shuffle instructions
 * @param aggInstructionsReducer receives the instructions of single-input {@code Aggregate} lops handled in the reducer
 * @param otherInstructionsReducer receives all other instructions handled in the reducer
 * @param nodeIndexMapping node index mapping
 * @param start_index single-element array holding the next free output index
 * @param inputLabels list of input labels
 * @param inputLops list of input lops
 * @param MRJobLineNumbers MR job line numbers (only populated in debug mode)
 * @return index associated with the output of {@code node}, or -1 if none could be
 *         determined (e.g. the node is neither mapped nor contained in {@code execNodes})
 * @throws LopsException if a lop handled in the reducer has an unsupported number of inputs
 */
private int getAggAndOtherInstructions(Lop node, ArrayList<Lop> execNodes, ArrayList<String> shuffleInstructions, ArrayList<String> aggInstructionsReducer, ArrayList<String> otherInstructionsReducer, HashMap<Lop, Integer> nodeIndexMapping, int[] start_index, ArrayList<String> inputLabels, ArrayList<Lop> inputLops, ArrayList<Integer> MRJobLineNumbers) throws LopsException {
    int ret_val = -1;
    if (nodeIndexMapping.containsKey(node))
        return nodeIndexMapping.get(node);
    if (!execNodes.contains(node))
        return ret_val;

    ArrayList<Integer> inputIndices = new ArrayList<Integer>();
    if (node.getType() == Lop.Type.Data && ((Data) node).getOperationType() == Data.OperationTypes.WRITE) {
        // for a write only the first input is followed
        ret_val = getAggAndOtherInstructions(node.getInputs().get(0), execNodes, shuffleInstructions, aggInstructionsReducer, otherInstructionsReducer, nodeIndexMapping, start_index, inputLabels, inputLops, MRJobLineNumbers);
        inputIndices.add(ret_val);
    } else {
        for (Lop cnode : node.getInputs()) {
            ret_val = getAggAndOtherInstructions(cnode, execNodes, shuffleInstructions, aggInstructionsReducer, otherInstructionsReducer, nodeIndexMapping, start_index, inputLabels, inputLops, MRJobLineNumbers);
            inputIndices.add(ret_val);
        }
    }

    if (node.getExecLocation() == ExecLocation.Data) {
        if (((Data) node).getFileFormatType() == FileFormatTypes.CSV && !(node.getInputs().get(0) instanceof ParameterizedBuiltin && ((ParameterizedBuiltin) node.getInputs().get(0)).getOp() == org.apache.sysml.lops.ParameterizedBuiltin.OperationTypes.TRANSFORM)) {
            // Generate write instruction, which goes into CSV_WRITE Job
            int output_index = start_index[0];
            shuffleInstructions.add(node.getInstructions(inputIndices.get(0), output_index));
            if (DMLScript.ENABLE_DEBUG_MODE) {
                MRJobLineNumbers.add(node._beginLine);
            }
            nodeIndexMapping.put(node, output_index);
            start_index[0]++;
            return output_index;
        } else {
            // no instruction of its own: pass through the index of the last input
            return ret_val;
        }
    }

    if (node.getExecLocation() == ExecLocation.MapAndReduce) {
        /* Generate Shuffle Instruction for "node", and return the index associated with produced output */
        boolean instGenerated = true;
        int output_index = start_index[0];
        switch (node.getType()) {
            /* Lop types that take a single input */
            case ReBlock:
            case CSVReBlock:
            case SortKeys:
            case CentralMoment:
            case CoVariance:
            case GroupedAgg:
            case DataPartition:
                shuffleInstructions.add(node.getInstructions(inputIndices.get(0), output_index));
                if (DMLScript.ENABLE_DEBUG_MODE) {
                    MRJobLineNumbers.add(node._beginLine);
                }
                break;
            case ParameterizedBuiltin:
                if (((ParameterizedBuiltin) node).getOp() == org.apache.sysml.lops.ParameterizedBuiltin.OperationTypes.TRANSFORM) {
                    shuffleInstructions.add(node.getInstructions(output_index));
                    if (DMLScript.ENABLE_DEBUG_MODE) {
                        MRJobLineNumbers.add(node._beginLine);
                    }
                }
                break;
            /* Lop types that take two inputs */
            case MMCJ:
            case MMRJ:
            case CombineBinary:
                shuffleInstructions.add(node.getInstructions(inputIndices.get(0), inputIndices.get(1), output_index));
                if (DMLScript.ENABLE_DEBUG_MODE) {
                    MRJobLineNumbers.add(node._beginLine);
                }
                break;
            /* Lop types that take three inputs */
            case CombineTernary:
                shuffleInstructions.add(node.getInstructions(inputIndices.get(0), inputIndices.get(1), inputIndices.get(2), output_index));
                if (DMLScript.ENABLE_DEBUG_MODE) {
                    MRJobLineNumbers.add(node._beginLine);
                }
                break;
            default:
                instGenerated = false;
                break;
        }
        if (instGenerated) {
            nodeIndexMapping.put(node, output_index);
            start_index[0]++;
            return output_index;
        } else {
            return inputIndices.get(0);
        }
    }

    /* Get instructions for aligned reduce and other lops below the reduce. */
    if (node.getExecLocation() == ExecLocation.Reduce || node.getExecLocation() == ExecLocation.MapOrReduce || hasChildNode(node, execNodes, ExecLocation.MapAndReduce)) {
        if (inputIndices.size() == 1) {
            int output_index = start_index[0];
            start_index[0]++;
            if (node.getType() == Type.Aggregate) {
                aggInstructionsReducer.add(node.getInstructions(inputIndices.get(0), output_index));
            } else {
                otherInstructionsReducer.add(node.getInstructions(inputIndices.get(0), output_index));
            }
            // one line number per generated instruction, regardless of the target list
            if (DMLScript.ENABLE_DEBUG_MODE) {
                MRJobLineNumbers.add(node._beginLine);
            }
            nodeIndexMapping.put(node, output_index);
            return output_index;
        } else if (inputIndices.size() == 2) {
            int output_index = start_index[0];
            start_index[0]++;
            otherInstructionsReducer.add(node.getInstructions(inputIndices.get(0), inputIndices.get(1), output_index));
            if (DMLScript.ENABLE_DEBUG_MODE) {
                MRJobLineNumbers.add(node._beginLine);
            }
            nodeIndexMapping.put(node, output_index);
            if (node instanceof Unary && node.getInputs().size() > 1) {
                // locate the first scalar input (falls back to input 0 if there is none)
                int index = 0;
                for (int i = 0; i < node.getInputs().size(); i++) {
                    if (node.getInputs().get(i).getDataType() == DataType.SCALAR) {
                        index = i;
                        break;
                    }
                }
                // register that input as a job input unless it is a literal data lop
                Lop scalarInput = node.getInputs().get(index);
                if (scalarInput.getExecLocation() != ExecLocation.Data || !((Data) scalarInput).isLiteral()) {
                    inputLabels.add(scalarInput.getOutputParameters().getLabel());
                    inputLops.add(scalarInput);
                }
            }
            return output_index;
        } else if (inputIndices.size() == 3 || node.getType() == Type.Ternary) {
            int output_index = start_index[0];
            start_index[0]++;
            // in case of CTABLE_TRANSFORM_SCALAR_WEIGHT: inputIndices.get(2) would be -1
            otherInstructionsReducer.add(node.getInstructions(inputIndices.get(0), inputIndices.get(1), inputIndices.get(2), output_index));
            if (DMLScript.ENABLE_DEBUG_MODE) {
                MRJobLineNumbers.add(node._beginLine);
            }
            nodeIndexMapping.put(node, output_index);
            return output_index;
        } else if (inputIndices.size() == 4) {
            int output_index = start_index[0];
            start_index[0]++;
            otherInstructionsReducer.add(node.getInstructions(inputIndices.get(0), inputIndices.get(1), inputIndices.get(2), inputIndices.get(3), output_index));
            if (DMLScript.ENABLE_DEBUG_MODE) {
                MRJobLineNumbers.add(node._beginLine);
            }
            nodeIndexMapping.put(node, output_index);
            return output_index;
        } else
            throw new LopsException("Invalid number of inputs to a lop: " + inputIndices.size());
    }
    return -1;
}
use of org.apache.sysml.lops.Data in project incubator-systemml by apache.
the class Dag method generateControlProgramJobs.
/**
 * Method to generate instructions that are executed in Control Program. At
 * this point, this DAG has no dependencies on the MR dag. ie. none of the
 * inputs are outputs of MR jobs
 *
 * <p>Every node that is handled here is removed from {@code execNodes} before
 * the method returns. Data WRITE lops for which {@code sendWriteLopToMR}
 * returns true are left in {@code execNodes}.
 *
 * @param execNodes list of low-level operators; handled nodes are removed in place
 * @param inst list of instructions; receives the generated CP instructions and the
 *        instructions of persistent matrix writes
 * @param writeInst list of write instructions; receives the instructions of scalar writes
 * @param deleteInst list of delete instructions; receives cleanup instructions
 *        (e.g. variable removals) and the instructions of transient matrix writes
 * @throws LopsException if LopsException occurs
 * @throws DMLRuntimeException if DMLRuntimeException occurs
 */
private void generateControlProgramJobs(ArrayList<Lop> execNodes, ArrayList<Instruction> inst, ArrayList<Instruction> writeInst, ArrayList<Instruction> deleteInst) throws LopsException, DMLRuntimeException {
// nodes to be deleted from execnodes
ArrayList<Lop> markedNodes = new ArrayList<Lop>();
// variable names to be deleted
ArrayList<String> var_deletions = new ArrayList<String>();
// lop per deleted variable name, used to set the location of the removal instruction
HashMap<String, Lop> var_deletionsLineNum = new HashMap<String, Lop>();
// whether removal instructions for the inputs of the current node should be considered
boolean doRmVar = false;
for (int i = 0; i < execNodes.size(); i++) {
Lop node = execNodes.get(i);
doRmVar = false;
// scalar reads without a file name need no instruction; just drop the node
// TODO: statiko -- check if this condition ever evaluated to TRUE
if (node.getExecLocation() == ExecLocation.Data && ((Data) node).getOperationType() == Data.OperationTypes.READ && ((Data) node).getDataType() == DataType.SCALAR && node.getOutputParameters().getFile_name() == null) {
markedNodes.add(node);
continue;
}
// output scalar instructions and mark nodes for deletion
if (node.getExecLocation() == ExecLocation.ControlProgram) {
if (node.getDataType() == DataType.SCALAR) {
// Output from lops with SCALAR data type must
// go into Temporary Variables (Var0, Var1, etc.)
NodeOutput out = setupNodeOutputs(node, ExecType.CP, false, false);
// dummy
inst.addAll(out.getPreInstructions());
deleteInst.addAll(out.getLastInstructions());
} else {
// Output from lops with non-SCALAR data type must
// go into Temporary Files (temp0, temp1, etc.)
NodeOutput out = setupNodeOutputs(node, ExecType.CP, false, false);
inst.addAll(out.getPreInstructions());
// check whether any consumer of this node is a transient write
boolean hasTransientWriteParent = false;
for (Lop parent : node.getOutputs()) {
if (parent.getExecLocation() == ExecLocation.Data && ((Data) parent).getOperationType() == Data.OperationTypes.WRITE && ((Data) parent).isTransient()) {
hasTransientWriteParent = true;
break;
}
}
if (!hasTransientWriteParent) {
deleteInst.addAll(out.getLastInstructions());
} else {
// with a transient write parent, the removal of the output variable is
// deferred: it is generated from var_deletions after the main loop
var_deletions.add(node.getOutputParameters().getLabel());
var_deletionsLineNum.put(node.getOutputParameters().getLabel(), node);
}
}
String inst_string = "";
// ParameterizedBuiltin, GroupedAgg and DataGen lops
// are handled separately, by simply passing ONLY the output variable to getInstructions()
if (node.getType() == Lop.Type.ParameterizedBuiltin || node.getType() == Lop.Type.GroupedAgg || node.getType() == Lop.Type.DataGen) {
inst_string = node.getInstructions(node.getOutputParameters().getLabel());
} else // FunctionCallCP lops are handled separately as well by passing arrays of inputs and outputs
if (node.getType() == Lop.Type.FunctionCallCP) {
String[] inputs = new String[node.getInputs().size()];
String[] outputs = new String[node.getOutputs().size()];
int count = 0;
for (Lop in : node.getInputs()) inputs[count++] = in.getOutputParameters().getLabel();
count = 0;
for (Lop out : node.getOutputs()) {
outputs[count++] = out.getOutputParameters().getLabel();
}
inst_string = node.getInstructions(inputs, outputs);
} else if (node.getType() == Lop.Type.MULTIPLE_CP) {
// ie, MultipleCP class
inst_string = node.getInstructions(node.getOutputParameters().getLabel());
} else {
// all remaining lops: pick the getInstructions overload matching the number
// of inputs, passing the input labels followed by the output label
if (node.getInputs().isEmpty()) {
// currently, such a case exists only for Rand lop
inst_string = node.getInstructions(node.getOutputParameters().getLabel());
} else if (node.getInputs().size() == 1) {
inst_string = node.getInstructions(node.getInputs().get(0).getOutputParameters().getLabel(), node.getOutputParameters().getLabel());
} else if (node.getInputs().size() == 2) {
inst_string = node.getInstructions(node.getInputs().get(0).getOutputParameters().getLabel(), node.getInputs().get(1).getOutputParameters().getLabel(), node.getOutputParameters().getLabel());
} else if (node.getInputs().size() == 3 || node.getType() == Type.Ternary) {
inst_string = node.getInstructions(node.getInputs().get(0).getOutputParameters().getLabel(), node.getInputs().get(1).getOutputParameters().getLabel(), node.getInputs().get(2).getOutputParameters().getLabel(), node.getOutputParameters().getLabel());
} else if (node.getInputs().size() == 4) {
inst_string = node.getInstructions(node.getInputs().get(0).getOutputParameters().getLabel(), node.getInputs().get(1).getOutputParameters().getLabel(), node.getInputs().get(2).getOutputParameters().getLabel(), node.getInputs().get(3).getOutputParameters().getLabel(), node.getOutputParameters().getLabel());
} else if (node.getInputs().size() == 5) {
inst_string = node.getInstructions(node.getInputs().get(0).getOutputParameters().getLabel(), node.getInputs().get(1).getOutputParameters().getLabel(), node.getInputs().get(2).getOutputParameters().getLabel(), node.getInputs().get(3).getOutputParameters().getLabel(), node.getInputs().get(4).getOutputParameters().getLabel(), node.getOutputParameters().getLabel());
} else if (node.getInputs().size() == 6) {
inst_string = node.getInstructions(node.getInputs().get(0).getOutputParameters().getLabel(), node.getInputs().get(1).getOutputParameters().getLabel(), node.getInputs().get(2).getOutputParameters().getLabel(), node.getInputs().get(3).getOutputParameters().getLabel(), node.getInputs().get(4).getOutputParameters().getLabel(), node.getInputs().get(5).getOutputParameters().getLabel(), node.getOutputParameters().getLabel());
} else if (node.getInputs().size() == 7) {
inst_string = node.getInstructions(node.getInputs().get(0).getOutputParameters().getLabel(), node.getInputs().get(1).getOutputParameters().getLabel(), node.getInputs().get(2).getOutputParameters().getLabel(), node.getInputs().get(3).getOutputParameters().getLabel(), node.getInputs().get(4).getOutputParameters().getLabel(), node.getInputs().get(5).getOutputParameters().getLabel(), node.getInputs().get(6).getOutputParameters().getLabel(), node.getOutputParameters().getLabel());
} else {
// more than seven inputs: pass all input labels as an array
String[] inputs = new String[node.getInputs().size()];
for (int j = 0; j < node.getInputs().size(); j++) inputs[j] = node.getInputs().get(j).getOutputParameters().getLabel();
inst_string = node.getInstructions(inputs, node.getOutputParameters().getLabel());
}
}
try {
if (LOG.isTraceEnabled())
LOG.trace("Generating instruction - " + inst_string);
Instruction currInstr = InstructionParser.parseSingleInstruction(inst_string);
if (currInstr == null) {
throw new LopsException("Error parsing the instruction:" + inst_string);
}
// location: the node itself if it has a begin line, otherwise its first
// output, otherwise its first input
if (node._beginLine != 0)
currInstr.setLocation(node);
else if (!node.getOutputs().isEmpty())
currInstr.setLocation(node.getOutputs().get(0));
else if (!node.getInputs().isEmpty())
currInstr.setLocation(node.getInputs().get(0));
inst.add(currInstr);
} catch (Exception e) {
// note: this also wraps the LopsException thrown above for a null instruction
throw new LopsException(node.printErrorLocation() + "Problem generating simple inst - " + inst_string, e);
}
markedNodes.add(node);
doRmVar = true;
//continue;
} else if (node.getExecLocation() == ExecLocation.Data) {
Data dnode = (Data) node;
Data.OperationTypes op = dnode.getOperationType();
if (op == Data.OperationTypes.WRITE) {
NodeOutput out = null;
if (sendWriteLopToMR(node)) {
// In this case, Data WRITE lop goes into MR, and
// we don't have to do anything here
doRmVar = false;
} else {
out = setupNodeOutputs(node, ExecType.CP, false, false);
if (dnode.getDataType() == DataType.SCALAR) {
// processing is same for both transient and persistent scalar writes
writeInst.addAll(out.getLastInstructions());
//inst.addAll(out.getLastInstructions());
doRmVar = false;
} else {
// setupNodeOutputs() handles both transient and persistent matrix writes
if (dnode.isTransient()) {
//inst.addAll(out.getPreInstructions()); // dummy ?
deleteInst.addAll(out.getLastInstructions());
doRmVar = false;
} else {
// In case of persistent write lop, write instruction will be generated
// and that instruction must be added to <code>inst</code> so that it gets
// executed immediately. If it is added to <code>deleteInst</code> then it
// gets executed at the end of program block's execution
inst.addAll(out.getLastInstructions());
doRmVar = true;
}
}
markedNodes.add(node);
//continue;
}
} else {
// any non-WRITE data operation
// generate a temp label to hold the value that is read from HDFS
if (node.getDataType() == DataType.SCALAR) {
node.getOutputParameters().setLabel(Lop.SCALAR_VAR_NAME_PREFIX + var_index.getNextID());
String io_inst = node.getInstructions(node.getOutputParameters().getLabel(), node.getOutputParameters().getFile_name());
CPInstruction currInstr = CPInstructionParser.parseSingleInstruction(io_inst);
currInstr.setLocation(node);
inst.add(currInstr);
// the temp variable is removed again via deleteInst
Instruction tempInstr = VariableCPInstruction.prepareRemoveInstruction(node.getOutputParameters().getLabel());
tempInstr.setLocation(node);
deleteInst.add(tempInstr);
} else {
throw new LopsException("Matrix READs are not handled in CP yet!");
}
markedNodes.add(node);
doRmVar = true;
//continue;
}
}
// see if rmvar instructions can be generated for node's inputs
if (doRmVar)
processConsumersForInputs(node, inst, deleteInst);
doRmVar = false;
}
// generate the deferred removal instructions collected in var_deletions
for (String var : var_deletions) {
Instruction rmInst = VariableCPInstruction.prepareRemoveInstruction(var);
if (LOG.isTraceEnabled())
LOG.trace(" Adding var_deletions: " + rmInst.toString());
rmInst.setLocation(var_deletionsLineNum.get(var));
deleteInst.add(rmInst);
}
// delete all marked nodes
for (Lop node : markedNodes) {
execNodes.remove(node);
}
}
Aggregations