use of org.apache.sysml.lops.ParameterizedBuiltin in project incubator-systemml by apache.
the class ParameterizedBuiltinOp method constructLops.
/**
 * Builds (or returns the previously built) lops for this parameterized builtin hop.
 *
 * <p>The lops of all named input parameters are constructed first; the operation type
 * then decides which lop sub-DAG is created. Finally, reblock/checkpoint lops are
 * added via {@code constructAndSetLopsDataFlowProperties()}.
 *
 * @return the root lop registered for this hop
 * @throws HopsException if the operation type is not handled by this method
 * @throws LopsException declared for the underlying lop construction calls
 */
@Override
public Lop constructLops() throws HopsException, LopsException {
    // lops are built only once per hop
    if (getLops() != null) {
        return getLops();
    }

    // build the lops of every named input parameter
    HashMap<String, Lop> inputlops = new HashMap<String, Lop>();
    for (Entry<String, Integer> param : _paramIndexMap.entrySet()) {
        Hop paramHop = getInput().get(param.getValue());
        inputlops.put(param.getKey(), paramHop.constructLops());
    }

    switch (_op) {
        case GROUPEDAGG: {
            constructLopsGroupedAggregate(inputlops, optFindExecType());
            break;
        }
        case RMEMPTY: {
            ExecType rmEt = optFindExecType();
            // MR removeEmpty falls back to CP_FILE unless parallel compilation is enabled
            if (rmEt == ExecType.MR && !COMPILE_PARALLEL_REMOVEEMPTY) {
                rmEt = ExecType.CP_FILE;
            }
            constructLopsRemoveEmpty(inputlops, rmEt);
            break;
        }
        case REXPAND: {
            constructLopsRExpand(inputlops, optFindExecType());
            break;
        }
        case TRANSFORM: {
            ParameterizedBuiltin transformLop = createPlainParamBuiltinLop(inputlops);
            // output of transform is always in CSV format; to produce a blocked
            // output, this lop must be fed into a CSV Reblock lop
            transformLop.getOutputParameters().setFormat(Format.CSV);
            setLops(transformLop);
            break;
        }
        case CDF:
        case INVCDF:
        case REPLACE:
        case TRANSFORMAPPLY:
        case TRANSFORMDECODE:
        case TRANSFORMMETA:
        case TOSTRING: {
            setLops(createPlainParamBuiltinLop(inputlops));
            break;
        }
        default:
            throw new HopsException("Unknown ParamBuiltinOp: " + _op);
    }

    // add reblock/checkpoint lops if necessary
    constructAndSetLopsDataFlowProperties();
    return getLops();
}

/**
 * Creates a single ParameterizedBuiltin lop for the current operation with the
 * execution type chosen by {@code optFindExecType()}, and sets its output
 * dimensions and line numbers. The lop is not registered via {@code setLops}.
 */
private ParameterizedBuiltin createPlainParamBuiltinLop(HashMap<String, Lop> inputlops) throws HopsException, LopsException {
    ExecType et = optFindExecType();
    ParameterizedBuiltin lop = new ParameterizedBuiltin(inputlops, HopsParameterizedBuiltinLops.get(_op), getDataType(), getValueType(), et);
    setOutputDimensions(lop);
    setLineNumbers(lop);
    return lop;
}
use of org.apache.sysml.lops.ParameterizedBuiltin in project incubator-systemml by apache.
the class ParameterizedBuiltinOp method constructLopsRemoveEmpty.
/**
 * Builds the lops for the removeEmpty operation and registers the root via setLops.
 *
 * <ul>
 * <li>CP / CP_FILE: a single ParameterizedBuiltin lop over the given input lops.</li>
 * <li>MR with a diag target and literal margin "rows": a temporary hop DAG is compiled,
 * either (input != 0) * cumsum(input != 0) when a permutation matrix is requested and the
 * mapmm memory estimate is below the remote map budget, or otherwise a forced-MR ctable
 * over max(cumsum, 1), a sequence, and the input.</li>
 * <li>MR otherwise: offsets of non-empty rows/cols are computed via a temporary hop DAG
 * (indicator, cumsum, max, multiply) and fed into either a PMMJ lop or a
 * group / ParameterizedBuiltin plan, followed by a final group and sum aggregate.</li>
 * <li>SPARK: the same offset hop DAG, consumed by a single ParameterizedBuiltin lop.</li>
 * </ul>
 *
 * For any other execution type no lops are set by this method.
 *
 * @param inputlops map from parameter name to the lop of that input; "margin" is read
 *                  from it in the MR and Spark plans
 * @param et execution type the lops are constructed for
 * @throws HopsException if parameter 'margin' is not a literal in the default MR plan or the Spark plan
 * @throws LopsException declared for the underlying lop construction calls
 */
private void constructLopsRemoveEmpty(HashMap<String, Lop> inputlops, ExecType et) throws HopsException, LopsException {
// resolve the named parameter hops; "select" is optional and null when absent
Hop targetHop = getInput().get(_paramIndexMap.get("target"));
Hop marginHop = getInput().get(_paramIndexMap.get("margin"));
Hop selectHop = (_paramIndexMap.get("select") != null) ? getInput().get(_paramIndexMap.get("select")) : null;
if (et == ExecType.CP || et == ExecType.CP_FILE) {
// single-node plan: one ParameterizedBuiltin lop over all input lops
ParameterizedBuiltin pbilop = new ParameterizedBuiltin(inputlops, HopsParameterizedBuiltinLops.get(_op), getDataType(), getValueType(), et);
setOutputDimensions(pbilop);
setLineNumbers(pbilop);
setLops(pbilop);
/*DISABLED CP PMM (see for example, MDA Bivar test, requires size propagation on recompile)
if( et == ExecType.CP && isTargetDiagInput() && marginHop instanceof LiteralOp
&& ((LiteralOp)marginHop).getStringValue().equals("rows")
&& _outputPermutationMatrix ) //SPECIAL CASE SELECTION VECTOR
{
//TODO this special case could be taken into account for memory estimates in order
// to reduce the estimates for the input diag and subsequent matrix multiply
//get input vector (without materializing diag())
Hop input = targetHop.getInput().get(0);
long brlen = input.getRowsInBlock();
long bclen = input.getColsInBlock();
MemoTable memo = new MemoTable();
boolean isPPredInput = (input instanceof BinaryOp && ((BinaryOp)input).isPPredOperation());
//step1: compute index vectors
Hop ppred0 = input;
if( !isPPredInput ) { //ppred only if required
ppred0 = new BinaryOp("tmp1", DataType.MATRIX, ValueType.DOUBLE, OpOp2.NOTEQUAL, input, new LiteralOp("0",0));
HopRewriteUtils.setOutputBlocksizes(ppred0, brlen, bclen);
ppred0.refreshSizeInformation();
ppred0.computeMemEstimate(memo); //select exec type
HopRewriteUtils.copyLineNumbers(this, ppred0);
}
UnaryOp cumsum = new UnaryOp("tmp2", DataType.MATRIX, ValueType.DOUBLE, OpOp1.CUMSUM, ppred0);
HopRewriteUtils.setOutputBlocksizes(cumsum, brlen, bclen);
cumsum.refreshSizeInformation();
cumsum.computeMemEstimate(memo); //select exec type
HopRewriteUtils.copyLineNumbers(this, cumsum);
BinaryOp sel = new BinaryOp("tmp3", DataType.MATRIX, ValueType.DOUBLE, OpOp2.MULT, ppred0, cumsum);
HopRewriteUtils.setOutputBlocksizes(sel, brlen, bclen);
sel.refreshSizeInformation();
sel.computeMemEstimate(memo); //select exec type
HopRewriteUtils.copyLineNumbers(this, sel);
Lop loutput = sel.constructLops();
//Step 4: cleanup hops (allow for garbage collection)
HopRewriteUtils.removeChildReference(ppred0, input);
setLops( loutput );
}
else //GENERAL CASE
{
ParameterizedBuiltin pbilop = new ParameterizedBuiltin( et, inputlops,
HopsParameterizedBuiltinLops.get(_op), getDataType(), getValueType());
pbilop.getOutputParameters().setDimensions(getDim1(),getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
setLineNumbers(pbilop);
setLops(pbilop);
}
*/
} else if (et == ExecType.MR) {
//special compile for mr removeEmpty-diag
if (isTargetDiagInput() && marginHop instanceof LiteralOp && ((LiteralOp) marginHop).getStringValue().equals("rows")) {
//get input vector (without materializing diag())
Hop input = targetHop.getInput().get(0);
long brlen = input.getRowsInBlock();
long bclen = input.getColsInBlock();
MemoTable memo = new MemoTable();
boolean isPPredInput = (input instanceof BinaryOp && ((BinaryOp) input).isPPredOperation());
//step1: compute index vectors
Hop ppred0 = input;
if (!isPPredInput) {
//ppred only if required
ppred0 = HopRewriteUtils.createBinary(input, new LiteralOp(0), OpOp2.NOTEQUAL);
HopRewriteUtils.updateHopCharacteristics(ppred0, brlen, bclen, memo, this);
}
UnaryOp cumsum = HopRewriteUtils.createUnary(ppred0, OpOp1.CUMSUM);
HopRewriteUtils.updateHopCharacteristics(cumsum, brlen, bclen, memo, this);
Lop loutput = null;
// the selection-vector plan is only chosen if this estimate is below the remote map budget
double mest = AggBinaryOp.getMapmmMemEstimate(input.getDim1(), 1, brlen, bclen, -1, brlen, bclen, brlen, bclen, -1, 1, true);
double mbudget = OptimizerUtils.getRemoteMemBudgetMap(true);
if (//SPECIAL CASE: SELECTION VECTOR
_outputPermutationMatrix && mest < mbudget) {
// selection vector = indicator * cumsum(indicator)
BinaryOp sel = HopRewriteUtils.createBinary(ppred0, cumsum, OpOp2.MULT);
HopRewriteUtils.updateHopCharacteristics(sel, brlen, bclen, memo, this);
loutput = sel.constructLops();
} else //GENERAL CASE: GENERAL PERMUTATION MATRIX
{
//max ensures non-zero entries and at least one output row
BinaryOp max = HopRewriteUtils.createBinary(cumsum, new LiteralOp(1), OpOp2.MAX);
HopRewriteUtils.updateHopCharacteristics(max, brlen, bclen, memo, this);
DataGenOp seq = HopRewriteUtils.createSeqDataGenOp(input);
seq.setName("tmp4");
HopRewriteUtils.updateHopCharacteristics(seq, brlen, bclen, memo, this);
//step 2: compute removeEmpty(rows) output via table, seq guarantees right column dimension
//note: weights always the input (even if isPPredInput) because input also includes 0s
TernaryOp table = new TernaryOp("tmp5", DataType.MATRIX, ValueType.DOUBLE, OpOp3.CTABLE, max, seq, input);
table.setOutputBlocksizes(brlen, bclen);
table.refreshSizeInformation();
//force MR
table.setForcedExecType(ExecType.MR);
HopRewriteUtils.copyLineNumbers(this, table);
table.setDisjointInputs(true);
table.setOutputEmptyBlocks(_outputEmptyBlocks);
loutput = table.constructLops();
// detach the temporary table hop from the original input
HopRewriteUtils.removeChildReference(table, input);
}
//Step 4: cleanup hops (allow for garbage collection)
HopRewriteUtils.removeChildReference(ppred0, input);
setLops(loutput);
} else //default mr remove empty
// NOTE: this check is always true here, the enclosing branch already tested et == ExecType.MR
if (et == ExecType.MR) {
if (!(marginHop instanceof LiteralOp))
throw new HopsException("Parameter 'margin' must be a literal argument.");
Hop input = targetHop;
long rlen = input.getDim1();
long clen = input.getDim2();
long brlen = input.getRowsInBlock();
long bclen = input.getColsInBlock();
long nnz = input.getNnz();
// true for margin "rows", false for any other margin literal
boolean rmRows = ((LiteralOp) marginHop).getStringValue().equals("rows");
//construct lops via new partial hop dag and subsequent lops construction
//in order to reuse of operator selection decisions
BinaryOp ppred0 = null;
Hop emptyInd = null;
if (selectHop == null) {
//Step1: compute row/col non-empty indicators
ppred0 = HopRewriteUtils.createBinary(input, new LiteralOp(0), OpOp2.NOTEQUAL);
//always MR
ppred0.setForcedExecType(ExecType.MR);
emptyInd = ppred0;
// aggregation is skipped if the input is already a single column (rows) or single row (cols)
if (!((rmRows && clen == 1) || (!rmRows && rlen == 1))) {
emptyInd = HopRewriteUtils.createAggUnaryOp(ppred0, AggOp.MAX, rmRows ? Direction.Row : Direction.Col);
//always MR
emptyInd.setForcedExecType(ExecType.MR);
HopRewriteUtils.copyLineNumbers(this, emptyInd);
}
} else {
// a user-provided select vector replaces the computed indicator;
// note that the select hop itself is modified here (block sizes, forced exec type, line numbers)
emptyInd = selectHop;
emptyInd.setOutputBlocksizes(brlen, bclen);
emptyInd.refreshSizeInformation();
//always MR
emptyInd.setForcedExecType(ExecType.MR);
HopRewriteUtils.copyLineNumbers(this, emptyInd);
}
//Step 2: compute row offsets for non-empty rows
// for column removal the indicator is transposed before and after the cumsum
Hop cumsumInput = emptyInd;
if (!rmRows) {
cumsumInput = HopRewriteUtils.createTranspose(emptyInd);
HopRewriteUtils.updateHopCharacteristics(cumsumInput, brlen, bclen, this);
}
UnaryOp cumsum = HopRewriteUtils.createUnary(cumsumInput, OpOp1.CUMSUM);
HopRewriteUtils.updateHopCharacteristics(cumsum, brlen, bclen, this);
Hop cumsumOutput = cumsum;
if (!rmRows) {
cumsumOutput = HopRewriteUtils.createTranspose(cumsum);
HopRewriteUtils.updateHopCharacteristics(cumsumOutput, brlen, bclen, this);
}
//alternative: right indexing
Hop maxDim = HopRewriteUtils.createAggUnaryOp(cumsumOutput, AggOp.MAX, Direction.RowCol);
HopRewriteUtils.updateHopCharacteristics(maxDim, brlen, bclen, this);
// offsets = cumsum * indicator
BinaryOp offsets = HopRewriteUtils.createBinary(cumsumOutput, emptyInd, OpOp2.MULT);
HopRewriteUtils.updateHopCharacteristics(offsets, brlen, bclen, this);
//Step 3: gather non-empty rows/cols into final results
Lop linput = input.constructLops();
Lop loffset = offsets.constructLops();
Lop lmaxdim = maxDim.constructLops();
// size estimate of the rlen x 1 offset vector, compared against the remote map budget below
double mestPM = OptimizerUtils.estimatePartitionedSizeExactSparsity(rlen, 1, brlen, bclen, 1.0);
Lop rmEmpty = null;
//a) broadcast-based PMM (permutation matrix mult)
if (rmRows && rlen > 0 && mestPM < OptimizerUtils.getRemoteMemBudgetMap()) {
boolean needPart = !offsets.dimsKnown() || offsets.getDim1() > DistributedCacheInput.PARTITION_SIZE;
if (needPart) {
//requires partitioning
// partitioning runs in MR if the estimate exceeds the local memory budget, otherwise in CP
loffset = new DataPartition(loffset, DataType.MATRIX, ValueType.DOUBLE, (mestPM > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
loffset.getOutputParameters().setDimensions(rlen, 1, brlen, bclen, rlen);
setLineNumbers(loffset);
}
rmEmpty = new PMMJ(loffset, linput, lmaxdim, getDataType(), getValueType(), needPart, true, ExecType.MR);
setOutputDimensions(rmEmpty);
setLineNumbers(rmEmpty);
} else //b) general case: repartition-based rmempty
{
// the offset vector is replicated if the removed dimension's counterpart exceeds one block or is unknown
boolean requiresRep = ((clen > bclen || clen <= 0) && rmRows) || ((rlen > brlen || rlen <= 0) && !rmRows);
if (requiresRep) {
//ncol of left input (determines num replicates)
Lop pos = createOffsetLop(input, rmRows);
loffset = new RepMat(loffset, pos, rmRows, DataType.MATRIX, ValueType.DOUBLE);
loffset.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, nnz);
setLineNumbers(loffset);
}
// target and offsets each go through a sort group before the rmempty lop
Group group1 = new Group(linput, Group.OperationTypes.Sort, getDataType(), getValueType());
setLineNumbers(group1);
group1.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, nnz);
Group group2 = new Group(loffset, Group.OperationTypes.Sort, getDataType(), getValueType());
setLineNumbers(group2);
group2.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, nnz);
HashMap<String, Lop> inMap = new HashMap<String, Lop>();
inMap.put("target", group1);
inMap.put("offset", group2);
inMap.put("maxdim", lmaxdim);
inMap.put("margin", inputlops.get("margin"));
rmEmpty = new ParameterizedBuiltin(inMap, HopsParameterizedBuiltinLops.get(_op), getDataType(), getValueType(), et);
setOutputDimensions(rmEmpty);
setLineNumbers(rmEmpty);
}
// both plans end with a sort group (unknown dimensions) followed by a sum aggregate in MR
Group group3 = new Group(rmEmpty, Group.OperationTypes.Sort, getDataType(), getValueType());
setLineNumbers(group3);
group3.getOutputParameters().setDimensions(-1, -1, brlen, bclen, -1);
Aggregate finalagg = new Aggregate(group3, Aggregate.OperationTypes.Sum, DataType.MATRIX, getValueType(), ExecType.MR);
setOutputDimensions(finalagg);
setLineNumbers(finalagg);
//Step 4: cleanup hops (allow for garbage collection)
// ppred0 only exists if no select vector was given
if (selectHop == null)
HopRewriteUtils.removeChildReference(ppred0, input);
setLops(finalagg);
}
} else if (et == ExecType.SPARK) {
if (!(marginHop instanceof LiteralOp))
throw new HopsException("Parameter 'margin' must be a literal argument.");
Hop input = targetHop;
long rlen = input.getDim1();
long clen = input.getDim2();
long brlen = input.getRowsInBlock();
long bclen = input.getColsInBlock();
// true for margin "rows", false for any other margin literal
boolean rmRows = ((LiteralOp) marginHop).getStringValue().equals("rows");
//construct lops via new partial hop dag and subsequent lops construction
//in order to reuse of operator selection decisions
BinaryOp ppred0 = null;
Hop emptyInd = null;
if (selectHop == null) {
//Step1: compute row/col non-empty indicators
ppred0 = HopRewriteUtils.createBinary(input, new LiteralOp(0), OpOp2.NOTEQUAL);
//always Spark
ppred0.setForcedExecType(ExecType.SPARK);
emptyInd = ppred0;
// aggregation is skipped if the input is already a single column (rows) or single row (cols)
if (!((rmRows && clen == 1) || (!rmRows && rlen == 1))) {
emptyInd = HopRewriteUtils.createAggUnaryOp(ppred0, AggOp.MAX, rmRows ? Direction.Row : Direction.Col);
//always Spark
emptyInd.setForcedExecType(ExecType.SPARK);
}
} else {
// a user-provided select vector replaces the computed indicator;
// note that the select hop itself is modified here (block sizes, forced exec type, line numbers)
emptyInd = selectHop;
emptyInd.setOutputBlocksizes(brlen, bclen);
emptyInd.refreshSizeInformation();
//always Spark
emptyInd.setForcedExecType(ExecType.SPARK);
HopRewriteUtils.copyLineNumbers(this, emptyInd);
}
//Step 2: compute row offsets for non-empty rows
// for column removal the indicator is transposed before and after the cumsum
Hop cumsumInput = emptyInd;
if (!rmRows) {
cumsumInput = HopRewriteUtils.createTranspose(emptyInd);
HopRewriteUtils.updateHopCharacteristics(cumsumInput, brlen, bclen, this);
}
UnaryOp cumsum = HopRewriteUtils.createUnary(cumsumInput, OpOp1.CUMSUM);
HopRewriteUtils.updateHopCharacteristics(cumsum, brlen, bclen, this);
Hop cumsumOutput = cumsum;
if (!rmRows) {
cumsumOutput = HopRewriteUtils.createTranspose(cumsum);
HopRewriteUtils.updateHopCharacteristics(cumsumOutput, brlen, bclen, this);
}
//alternative: right indexing
Hop maxDim = HopRewriteUtils.createAggUnaryOp(cumsumOutput, AggOp.MAX, Direction.RowCol);
HopRewriteUtils.updateHopCharacteristics(maxDim, brlen, bclen, this);
// offsets = cumsum * indicator
BinaryOp offsets = HopRewriteUtils.createBinary(cumsumOutput, emptyInd, OpOp2.MULT);
HopRewriteUtils.updateHopCharacteristics(offsets, brlen, bclen, this);
//Step 3: gather non-empty rows/cols into final results
Lop linput = input.constructLops();
Lop loffset = offsets.constructLops();
Lop lmaxdim = maxDim.constructLops();
HashMap<String, Lop> inMap = new HashMap<String, Lop>();
inMap.put("target", linput);
inMap.put("offset", loffset);
inMap.put("maxdim", lmaxdim);
inMap.put("margin", inputlops.get("margin"));
// the flag is only ever switched on here and is then passed to the lop constructor
if (!FORCE_DIST_RM_EMPTY && isRemoveEmptyBcSP())
_bRmEmptyBC = true;
ParameterizedBuiltin pbilop = new ParameterizedBuiltin(inMap, HopsParameterizedBuiltinLops.get(_op), getDataType(), getValueType(), et, _bRmEmptyBC);
setOutputDimensions(pbilop);
setLineNumbers(pbilop);
//Step 4: cleanup hops (allow for garbage collection)
// ppred0 only exists if no select vector was given
if (selectHop == null)
HopRewriteUtils.removeChildReference(ppred0, input);
setLops(pbilop);
//NOTE: in contrast to mr, replication and aggregation handled instruction-local
}
}
use of org.apache.sysml.lops.ParameterizedBuiltin in project incubator-systemml by apache.
the class Dag method getAggAndOtherInstructions.
/**
 * Recursively generates the shuffle, aggregate, and other reducer instructions for
 * {@code node} and its inputs, and returns the output index assigned to {@code node}.
 *
 * <p>Inputs are processed first (for a WRITE data lop only its first input). A node that
 * already has an entry in {@code nodeIndexMapping} is not processed again. In debug mode
 * exactly one begin line is recorded in {@code MRJobLineNumbers} per generated instruction.
 *
 * @param node low-level operator to generate instructions for
 * @param execNodes list of exec nodes; nodes not contained in it are ignored
 * @param shuffleInstructions list receiving shuffle instructions (MapAndReduce lops and CSV writes)
 * @param aggInstructionsReducer list receiving the instructions of single-input Aggregate lops
 * @param otherInstructionsReducer list receiving all other reducer instructions
 * @param nodeIndexMapping mapping from lop to its assigned output index; updated for every generated instruction
 * @param start_index single-element array holding the next free output index; incremented per assigned index
 * @param inputLabels list receiving labels of non-literal scalar inputs of two-input Unary lops
 * @param inputLops list receiving the lops that correspond to {@code inputLabels}
 * @param MRJobLineNumbers list receiving the begin line of each instruction (debug mode only)
 * @return the output index of {@code node}; the index of an input if no instruction was generated
 *         for a Data or MapAndReduce lop; -1 if the node is not in {@code execNodes} or matches no
 *         handled exec location
 * @throws LopsException if a reducer lop has an unsupported number of inputs
 */
private int getAggAndOtherInstructions(Lop node, ArrayList<Lop> execNodes, ArrayList<String> shuffleInstructions, ArrayList<String> aggInstructionsReducer, ArrayList<String> otherInstructionsReducer, HashMap<Lop, Integer> nodeIndexMapping, int[] start_index, ArrayList<String> inputLabels, ArrayList<Lop> inputLops, ArrayList<Integer> MRJobLineNumbers) throws LopsException {
    int ret_val = -1;
    // already processed: reuse the assigned index
    if (nodeIndexMapping.containsKey(node)) {
        return nodeIndexMapping.get(node);
    }
    // not part of this job
    if (!execNodes.contains(node)) {
        return ret_val;
    }

    // recurse into the inputs and collect their output indices
    ArrayList<Integer> inputIndices = new ArrayList<Integer>();
    if (node.getType() == Lop.Type.Data && ((Data) node).getOperationType() == Data.OperationTypes.WRITE) {
        // for writes only the first input is considered
        ret_val = getAggAndOtherInstructions(node.getInputs().get(0), execNodes, shuffleInstructions, aggInstructionsReducer, otherInstructionsReducer, nodeIndexMapping, start_index, inputLabels, inputLops, MRJobLineNumbers);
        inputIndices.add(ret_val);
    } else {
        for (Lop cnode : node.getInputs()) {
            ret_val = getAggAndOtherInstructions(cnode, execNodes, shuffleInstructions, aggInstructionsReducer, otherInstructionsReducer, nodeIndexMapping, start_index, inputLabels, inputLops, MRJobLineNumbers);
            inputIndices.add(ret_val);
        }
    }

    if (node.getExecLocation() == ExecLocation.Data) {
        boolean csvFormat = ((Data) node).getFileFormatType() == FileFormatTypes.CSV;
        if (csvFormat && !(node.getInputs().get(0) instanceof ParameterizedBuiltin && ((ParameterizedBuiltin) node.getInputs().get(0)).getOp() == org.apache.sysml.lops.ParameterizedBuiltin.OperationTypes.TRANSFORM)) {
            // Generate write instruction, which goes into CSV_WRITE Job
            int output_index = start_index[0];
            shuffleInstructions.add(node.getInstructions(inputIndices.get(0), output_index));
            if (DMLScript.ENABLE_DEBUG_MODE) {
                MRJobLineNumbers.add(node._beginLine);
            }
            nodeIndexMapping.put(node, output_index);
            start_index[0]++;
            return output_index;
        }
        // no instruction for this data lop: pass through the index of the last processed input
        return ret_val;
    }

    if (node.getExecLocation() == ExecLocation.MapAndReduce) {
        /* Generate Shuffle Instruction for "node", and return the index associated with produced output */
        boolean instGenerated = true;
        int output_index = start_index[0];
        switch (node.getType()) {
            /* Lop types that take a single input */
            case ReBlock:
            case CSVReBlock:
            case SortKeys:
            case CentralMoment:
            case CoVariance:
            case GroupedAgg:
            case DataPartition:
                shuffleInstructions.add(node.getInstructions(inputIndices.get(0), output_index));
                if (DMLScript.ENABLE_DEBUG_MODE) {
                    MRJobLineNumbers.add(node._beginLine);
                }
                break;
            case ParameterizedBuiltin:
                // NOTE(review): a non-TRANSFORM ParameterizedBuiltin adds no instruction here
                // but is still assigned an output index below
                if (((ParameterizedBuiltin) node).getOp() == org.apache.sysml.lops.ParameterizedBuiltin.OperationTypes.TRANSFORM) {
                    shuffleInstructions.add(node.getInstructions(output_index));
                    if (DMLScript.ENABLE_DEBUG_MODE) {
                        MRJobLineNumbers.add(node._beginLine);
                    }
                }
                break;
            /* Lop types that take two inputs */
            case MMCJ:
            case MMRJ:
            case CombineBinary:
                shuffleInstructions.add(node.getInstructions(inputIndices.get(0), inputIndices.get(1), output_index));
                if (DMLScript.ENABLE_DEBUG_MODE) {
                    MRJobLineNumbers.add(node._beginLine);
                }
                break;
            /* Lop types that take three inputs */
            case CombineTernary:
                shuffleInstructions.add(node.getInstructions(inputIndices.get(0), inputIndices.get(1), inputIndices.get(2), output_index));
                if (DMLScript.ENABLE_DEBUG_MODE) {
                    MRJobLineNumbers.add(node._beginLine);
                }
                break;
            default:
                instGenerated = false;
                break;
        }
        if (instGenerated) {
            nodeIndexMapping.put(node, output_index);
            start_index[0]++;
            return output_index;
        }
        // no shuffle instruction: the node is represented by its first input
        return inputIndices.get(0);
    }

    /* Get instructions for aligned reduce and other lops below the reduce. */
    if (node.getExecLocation() == ExecLocation.Reduce || node.getExecLocation() == ExecLocation.MapOrReduce || hasChildNode(node, execNodes, ExecLocation.MapAndReduce)) {
        if (inputIndices.size() == 1) {
            int output_index = start_index[0];
            start_index[0]++;
            if (node.getType() == Type.Aggregate) {
                aggInstructionsReducer.add(node.getInstructions(inputIndices.get(0), output_index));
            } else {
                otherInstructionsReducer.add(node.getInstructions(inputIndices.get(0), output_index));
            }
            // record the line number once per instruction; the aggregate branch
            // previously added it a second time
            if (DMLScript.ENABLE_DEBUG_MODE) {
                MRJobLineNumbers.add(node._beginLine);
            }
            nodeIndexMapping.put(node, output_index);
            return output_index;
        } else if (inputIndices.size() == 2) {
            int output_index = start_index[0];
            start_index[0]++;
            otherInstructionsReducer.add(node.getInstructions(inputIndices.get(0), inputIndices.get(1), output_index));
            if (DMLScript.ENABLE_DEBUG_MODE) {
                MRJobLineNumbers.add(node._beginLine);
            }
            nodeIndexMapping.put(node, output_index);
            if (node instanceof Unary && node.getInputs().size() > 1) {
                // locate the first scalar input (defaults to input 0 if there is none)
                int index = 0;
                for (int i = 0; i < node.getInputs().size(); i++) {
                    if (node.getInputs().get(i).getDataType() == DataType.SCALAR) {
                        index = i;
                        break;
                    }
                }
                // register the input as a job input unless it is a literal data lop
                Lop scalarInput = node.getInputs().get(index);
                if (scalarInput.getExecLocation() != ExecLocation.Data || !((Data) scalarInput).isLiteral()) {
                    inputLabels.add(scalarInput.getOutputParameters().getLabel());
                    inputLops.add(scalarInput);
                }
            }
            return output_index;
        } else if (inputIndices.size() == 3 || node.getType() == Type.Ternary) {
            int output_index = start_index[0];
            start_index[0]++;
            // all three-input lops are handled identically;
            // in case of CTABLE_TRANSFORM_SCALAR_WEIGHT: inputIndices.get(2) would be -1
            otherInstructionsReducer.add(node.getInstructions(inputIndices.get(0), inputIndices.get(1), inputIndices.get(2), output_index));
            if (DMLScript.ENABLE_DEBUG_MODE) {
                MRJobLineNumbers.add(node._beginLine);
            }
            nodeIndexMapping.put(node, output_index);
            return output_index;
        } else if (inputIndices.size() == 4) {
            int output_index = start_index[0];
            start_index[0]++;
            otherInstructionsReducer.add(node.getInstructions(inputIndices.get(0), inputIndices.get(1), inputIndices.get(2), inputIndices.get(3), output_index));
            if (DMLScript.ENABLE_DEBUG_MODE) {
                MRJobLineNumbers.add(node._beginLine);
            }
            nodeIndexMapping.put(node, output_index);
            return output_index;
        } else {
            throw new LopsException("Invalid number of inputs to a lop: " + inputIndices.size());
        }
    }
    return -1;
}
use of org.apache.sysml.lops.ParameterizedBuiltin in project incubator-systemml by apache.
the class ParameterizedBuiltinOp method constructLopsRExpand.
/**
 * Builds the lops for the rexpand operation and registers the root via setLops.
 *
 * <p>CP and SPARK use a single ParameterizedBuiltin lop that is given a constrained
 * number of threads; MR appends a sort group and a sum aggregate to the
 * ParameterizedBuiltin lop. For any other execution type no lops are set.
 *
 * @param inputlops map from parameter name to the lop of that input
 * @param et execution type the lops are constructed for
 * @throws HopsException declared for the underlying hop calls
 * @throws LopsException declared for the underlying lop construction calls
 */
private void constructLopsRExpand(HashMap<String, Lop> inputlops, ExecType et) throws HopsException, LopsException {
    final boolean singleLopPlan = (et == ExecType.CP || et == ExecType.SPARK);

    // nothing is constructed for exec types other than CP, SPARK, and MR
    if (!singleLopPlan && et != ExecType.MR) {
        return;
    }

    if (singleLopPlan) {
        int numThreads = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
        ParameterizedBuiltin rexpand = new ParameterizedBuiltin(inputlops, HopsParameterizedBuiltinLops.get(_op), getDataType(), getValueType(), et, numThreads);
        setOutputDimensions(rexpand);
        setLineNumbers(rexpand);
        setLops(rexpand);
        return;
    }

    // MR plan: rexpand -> sort group -> sum aggregate
    ParameterizedBuiltin rexpand = new ParameterizedBuiltin(inputlops, HopsParameterizedBuiltinLops.get(_op), getDataType(), getValueType(), et);
    setOutputDimensions(rexpand);
    setLineNumbers(rexpand);

    Group sortGroup = new Group(rexpand, Group.OperationTypes.Sort, getDataType(), getValueType());
    setOutputDimensions(sortGroup);
    setLineNumbers(sortGroup);

    Aggregate sumAgg = new Aggregate(sortGroup, Aggregate.OperationTypes.Sum, DataType.MATRIX, getValueType(), ExecType.MR);
    setOutputDimensions(sumAgg);
    setLineNumbers(sumAgg);
    setLops(sumAgg);
}
use of org.apache.sysml.lops.ParameterizedBuiltin in project incubator-systemml by apache.
the class Dag method setupNodeOutputs.
/**
* Method to setup output filenames and outputInfos, and to generate related instructions
*
* @param node low-level operator
* @param et exec type
* @param cellModeOverride override mode
* @param copyTWrite ?
* @return node output
* @throws DMLRuntimeException if DMLRuntimeException occurs
* @throws LopsException if LopsException occurs
*/
private NodeOutput setupNodeOutputs(Lop node, ExecType et, boolean cellModeOverride, boolean copyTWrite) throws DMLRuntimeException, LopsException {
OutputParameters oparams = node.getOutputParameters();
NodeOutput out = new NodeOutput();
node.setConsumerCount(node.getOutputs().size());
// Compute the output format for this node
out.setOutInfo(getOutputInfo(node, cellModeOverride));
// since outputs are explicitly specified
if (node.getExecLocation() != ExecLocation.Data) {
if (node.getDataType() == DataType.SCALAR) {
oparams.setLabel(Lop.SCALAR_VAR_NAME_PREFIX + var_index.getNextID());
out.setVarName(oparams.getLabel());
Instruction currInstr = VariableCPInstruction.prepareRemoveInstruction(oparams.getLabel());
currInstr.setLocation(node);
out.addLastInstruction(currInstr);
} else if (node instanceof ParameterizedBuiltin && ((ParameterizedBuiltin) node).getOp() == org.apache.sysml.lops.ParameterizedBuiltin.OperationTypes.TRANSFORM) {
ParameterizedBuiltin pbi = (ParameterizedBuiltin) node;
Lop input = pbi.getNamedInput(ParameterizedBuiltinFunctionExpression.TF_FN_PARAM_DATA);
if (input.getDataType() == DataType.FRAME) {
// Output of transform is in CSV format, which gets subsequently reblocked
// TODO: change it to output binaryblock
Data dataInput = (Data) input;
oparams.setFile_name(getNextUniqueFilename());
oparams.setLabel(getNextUniqueVarname(DataType.MATRIX));
// generate an instruction that creates a symbol table entry for the new variable in CSV format
Data delimLop = (Data) dataInput.getNamedInputLop(DataExpression.DELIM_DELIMITER, DataExpression.DEFAULT_DELIM_DELIMITER);
Instruction createvarInst = VariableCPInstruction.prepareCreateVariableInstruction(oparams.getLabel(), oparams.getFile_name(), true, DataType.MATRIX, OutputInfo.outputInfoToString(OutputInfo.CSVOutputInfo), new MatrixCharacteristics(oparams.getNumRows(), oparams.getNumCols(), -1, -1, oparams.getNnz()), oparams.getUpdateType(), false, delimLop.getStringValue(), true);
createvarInst.setLocation(node);
out.addPreInstruction(createvarInst);
// temp file as well as the variable has to be deleted at the end
Instruction currInstr = VariableCPInstruction.prepareRemoveInstruction(oparams.getLabel());
currInstr.setLocation(node);
out.addLastInstruction(currInstr);
// finally, add the generated filename and variable name to the list of outputs
out.setFileName(oparams.getFile_name());
out.setVarName(oparams.getLabel());
} else {
throw new LopsException("Input to transform() has an invalid type: " + input.getDataType() + ", it must be FRAME.");
}
} else if (//general case
!(node instanceof FunctionCallCP)) {
// generate temporary filename and a variable name to hold the
// output produced by "rootNode"
oparams.setFile_name(getNextUniqueFilename());
oparams.setLabel(getNextUniqueVarname(node.getDataType()));
// generate an instruction that creates a symbol table entry for the new variable
//String createInst = prepareVariableInstruction("createvar", node);
//out.addPreInstruction(CPInstructionParser.parseSingleInstruction(createInst));
int rpb = (int) oparams.getRowsInBlock();
int cpb = (int) oparams.getColsInBlock();
Instruction createvarInst = VariableCPInstruction.prepareCreateVariableInstruction(oparams.getLabel(), oparams.getFile_name(), true, node.getDataType(), OutputInfo.outputInfoToString(getOutputInfo(node, false)), new MatrixCharacteristics(oparams.getNumRows(), oparams.getNumCols(), rpb, cpb, oparams.getNnz()), oparams.getUpdateType());
createvarInst.setLocation(node);
out.addPreInstruction(createvarInst);
// temp file as well as the variable has to be deleted at the end
Instruction currInstr = VariableCPInstruction.prepareRemoveInstruction(oparams.getLabel());
currInstr.setLocation(node);
out.addLastInstruction(currInstr);
// finally, add the generated filename and variable name to the list of outputs
out.setFileName(oparams.getFile_name());
out.setVarName(oparams.getLabel());
} else {
// If the function call is set with output lops (e.g., multi return builtin),
// generate a createvar instruction for each function output
FunctionCallCP fcall = (FunctionCallCP) node;
if (fcall.getFunctionOutputs() != null) {
for (Lop fnOut : fcall.getFunctionOutputs()) {
OutputParameters fnOutParams = fnOut.getOutputParameters();
//OutputInfo oinfo = getOutputInfo((N)fnOut, false);
Instruction createvarInst = VariableCPInstruction.prepareCreateVariableInstruction(fnOutParams.getLabel(), getFilePath() + fnOutParams.getLabel(), true, fnOut.getDataType(), OutputInfo.outputInfoToString(getOutputInfo(fnOut, false)), new MatrixCharacteristics(fnOutParams.getNumRows(), fnOutParams.getNumCols(), (int) fnOutParams.getRowsInBlock(), (int) fnOutParams.getColsInBlock(), fnOutParams.getNnz()), oparams.getUpdateType());
if (node._beginLine != 0)
createvarInst.setLocation(node);
else
createvarInst.setLocation(fnOut);
out.addPreInstruction(createvarInst);
}
}
}
} else // rootNode is of type Data
{
if (node.getDataType() == DataType.SCALAR) {
// generate assignment operations for final and transient writes
if (oparams.getFile_name() == null && !(node instanceof Data && ((Data) node).isPersistentWrite())) {
String io_inst = prepareAssignVarInstruction(node.getInputs().get(0), node);
CPInstruction currInstr = CPInstructionParser.parseSingleInstruction(io_inst);
if (node._beginLine != 0)
currInstr.setLocation(node);
else if (!node.getInputs().isEmpty())
currInstr.setLocation(node.getInputs().get(0));
out.addLastInstruction(currInstr);
} else {
//CP PERSISTENT WRITE SCALARS
Lop fname = ((Data) node).getNamedInputLop(DataExpression.IO_FILENAME);
String io_inst = node.getInstructions(node.getInputs().get(0).getOutputParameters().getLabel(), fname.getOutputParameters().getLabel());
CPInstruction currInstr = CPInstructionParser.parseSingleInstruction(io_inst);
if (node._beginLine != 0)
currInstr.setLocation(node);
else if (!node.getInputs().isEmpty())
currInstr.setLocation(node.getInputs().get(0));
out.addLastInstruction(currInstr);
}
} else {
if (((Data) node).isTransient()) {
if (et == ExecType.CP) {
// If transient matrix write is in CP then its input MUST be executed in CP as well.
// get variable and filename associated with the input
String inputFileName = node.getInputs().get(0).getOutputParameters().getFile_name();
String inputVarName = node.getInputs().get(0).getOutputParameters().getLabel();
String constVarName = oparams.getLabel();
String constFileName = inputFileName + constVarName;
/*
* Symbol Table state must change as follows:
*
* FROM:
* mvar1 -> temp21
*
* TO:
* mVar1 -> temp21
* tVarH -> temp21
*/
Instruction currInstr = VariableCPInstruction.prepareCopyInstruction(inputVarName, constVarName);
currInstr.setLocation(node);
out.addLastInstruction(currInstr);
out.setFileName(constFileName);
} else {
if (copyTWrite) {
Instruction currInstr = VariableCPInstruction.prepareCopyInstruction(node.getInputs().get(0).getOutputParameters().getLabel(), oparams.getLabel());
currInstr.setLocation(node);
out.addLastInstruction(currInstr);
return out;
}
/*
* Since the "rootNode" is a transient data node, we first need to generate a
* temporary filename as well as a variable name to hold the <i>immediate</i>
* output produced by "rootNode". These generated HDFS filename and the
* variable name must be changed at the end of an iteration/program block
* so that the subsequent iteration/program block can correctly access the
* generated data. Therefore, we need to distinguish between the following:
*
* 1) Temporary file name & variable name: They hold the immediate output
* produced by "rootNode". Both names are generated below.
*
* 2) Constant file name & variable name: They are constant across iterations.
* Variable name is given by rootNode's label that is created in the upper layers.
* File name is generated by concatenating "temporary file name" and "constant variable name".
*
* Temporary files must be moved to constant files at the end of the iteration/program block.
*/
// generate temporary filename & var name
String tempVarName = oparams.getLabel() + "temp";
String tempFileName = getNextUniqueFilename();
//String createInst = prepareVariableInstruction("createvar", tempVarName, node.getDataType(), node.getValueType(), tempFileName, oparams, out.getOutInfo());
//out.addPreInstruction(CPInstructionParser.parseSingleInstruction(createInst));
int rpb = (int) oparams.getRowsInBlock();
int cpb = (int) oparams.getColsInBlock();
Instruction createvarInst = VariableCPInstruction.prepareCreateVariableInstruction(tempVarName, tempFileName, true, node.getDataType(), OutputInfo.outputInfoToString(out.getOutInfo()), new MatrixCharacteristics(oparams.getNumRows(), oparams.getNumCols(), rpb, cpb, oparams.getNnz()), oparams.getUpdateType());
createvarInst.setLocation(node);
out.addPreInstruction(createvarInst);
String constVarName = oparams.getLabel();
String constFileName = tempFileName + constVarName;
oparams.setFile_name(getFilePath() + constFileName);
/*
* Since this is a node that denotes a transient read/write, we need to make sure
* that the data computed for a given variable in a given iteration is passed on
* to the next iteration. This is done by generating miscellaneous instructions
* that get executed at the end of the program block.
*
* The state of the symbol table must change
*
* FROM:
* tVarA -> temp21tVarA (old copy of temp21)
* tVarAtemp -> temp21 (new copy that should override the old copy)
*
* TO:
* tVarA -> temp21tVarA
*/
// rename the temp variable to constant variable (e.g., cpvar tVarAtemp tVarA)
/*Instruction currInstr = VariableCPInstruction.prepareCopyInstruction(tempVarName, constVarName);
if(DMLScript.ENABLE_DEBUG_MODE) {
currInstr.setLineNum(node._beginLine);
}
out.addLastInstruction(currInstr);
Instruction tempInstr = VariableCPInstruction.prepareRemoveInstruction(tempVarName);
if(DMLScript.ENABLE_DEBUG_MODE) {
tempInstr.setLineNum(node._beginLine);
}
out.addLastInstruction(tempInstr);*/
// Generate a single mvvar instruction (e.g., mvvar tempA A)
// instead of two instructions "cpvar tempA A" and "rmvar tempA"
Instruction currInstr = VariableCPInstruction.prepareMoveInstruction(tempVarName, constVarName);
currInstr.setLocation(node);
out.addLastInstruction(currInstr);
// finally, add the temporary filename and variable name to the list of outputs
out.setFileName(tempFileName);
out.setVarName(tempVarName);
}
} else // rootNode is not a transient write. It is a persistent write.
{
if (et == ExecType.MR) {
//MR PERSISTENT WRITE
// create a variable to hold the result produced by this "rootNode"
oparams.setLabel("pVar" + var_index.getNextID());
//String createInst = prepareVariableInstruction("createvar", node);
//out.addPreInstruction(CPInstructionParser.parseSingleInstruction(createInst));
int rpb = (int) oparams.getRowsInBlock();
int cpb = (int) oparams.getColsInBlock();
Lop fnameLop = ((Data) node).getNamedInputLop(DataExpression.IO_FILENAME);
String fnameStr = (fnameLop instanceof Data && ((Data) fnameLop).isLiteral()) ? fnameLop.getOutputParameters().getLabel() : Lop.VARIABLE_NAME_PLACEHOLDER + fnameLop.getOutputParameters().getLabel() + Lop.VARIABLE_NAME_PLACEHOLDER;
Instruction createvarInst;
// part MM format file on hdfs.
if (oparams.getFormat() == Format.CSV) {
String tempFileName = getNextUniqueFilename();
String createInst = node.getInstructions(tempFileName);
createvarInst = CPInstructionParser.parseSingleInstruction(createInst);
//NOTE: no instruction patching because final write from cp instruction
String writeInst = node.getInstructions(oparams.getLabel(), fnameLop.getOutputParameters().getLabel());
CPInstruction currInstr = CPInstructionParser.parseSingleInstruction(writeInst);
currInstr.setLocation(node);
out.addPostInstruction(currInstr);
// remove the variable
CPInstruction tempInstr = CPInstructionParser.parseSingleInstruction("CP" + Lop.OPERAND_DELIMITOR + "rmfilevar" + Lop.OPERAND_DELIMITOR + oparams.getLabel() + Lop.VALUETYPE_PREFIX + Expression.ValueType.UNKNOWN + Lop.OPERAND_DELIMITOR + "true" + Lop.VALUETYPE_PREFIX + "BOOLEAN");
tempInstr.setLocation(node);
out.addLastInstruction(tempInstr);
} else if (oparams.getFormat() == Format.MM) {
createvarInst = VariableCPInstruction.prepareCreateVariableInstruction(oparams.getLabel(), getNextUniqueFilename(), false, node.getDataType(), OutputInfo.outputInfoToString(getOutputInfo(node, false)), new MatrixCharacteristics(oparams.getNumRows(), oparams.getNumCols(), rpb, cpb, oparams.getNnz()), oparams.getUpdateType());
//NOTE: no instruction patching because final write from cp instruction
String writeInst = node.getInstructions(oparams.getLabel(), fnameLop.getOutputParameters().getLabel());
CPInstruction currInstr = CPInstructionParser.parseSingleInstruction(writeInst);
currInstr.setLocation(node);
out.addPostInstruction(currInstr);
// remove the variable
CPInstruction tempInstr = CPInstructionParser.parseSingleInstruction("CP" + Lop.OPERAND_DELIMITOR + "rmfilevar" + Lop.OPERAND_DELIMITOR + oparams.getLabel() + Lop.VALUETYPE_PREFIX + Expression.ValueType.UNKNOWN + Lop.OPERAND_DELIMITOR + "true" + Lop.VALUETYPE_PREFIX + "BOOLEAN");
tempInstr.setLocation(node);
out.addLastInstruction(tempInstr);
} else {
createvarInst = VariableCPInstruction.prepareCreateVariableInstruction(oparams.getLabel(), fnameStr, false, node.getDataType(), OutputInfo.outputInfoToString(getOutputInfo(node, false)), new MatrixCharacteristics(oparams.getNumRows(), oparams.getNumCols(), rpb, cpb, oparams.getNnz()), oparams.getUpdateType());
// remove the variable
CPInstruction currInstr = CPInstructionParser.parseSingleInstruction("CP" + Lop.OPERAND_DELIMITOR + "rmfilevar" + Lop.OPERAND_DELIMITOR + oparams.getLabel() + Lop.VALUETYPE_PREFIX + Expression.ValueType.UNKNOWN + Lop.OPERAND_DELIMITOR + "false" + Lop.VALUETYPE_PREFIX + "BOOLEAN");
currInstr.setLocation(node);
out.addLastInstruction(currInstr);
}
createvarInst.setLocation(node);
out.addPreInstruction(createvarInst);
// finally, add the filename and variable name to the list of outputs
out.setFileName(oparams.getFile_name());
out.setVarName(oparams.getLabel());
} else {
//CP PERSISTENT WRITE
// generate a write instruction that writes matrix to HDFS
Lop fname = ((Data) node).getNamedInputLop(DataExpression.IO_FILENAME);
Instruction currInstr = null;
Lop inputLop = node.getInputs().get(0);
// Move the temporary file on HDFS to the required persistent location instead of copying it.
if (inputLop.getExecLocation() == ExecLocation.Data && inputLop.getOutputs().size() == 1 && ((Data) inputLop).isTransient() && ((Data) inputLop).getOutputParameters().isBlocked() && node.getOutputParameters().isBlocked()) {
// transient read feeding into persistent write in blocked representation
// simply, move the file
//prepare filename (literal or variable in order to support dynamic write)
String fnameStr = (fname instanceof Data && ((Data) fname).isLiteral()) ? fname.getOutputParameters().getLabel() : Lop.VARIABLE_NAME_PLACEHOLDER + fname.getOutputParameters().getLabel() + Lop.VARIABLE_NAME_PLACEHOLDER;
currInstr = (CPInstruction) VariableCPInstruction.prepareMoveInstruction(inputLop.getOutputParameters().getLabel(), fnameStr, "binaryblock");
} else {
String io_inst = node.getInstructions(node.getInputs().get(0).getOutputParameters().getLabel(), fname.getOutputParameters().getLabel());
if (node.getExecType() == ExecType.SPARK)
// This will throw an exception if the exectype of hop is set incorrectly
// Note: the exec type and exec location of lops needs to be set to SPARK and ControlProgram respectively
currInstr = SPInstructionParser.parseSingleInstruction(io_inst);
else
currInstr = CPInstructionParser.parseSingleInstruction(io_inst);
}
if (!node.getInputs().isEmpty() && node.getInputs().get(0)._beginLine != 0)
currInstr.setLocation(node.getInputs().get(0));
else
currInstr.setLocation(node);
out.addLastInstruction(currInstr);
}
}
}
}
return out;
}
Aggregations