use of org.apache.sysml.lops.Aggregate in project systemml by apache.
the class ParameterizedBuiltinOp method constructLopsRemoveEmpty.
/**
 * Constructs the lops for this removeEmpty operation and registers them via setLops().
 * The plan depends on the given execution type:
 * <ul>
 * <li>CP: a single ParameterizedBuiltin lop over the given input lops.</li>
 * <li>MR, target is a diag input and margin is the literal "rows": the output is compiled
 * from a temporary hop dag over the diag's input vector (selection vector or ctable).</li>
 * <li>MR, otherwise: offsets are computed via a temporary hop dag (indicator, cumsum, max)
 * and rows/columns are gathered either by PMMJ or by a ParameterizedBuiltin lop,
 * followed by Group and Aggregate.</li>
 * <li>SPARK: same offset computation, followed by a single ParameterizedBuiltin lop.</li>
 * </ul>
 * For any other execution type no lops are set by this method.
 *
 * @param inputlops lops of the named parameters; "margin" and "empty.return" are looked up
 *                  by name in the MR/SPARK plans, the whole map is passed on in CP
 * @param et        execution type that selects the plan
 * @throws HopsException if 'margin' is not a literal in the default MR plan or the SPARK plan
 */
private void constructLopsRemoveEmpty(HashMap<String, Lop> inputlops, ExecType et) {
Hop targetHop = getTargetHop();
Hop marginHop = getParameterHop("margin");
Hop selectHop = getParameterHop("select");
Hop emptyRet = getParameterHop("empty.return");
if (et == ExecType.CP) {
ParameterizedBuiltin pbilop = new ParameterizedBuiltin(inputlops, HopsParameterizedBuiltinLops.get(_op), getDataType(), getValueType(), et);
setOutputDimensions(pbilop);
setLineNumbers(pbilop);
setLops(pbilop);
// NOTE: a CP-side special case (permutation-matrix selection vector for diag inputs with
// margin "rows") used to be kept here as commented-out code; it was disabled because it
// requires size propagation on recompile (see for example the MDA Bivar test).
} else if (et == ExecType.MR) {
// special compile for mr removeEmpty-diag
if (isTargetDiagInput() && HopRewriteUtils.isLiteralOfValue(marginHop, "rows")) {
// get input vector (without materializing diag())
Hop input = targetHop.getInput().get(0);
int brlen = input.getRowsInBlock();
int bclen = input.getColsInBlock();
MemoTable memo = new MemoTable();
boolean isPPredInput = (input instanceof BinaryOp && ((BinaryOp) input).isPPredOperation());
// step1: compute index vectors
Hop ppred0 = input;
if (!isPPredInput) {
// ppred only if required
ppred0 = HopRewriteUtils.createBinary(input, new LiteralOp(0), OpOp2.NOTEQUAL);
HopRewriteUtils.updateHopCharacteristics(ppred0, brlen, bclen, memo, this);
}
UnaryOp cumsum = HopRewriteUtils.createUnary(ppred0, OpOp1.CUMSUM);
HopRewriteUtils.updateHopCharacteristics(cumsum, brlen, bclen, memo, this);
Lop loutput = null;
// memory estimate that is compared against the remote map budget to decide
// whether the selection-vector plan may be used
double mest = AggBinaryOp.getMapmmMemEstimate(input.getDim1(), 1, brlen, bclen, -1, brlen, bclen, brlen, bclen, -1, 1, true);
double mbudget = OptimizerUtils.getRemoteMemBudgetMap(true);
if (// SPECIAL CASE: SELECTION VECTOR
_outputPermutationMatrix && mest < mbudget) {
// output is (input != 0) * cumsum(input != 0)
BinaryOp sel = HopRewriteUtils.createBinary(ppred0, cumsum, OpOp2.MULT);
HopRewriteUtils.updateHopCharacteristics(sel, brlen, bclen, memo, this);
loutput = sel.constructLops();
} else // GENERAL CASE: GENERAL PERMUTATION MATRIX
{
// max ensures non-zero entries and at least one output row
BinaryOp max = HopRewriteUtils.createBinary(cumsum, new LiteralOp(1), OpOp2.MAX);
HopRewriteUtils.updateHopCharacteristics(max, brlen, bclen, memo, this);
DataGenOp seq = HopRewriteUtils.createSeqDataGenOp(input);
seq.setName("tmp4");
HopRewriteUtils.updateHopCharacteristics(seq, brlen, bclen, memo, this);
// step 2: compute removeEmpty(rows) output via table, seq guarantees right column dimension
// note: weights always the input (even if isPPredInput) because input also includes 0s
TernaryOp table = new TernaryOp("tmp5", DataType.MATRIX, ValueType.DOUBLE, OpOp3.CTABLE, max, seq, input);
table.setOutputBlocksizes(brlen, bclen);
table.refreshSizeInformation();
// force MR
table.setForcedExecType(ExecType.MR);
HopRewriteUtils.copyLineNumbers(this, table);
table.setDisjointInputs(true);
table.setOutputEmptyBlocks(_outputEmptyBlocks);
loutput = table.constructLops();
// detach the temporary table hop from the real input again
HopRewriteUtils.removeChildReference(table, input);
}
// Step 4: cleanup hops (allow for garbage collection)
HopRewriteUtils.removeChildReference(ppred0, input);
setLops(loutput);
} else // default mr remove empty
// NOTE(review): this condition is always true here, the enclosing branch already
// established et == ExecType.MR
if (et == ExecType.MR) {
if (!(marginHop instanceof LiteralOp))
throw new HopsException("Parameter 'margin' must be a literal argument.");
Hop input = targetHop;
long rlen = input.getDim1();
long clen = input.getDim2();
int brlen = input.getRowsInBlock();
int bclen = input.getColsInBlock();
long nnz = input.getNnz();
boolean rmRows = ((LiteralOp) marginHop).getStringValue().equals("rows");
// construct lops via new partial hop dag and subsequent lops construction
// in order to reuse of operator selection decisions
BinaryOp ppred0 = null;
Hop emptyInd = null;
if (selectHop == null) {
// Step1: compute row/col non-empty indicators
ppred0 = HopRewriteUtils.createBinary(input, new LiteralOp(0), OpOp2.NOTEQUAL);
// always MR
ppred0.setForcedExecType(ExecType.MR);
emptyInd = ppred0;
// aggregate the indicator unless the input is already a single column (margin rows)
// or a single row (margin cols)
if (!((rmRows && clen == 1) || (!rmRows && rlen == 1))) {
emptyInd = HopRewriteUtils.createAggUnaryOp(ppred0, AggOp.MAX, rmRows ? Direction.Row : Direction.Col);
// always MR
emptyInd.setForcedExecType(ExecType.MR);
HopRewriteUtils.copyLineNumbers(this, emptyInd);
}
} else {
// a user-provided 'select' parameter replaces the computed indicator
emptyInd = selectHop;
}
// Step 2: compute row offsets for non-empty rows
// (for margin cols the indicator is transposed before and after the cumsum)
Hop cumsumInput = emptyInd;
if (!rmRows) {
cumsumInput = HopRewriteUtils.createTranspose(emptyInd);
HopRewriteUtils.updateHopCharacteristics(cumsumInput, brlen, bclen, this);
}
UnaryOp cumsum = HopRewriteUtils.createUnary(cumsumInput, OpOp1.CUMSUM);
HopRewriteUtils.updateHopCharacteristics(cumsum, brlen, bclen, this);
Hop cumsumOutput = cumsum;
if (!rmRows) {
cumsumOutput = HopRewriteUtils.createTranspose(cumsum);
HopRewriteUtils.updateHopCharacteristics(cumsumOutput, brlen, bclen, this);
}
// alternative: right indexing
Hop maxDim = HopRewriteUtils.createAggUnaryOp(cumsumOutput, AggOp.MAX, Direction.RowCol);
HopRewriteUtils.updateHopCharacteristics(maxDim, brlen, bclen, this);
// offsets = cumsum * indicator
BinaryOp offsets = HopRewriteUtils.createBinary(cumsumOutput, emptyInd, OpOp2.MULT);
HopRewriteUtils.updateHopCharacteristics(offsets, brlen, bclen, this);
// Step 3: gather non-empty rows/cols into final results
Lop linput = input.constructLops();
Lop loffset = offsets.constructLops();
Lop lmaxdim = maxDim.constructLops();
// partitioned size estimate of an rlen x 1 vector at sparsity 1.0
double mestPM = OptimizerUtils.estimatePartitionedSizeExactSparsity(rlen, 1, brlen, bclen, 1.0);
Lop rmEmpty = null;
// a) broadcast-based PMM (permutation matrix mult)
// only for margin rows, rlen >= 0, an estimate below the remote map budget,
// and a literal empty.return of false
if (rmRows && rlen >= 0 && mestPM < OptimizerUtils.getRemoteMemBudgetMap() && HopRewriteUtils.isLiteralOfValue(emptyRet, false)) {
boolean needPart = !offsets.dimsKnown() || offsets.getDim1() > DistributedCacheInput.PARTITION_SIZE;
if (needPart) {
// requires partitioning
// (partitioning runs in MR only if the estimate exceeds the local memory budget)
loffset = new DataPartition(loffset, DataType.MATRIX, ValueType.DOUBLE, (mestPM > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP, PDataPartitionFormat.ROW_BLOCK_WISE_N);
loffset.getOutputParameters().setDimensions(rlen, 1, brlen, bclen, rlen);
setLineNumbers(loffset);
}
rmEmpty = new PMMJ(loffset, linput, lmaxdim, getDataType(), getValueType(), needPart, true, ExecType.MR);
setOutputDimensions(rmEmpty);
setLineNumbers(rmEmpty);
} else // b) general case: repartition-based rmempty
{
// the offsets are replicated if the input spans more than one block (or has an
// unknown size) in the dimension orthogonal to the margin
boolean requiresRep = ((clen > bclen || clen <= 0) && rmRows) || ((rlen > brlen || rlen <= 0) && !rmRows);
if (requiresRep) {
// ncol of left input (determines num replicates)
Lop pos = createOffsetLop(input, rmRows);
loffset = new RepMat(loffset, pos, rmRows, DataType.MATRIX, ValueType.DOUBLE);
loffset.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, nnz);
setLineNumbers(loffset);
}
Group group1 = new Group(linput, Group.OperationTypes.Sort, getDataType(), getValueType());
setLineNumbers(group1);
group1.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, nnz);
Group group2 = new Group(loffset, Group.OperationTypes.Sort, getDataType(), getValueType());
setLineNumbers(group2);
group2.getOutputParameters().setDimensions(rlen, clen, brlen, bclen, nnz);
HashMap<String, Lop> inMap = new HashMap<>();
inMap.put("target", group1);
inMap.put("offset", group2);
inMap.put("maxdim", lmaxdim);
inMap.put("margin", inputlops.get("margin"));
inMap.put("empty.return", inputlops.get("empty.return"));
rmEmpty = new ParameterizedBuiltin(inMap, HopsParameterizedBuiltinLops.get(_op), getDataType(), getValueType(), et);
setOutputDimensions(rmEmpty);
setLineNumbers(rmEmpty);
}
// both variants are followed by a Group + Sum aggregate; the group output
// dimensions are set to unknown (-1)
Group group3 = new Group(rmEmpty, Group.OperationTypes.Sort, getDataType(), getValueType());
setLineNumbers(group3);
group3.getOutputParameters().setDimensions(-1, -1, brlen, bclen, -1);
Aggregate finalagg = new Aggregate(group3, Aggregate.OperationTypes.Sum, DataType.MATRIX, getValueType(), ExecType.MR);
setOutputDimensions(finalagg);
setLineNumbers(finalagg);
// Step 4: cleanup hops (allow for garbage collection)
// (ppred0 only exists if no 'select' parameter was given)
if (selectHop == null)
HopRewriteUtils.removeChildReference(ppred0, input);
setLops(finalagg);
}
} else if (et == ExecType.SPARK) {
if (!(marginHop instanceof LiteralOp))
throw new HopsException("Parameter 'margin' must be a literal argument.");
Hop input = targetHop;
long rlen = input.getDim1();
long clen = input.getDim2();
int brlen = input.getRowsInBlock();
int bclen = input.getColsInBlock();
boolean rmRows = ((LiteralOp) marginHop).getStringValue().equals("rows");
// construct lops via new partial hop dag and subsequent lops construction
// in order to reuse of operator selection decisions
BinaryOp ppred0 = null;
Hop emptyInd = null;
if (selectHop == null) {
// Step1: compute row/col non-empty indicators
ppred0 = HopRewriteUtils.createBinary(input, new LiteralOp(0), OpOp2.NOTEQUAL);
// always Spark
ppred0.setForcedExecType(ExecType.SPARK);
emptyInd = ppred0;
// aggregate the indicator unless the input is already a single column (margin rows)
// or a single row (margin cols)
if (!((rmRows && clen == 1) || (!rmRows && rlen == 1))) {
emptyInd = HopRewriteUtils.createAggUnaryOp(ppred0, AggOp.MAX, rmRows ? Direction.Row : Direction.Col);
// always Spark
emptyInd.setForcedExecType(ExecType.SPARK);
}
} else {
// a user-provided 'select' parameter replaces the computed indicator
emptyInd = selectHop;
}
// Step 2: compute row offsets for non-empty rows
// (for margin cols the indicator is transposed before and after the cumsum)
Hop cumsumInput = emptyInd;
if (!rmRows) {
cumsumInput = HopRewriteUtils.createTranspose(emptyInd);
HopRewriteUtils.updateHopCharacteristics(cumsumInput, brlen, bclen, this);
}
UnaryOp cumsum = HopRewriteUtils.createUnary(cumsumInput, OpOp1.CUMSUM);
HopRewriteUtils.updateHopCharacteristics(cumsum, brlen, bclen, this);
Hop cumsumOutput = cumsum;
if (!rmRows) {
cumsumOutput = HopRewriteUtils.createTranspose(cumsum);
HopRewriteUtils.updateHopCharacteristics(cumsumOutput, brlen, bclen, this);
}
// alternative: right indexing
Hop maxDim = HopRewriteUtils.createAggUnaryOp(cumsumOutput, AggOp.MAX, Direction.RowCol);
HopRewriteUtils.updateHopCharacteristics(maxDim, brlen, bclen, this);
// offsets = cumsum * indicator
BinaryOp offsets = HopRewriteUtils.createBinary(cumsumOutput, emptyInd, OpOp2.MULT);
HopRewriteUtils.updateHopCharacteristics(offsets, brlen, bclen, this);
// Step 3: gather non-empty rows/cols into final results
Lop linput = input.constructLops();
Lop loffset = offsets.constructLops();
Lop lmaxdim = maxDim.constructLops();
HashMap<String, Lop> inMap = new HashMap<>();
inMap.put("target", linput);
inMap.put("offset", loffset);
inMap.put("maxdim", lmaxdim);
inMap.put("margin", inputlops.get("margin"));
inMap.put("empty.return", inputlops.get("empty.return"));
// the flag is only ever set to true here; it is not reset if the condition does not hold
if (!FORCE_DIST_RM_EMPTY && isRemoveEmptyBcSP())
_bRmEmptyBC = true;
ParameterizedBuiltin pbilop = new ParameterizedBuiltin(inMap, HopsParameterizedBuiltinLops.get(_op), getDataType(), getValueType(), et, _bRmEmptyBC);
setOutputDimensions(pbilop);
setLineNumbers(pbilop);
// Step 4: cleanup hops (allow for garbage collection)
// (ppred0 only exists if no 'select' parameter was given)
if (selectHop == null)
HopRewriteUtils.removeChildReference(ppred0, input);
setLops(pbilop);
// NOTE: in contrast to mr, replication and aggregation handled instruction-local
}
}
use of org.apache.sysml.lops.Aggregate in project systemml by apache.
the class QuaternaryOp method constructMRLopsWeightedSquaredLoss.
/**
 * Builds the MR lop chain for the weighted squared loss (wsloss) operation and registers
 * the final lop via setLops().
 *
 * The inputs are read positionally as X (0), U (1), V (2) and W (3). Depending on the size
 * estimates of U and V, either a map-side WeightedSquaredLoss lop (factors passed in
 * directly, optionally wrapped in a DataPartition lop) or a reduce-side
 * WeightedSquaredLossR lop (over grouped X and W) is created. Both variants are followed by
 * the same Group, Aggregate (SUM) and UnaryCP (CAST_AS_SCALAR) lops.
 *
 * @param wtype weights type; forwarded to the wsloss lop, and a four-input type rules out
 *              the map-side variant
 */
private void constructMRLopsWeightedSquaredLoss(WeightsType wtype) {
    // NOTE: the common case for wsloss are factors U/V with a rank of 10s to 100s; the current
    // runtime only supports single block outer products (U/V rank <= blocksize, i.e., 1000 by
    // default); we enforce this by applying the hop rewrite for Weighted Squared Loss only if
    // this constraint holds.
    final Hop hopX = getInput().get(0);
    final Hop hopU = getInput().get(1);
    final Hop hopV = getInput().get(2);
    final Hop hopW = getInput().get(3);

    // MR operator selection, part 1: size estimates of both factors
    final double sizeU = OptimizerUtils.estimateSize(hopU.getDim1(), hopU.getDim2());
    final double sizeV = OptimizerUtils.estimateSize(hopV.getDim1(), hopV.getDim2());
    final boolean mapSideFeasible = !wtype.hasFourInputs()
        && sizeU + sizeV < OptimizerUtils.getRemoteMemBudgetMap(true);

    final Lop lossLop;
    if (!FORCE_REPLICATION && mapSideFeasible) {
        // ---- broadcast: map-side wsloss ----

        // factor U, partitioned if its size is unknown or exceeds the partition size
        final boolean partitionU = !hopU.dimsKnown()
            || hopU.getDim1() * hopU.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lopU = hopU.constructLops();
        if (partitionU) {
            ExecType partEtU = ExecType.CP;
            if (sizeU > OptimizerUtils.getLocalMemBudget())
                partEtU = ExecType.MR;
            lopU = new DataPartition(lopU, DataType.MATRIX, ValueType.DOUBLE, partEtU,
                PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lopU.getOutputParameters().setDimensions(hopU.getDim1(), hopU.getDim2(),
                getRowsInBlock(), getColsInBlock(), hopU.getNnz());
            setLineNumbers(lopU);
        }

        // factor V, same treatment as U
        final boolean partitionV = !hopV.dimsKnown()
            || hopV.getDim1() * hopV.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lopV = hopV.constructLops();
        if (partitionV) {
            ExecType partEtV = ExecType.CP;
            if (sizeV > OptimizerUtils.getLocalMemBudget())
                partEtV = ExecType.MR;
            lopV = new DataPartition(lopV, DataType.MATRIX, ValueType.DOUBLE, partEtV,
                PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lopV.getOutputParameters().setDimensions(hopV.getDim1(), hopV.getDim2(),
                getRowsInBlock(), getColsInBlock(), hopV.getNnz());
            setLineNumbers(lopV);
        }

        // map-side wsloss always with broadcast
        lossLop = new WeightedSquaredLoss(hopX.constructLops(), lopU, lopV, hopW.constructLops(),
            DataType.MATRIX, ValueType.DOUBLE, wtype, ExecType.MR);
    }
    else {
        // ---- general case: reduce-side wsloss ----

        // MR operator selection, part 2: which factors fit into the reduce-side budget;
        // V is checked on its own if U is not cached, otherwise together with U
        final boolean cacheU = !FORCE_REPLICATION
            && (sizeU < OptimizerUtils.getRemoteMemBudgetReduce());
        final boolean cacheV = !FORCE_REPLICATION
            && ((cacheU ? sizeU + sizeV : sizeV) < OptimizerUtils.getRemoteMemBudgetReduce());

        Group groupedX = new Group(hopX.constructLops(), Group.OperationTypes.Sort,
            DataType.MATRIX, ValueType.DOUBLE);
        groupedX.getOutputParameters().setDimensions(hopX.getDim1(), hopX.getDim2(),
            hopX.getRowsInBlock(), hopX.getColsInBlock(), -1);
        setLineNumbers(groupedX);

        // W is only grouped if it is a matrix
        Lop groupedW = hopW.constructLops();
        if (groupedW.getDataType() == DataType.MATRIX) {
            groupedW = new Group(hopW.constructLops(), Group.OperationTypes.Sort,
                DataType.MATRIX, ValueType.DOUBLE);
            groupedW.getOutputParameters().setDimensions(hopW.getDim1(), hopW.getDim2(),
                hopW.getRowsInBlock(), hopW.getColsInBlock(), -1);
            setLineNumbers(groupedW);
        }

        Lop lopU = constructLeftFactorMRLop(hopU, hopV, cacheU, sizeU);
        Lop lopV = constructRightFactorMRLop(hopU, hopV, cacheV, sizeV);

        // reduce-side wsloss w/ or without broadcast
        lossLop = new WeightedSquaredLossR(groupedX, lopU, lopV, groupedW, DataType.MATRIX,
            ValueType.DOUBLE, wtype, cacheU, cacheV, ExecType.MR);
    }

    // ---- shared tail: 1x1 result, grouped, summed up, and cast to a scalar ----
    lossLop.getOutputParameters().setDimensions(1, 1, hopX.getRowsInBlock(), hopX.getColsInBlock(), -1);
    setLineNumbers(lossLop);

    Group groupedLoss = new Group(lossLop, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
    groupedLoss.getOutputParameters().setDimensions(1, 1, hopX.getRowsInBlock(), hopX.getColsInBlock(), -1);
    setLineNumbers(groupedLoss);

    Aggregate sumAgg = new Aggregate(groupedLoss, HopsAgg2Lops.get(AggOp.SUM), DataType.MATRIX,
        ValueType.DOUBLE, ExecType.MR);
    // aggregation uses kahanSum
    sumAgg.setupCorrectionLocation(CorrectionLocationType.NONE);
    sumAgg.getOutputParameters().setDimensions(1, 1, hopX.getRowsInBlock(), hopX.getColsInBlock(), -1);
    setLineNumbers(sumAgg);

    UnaryCP toScalar = new UnaryCP(sumAgg, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR),
        getDataType(), getValueType());
    toScalar.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
    setLineNumbers(toScalar);
    setLops(toScalar);
}
use of org.apache.sysml.lops.Aggregate in project systemml by apache.
the class ReorgOp method constructLops.
/**
 * Returns the lops of this reorg hop, constructing them on the first call.
 *
 * If lops have already been set they are returned as is. Otherwise the execution type is
 * determined via optFindExecType() and the lops are built per operation type (TRANSPOSE,
 * DIAG, REV, RESHAPE, SORT). After the operation-specific construction,
 * constructAndSetLopsDataFlowProperties() is invoked.
 *
 * @return the lops registered for this hop
 * @throws HopsException if the operation type is not one of the supported cases
 */
@Override
public Lop constructLops() {
// return already created lops
if (getLops() != null)
return getLops();
ExecType et = optFindExecType();
switch(op) {
case TRANSPOSE:
{
Lop lin = getInput().get(0).constructLops();
if (lin instanceof Transform && ((Transform) lin).getOperationType() == OperationTypes.Transpose)
// if input is already a transpose, avoid redundant transpose ops
// (the lop below the input transpose is reused directly)
setLops(lin.getInputs().get(0));
else if (getDim1() == 1 && getDim2() == 1)
// if input of size 1x1, avoid unnecessary transpose
setLops(lin);
else {
// general case
int k = OptimizerUtils.getConstrainedNumThreads(_maxNumThreads);
Transform transform1 = new Transform(lin, HopsTransf2Lops.get(op), getDataType(), getValueType(), et, k);
setOutputDimensions(transform1);
setLineNumbers(transform1);
setLops(transform1);
}
break;
}
case DIAG:
{
// single transform lop, independent of the execution type
Transform transform1 = new Transform(getInput().get(0).constructLops(), HopsTransf2Lops.get(op), getDataType(), getValueType(), et);
setOutputDimensions(transform1);
setLineNumbers(transform1);
setLops(transform1);
break;
}
case REV:
{
Lop rev = null;
if (et == ExecType.MR) {
// MR: transform followed by group and sum aggregate
Lop tmp = new Transform(getInput().get(0).constructLops(), HopsTransf2Lops.get(op), getDataType(), getValueType(), et);
setOutputDimensions(tmp);
setLineNumbers(tmp);
Group group1 = new Group(tmp, Group.OperationTypes.Sort, DataType.MATRIX, getValueType());
setOutputDimensions(group1);
setLineNumbers(group1);
rev = new Aggregate(group1, Aggregate.OperationTypes.Sum, DataType.MATRIX, getValueType(), et);
} else {
// CP/SPARK
rev = new Transform(getInput().get(0).constructLops(), HopsTransf2Lops.get(op), getDataType(), getValueType(), et);
}
setOutputDimensions(rev);
setLineNumbers(rev);
setLops(rev);
break;
}
case RESHAPE:
{
// main, rows, cols, byrow
Lop[] linputs = new Lop[4];
for (int i = 0; i < 4; i++) linputs[i] = getInput().get(i).constructLops();
if (et == ExecType.MR) {
// MR: transform followed by group and sum aggregate
Transform transform1 = new Transform(linputs, HopsTransf2Lops.get(op), getDataType(), getValueType(), true, et);
setOutputDimensions(transform1);
setLineNumbers(transform1);
Group group1 = new Group(transform1, Group.OperationTypes.Sort, DataType.MATRIX, getValueType());
setOutputDimensions(group1);
setLineNumbers(group1);
Aggregate agg1 = new Aggregate(group1, Aggregate.OperationTypes.Sum, DataType.MATRIX, getValueType(), et);
setOutputDimensions(agg1);
setLineNumbers(agg1);
setLops(agg1);
} else // CP/SPARK
{
// empty blocks are only output in SPARK, and only if filtering them is not allowed
_outputEmptyBlocks = (et == ExecType.SPARK && !OptimizerUtils.allowsToFilterEmptyBlockOutputs(this));
Transform transform1 = new Transform(linputs, HopsTransf2Lops.get(op), getDataType(), getValueType(), _outputEmptyBlocks, et);
setOutputDimensions(transform1);
setLineNumbers(transform1);
setLops(transform1);
}
break;
}
case SORT:
{
// inputs by position: data, order-by column, descending flag, index-return flag
Hop input = getInput().get(0);
Hop by = getInput().get(1);
Hop desc = getInput().get(2);
Hop ixret = getInput().get(3);
if (et == ExecType.MR) {
// both flags need to be literals here; otherwise fall back to 'false' for both
// and mark the hop for recompilation
if (!(desc instanceof LiteralOp && ixret instanceof LiteralOp)) {
LOG.warn("Unsupported non-constant ordering parameters, using defaults and mark for recompilation.");
setRequiresRecompile();
desc = new LiteralOp(false);
ixret = new LiteralOp(false);
}
// Step 1: extraction (if unknown ncol or multiple columns)
Hop vinput = input;
if (input.getDim2() != 1) {
vinput = new IndexingOp("tmp1", getDataType(), getValueType(), input, new LiteralOp(1L), HopRewriteUtils.createValueHop(input, true), by, by, false, true);
vinput.refreshSizeInformation();
vinput.setOutputBlocksizes(getRowsInBlock(), getColsInBlock());
HopRewriteUtils.copyLineNumbers(this, vinput);
}
// Step 2: Index vector sort
Hop voutput = null;
if (2 * OptimizerUtils.estimateSize(vinput.getDim1(), vinput.getDim2()) > OptimizerUtils.getLocalMemBudget() || FORCE_DIST_SORT_INDEXES) {
// large vector, fallback to MR sort
// sort indexes according to given values
SortKeys sort = new SortKeys(vinput.constructLops(), HopRewriteUtils.getBooleanValueSafe((LiteralOp) desc), SortKeys.OperationTypes.Indexes, vinput.getDataType(), vinput.getValueType(), ExecType.MR);
sort.getOutputParameters().setDimensions(vinput.getDim1(), 1, vinput.getRowsInBlock(), vinput.getColsInBlock(), vinput.getNnz());
setLineNumbers(sort);
// note: this sortindexes includes also the shift by offsets and
// final aggregate because sideways passing of offsets would
// not nicely fit the current instruction model
setLops(sort);
// this hop itself (now carrying the sort lop) represents the sorted index vector
voutput = this;
} else {
// small vector, use in-memory sort
ArrayList<Hop> sinputs = new ArrayList<>();
sinputs.add(vinput);
// by (always vector)
sinputs.add(new LiteralOp(1));
sinputs.add(desc);
// indexreturn (always indexes)
sinputs.add(new LiteralOp(true));
voutput = new ReorgOp("tmp3", getDataType(), getValueType(), ReOrgOp.SORT, sinputs);
HopRewriteUtils.copyLineNumbers(this, voutput);
// explicitly construct CP lop; otherwise there is danger of infinite recursion if forced runtime platform.
voutput.setLops(constructCPOrSparkSortLop(vinput, sinputs.get(1), sinputs.get(2), sinputs.get(3), ExecType.CP, false));
voutput.getLops().getOutputParameters().setDimensions(vinput.getDim1(), vinput.getDim2(), vinput.getRowsInBlock(), vinput.getColsInBlock(), vinput.getNnz());
// voutput already has lops at this point, so its constructLops() returns them
// via the early exit at the top of this method
setLops(voutput.constructLops());
}
// -- done via X' = table(seq(), IX') %*% X;
// (only if data instead of indexes should be returned; replaces the lops set above)
if (!HopRewriteUtils.getBooleanValueSafe((LiteralOp) ixret)) {
// generate seq
DataGenOp seq = HopRewriteUtils.createSeqDataGenOp(voutput);
seq.setName("tmp4");
seq.refreshSizeInformation();
// select exec type
seq.computeMemEstimate(new MemoTable());
HopRewriteUtils.copyLineNumbers(this, seq);
// generate table
TernaryOp table = new TernaryOp("tmp5", DataType.MATRIX, ValueType.DOUBLE, OpOp3.CTABLE, seq, voutput, new LiteralOp(1L));
table.setOutputBlocksizes(getRowsInBlock(), getColsInBlock());
table.refreshSizeInformation();
// force MR
table.setForcedExecType(ExecType.MR);
HopRewriteUtils.copyLineNumbers(this, table);
table.setDisjointInputs(true);
table.setOutputEmptyBlocks(false);
// generate matrix mult
AggBinaryOp mmult = HopRewriteUtils.createMatrixMultiply(table, input);
// force MR
mmult.setForcedExecType(ExecType.MR);
setLops(mmult.constructLops());
// cleanups
HopRewriteUtils.removeChildReference(table, input);
}
} else if (et == ExecType.SPARK) {
// the sort rewrite is only considered if distributed index sort is not forced,
// isSortSPRewriteApplicable() holds, and 'by' is a scalar
boolean sortRewrite = !FORCE_DIST_SORT_INDEXES && isSortSPRewriteApplicable() && by.getDataType().isScalar();
Lop transform1 = constructCPOrSparkSortLop(input, by, desc, ixret, et, sortRewrite);
setOutputDimensions(transform1);
setLineNumbers(transform1);
setLops(transform1);
} else // CP
{
Lop transform1 = constructCPOrSparkSortLop(input, by, desc, ixret, et, false);
setOutputDimensions(transform1);
setLineNumbers(transform1);
setLops(transform1);
}
break;
}
default:
throw new HopsException("Unsupported lops construction for operation type '" + op + "'.");
}
// add reblock/checkpoint lops if necessary
constructAndSetLopsDataFlowProperties();
return getLops();
}
use of org.apache.sysml.lops.Aggregate in project systemml by apache.
the class TernaryOp method constructLopsCtable.
/**
 * Method to construct LOPs when op = CTABLE.
 *
 * The concrete ctable operation type is derived from the data types of the first three
 * inputs. For CP and SPARK a single Ctable lop is created over the input lops; for MR the
 * matrix inputs are wrapped into Group lops first and, unless the inputs are disjoint or the
 * sequence rewrite applies, the Ctable lop is followed by a Group and a SUM Aggregate.
 * The resulting lop is registered via setLops().
 *
 * @throws HopsException if this hop is not a CTABLE operation or if the derived ctable
 *                       operation type is not supported in MR
 */
private void constructLopsCtable() {
    if (_op != OpOp3.CTABLE)
        throw new HopsException("Unexpected operation: " + _op + ", expecting " + OpOp3.CTABLE);

    /*
     * We must handle three different cases:
     * case1 : all three inputs are vectors (e.g., F=ctable(A,B,W))
     * case2 : two vectors and one scalar (e.g., F=ctable(A,B))
     * case3 : one vector and two scalars (e.g., F=ctable(A))
     */
    // identify the particular case
    // F=ctable(A,B,W)
    DataType dt1 = getInput().get(0).getDataType();
    DataType dt2 = getInput().get(1).getDataType();
    DataType dt3 = getInput().get(2).getDataType();
    Ctable.OperationTypes ternaryOpOrig = Ctable.findCtableOperationByInputDataTypes(dt1, dt2, dt3);

    // Compute lops for all inputs
    Lop[] inputLops = new Lop[getInput().size()];
    for (int i = 0; i < getInput().size(); i++) {
        inputLops[i] = getInput().get(i).constructLops();
    }

    ExecType et = optFindExecType();

    // reset reblock requirement (see MR ctable / construct lops)
    setRequiresReblock(false);

    if (et == ExecType.CP || et == ExecType.SPARK) {
        // for CP we support only ctable expand left
        Ctable.OperationTypes ternaryOp = isSequenceRewriteApplicable(true)
            ? Ctable.OperationTypes.CTABLE_EXPAND_SCALAR_WEIGHT : ternaryOpOrig;

        boolean ignoreZeros = false;
        if (isMatrixIgnoreZeroRewriteApplicable()) {
            // table - rmempty - rshape
            // (the first two inputs are replaced by the inputs of their targets)
            ignoreZeros = true;
            inputLops[0] = ((ParameterizedBuiltinOp) getInput().get(0)).getTargetHop().getInput().get(0).constructLops();
            inputLops[1] = ((ParameterizedBuiltinOp) getInput().get(1)).getTargetHop().getInput().get(0).constructLops();
        }

        Ctable ternary = new Ctable(inputLops, ternaryOp, getDataType(), getValueType(), ignoreZeros, et);
        // initial output characteristics; set again per exec type right below
        ternary.getOutputParameters().setDimensions(_dim1, _dim2, getRowsInBlock(), getColsInBlock(), -1);
        setLineNumbers(ternary);

        // force blocked output in CP (see below), otherwise binarycell
        if (et == ExecType.SPARK) {
            ternary.getOutputParameters().setDimensions(_dim1, _dim2, -1, -1, -1);
            setRequiresReblock(true);
        }
        else
            ternary.getOutputParameters().setDimensions(_dim1, _dim2, getRowsInBlock(), getColsInBlock(), -1);

        // ternary opt, w/o reblock in CP
        setLops(ternary);
    }
    else // MR
    {
        // for MR we support both ctable expand left and right
        Ctable.OperationTypes ternaryOp = isSequenceRewriteApplicable()
            ? Ctable.OperationTypes.CTABLE_EXPAND_SCALAR_WEIGHT : ternaryOpOrig;

        Group group1 = null, group2 = null, group3 = null, group4 = null;
        group1 = new Group(inputLops[0], Group.OperationTypes.Sort, getDataType(), getValueType());
        group1.getOutputParameters().setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
        setLineNumbers(group1);

        Ctable ternary = null;
        // create "group" lops for MATRIX inputs; in every case the variant with five
        // inputs additionally passes on the given output dimensions (inputs 3 and 4)
        switch (ternaryOp) {
            case CTABLE_TRANSFORM:
                // F = ctable(A,B,W)
                group2 = new Group(inputLops[1], Group.OperationTypes.Sort, getDataType(), getValueType());
                group2.getOutputParameters().setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
                setLineNumbers(group2);
                group3 = new Group(inputLops[2], Group.OperationTypes.Sort, getDataType(), getValueType());
                group3.getOutputParameters().setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
                setLineNumbers(group3);
                if (inputLops.length == 3)
                    ternary = new Ctable(new Lop[] { group1, group2, group3 }, ternaryOp, getDataType(), getValueType(), et);
                else
                    // output dimensions are given
                    ternary = new Ctable(new Lop[] { group1, group2, group3, inputLops[3], inputLops[4] }, ternaryOp, getDataType(), getValueType(), et);
                break;

            case CTABLE_TRANSFORM_SCALAR_WEIGHT:
                // F = ctable(A,B) or F = ctable(A,B,1)
                group2 = new Group(inputLops[1], Group.OperationTypes.Sort, getDataType(), getValueType());
                group2.getOutputParameters().setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
                setLineNumbers(group2);
                if (inputLops.length == 3)
                    ternary = new Ctable(new Lop[] { group1, group2, inputLops[2] }, ternaryOp, getDataType(), getValueType(), et);
                else
                    ternary = new Ctable(new Lop[] { group1, group2, inputLops[2], inputLops[3], inputLops[4] }, ternaryOp, getDataType(), getValueType(), et);
                break;

            case CTABLE_EXPAND_SCALAR_WEIGHT:
                // F=ctable(seq(1,N),A) or F = ctable(seq,A,1)
                // left 1, right 0 (index of input data)
                int left = isSequenceRewriteApplicable(true) ? 1 : 0;
                Group group = new Group(getInput().get(left).constructLops(), Group.OperationTypes.Sort, getDataType(), getValueType());
                group.getOutputParameters().setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
                // propagate line numbers like for all other group lops of this method
                setLineNumbers(group);
                if (inputLops.length == 3)
                    ternary = new Ctable(new Lop[] {
                        group, // matrix
                        getInput().get(2).constructLops(), // weight
                        new LiteralOp(left).constructLops() // left
                    }, ternaryOp, getDataType(), getValueType(), et);
                else
                    ternary = new Ctable(new Lop[] {
                        group, // matrix
                        getInput().get(2).constructLops(), // weight
                        new LiteralOp(left).constructLops(), // left
                        inputLops[3], inputLops[4]
                    }, ternaryOp, getDataType(), getValueType(), et);
                break;

            case CTABLE_TRANSFORM_HISTOGRAM:
                // F=ctable(A,1) or F = ctable(A,1,1)
                if (inputLops.length == 3)
                    ternary = new Ctable(new Lop[] { group1, getInput().get(1).constructLops(), getInput().get(2).constructLops() }, ternaryOp, getDataType(), getValueType(), et);
                else
                    ternary = new Ctable(new Lop[] { group1, getInput().get(1).constructLops(), getInput().get(2).constructLops(), inputLops[3], inputLops[4] }, ternaryOp, getDataType(), getValueType(), et);
                break;

            case CTABLE_TRANSFORM_WEIGHTED_HISTOGRAM:
                // F=ctable(A,1,W)
                group3 = new Group(getInput().get(2).constructLops(), Group.OperationTypes.Sort, getDataType(), getValueType());
                group3.getOutputParameters().setDimensions(getDim1(), getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
                setLineNumbers(group3);
                if (inputLops.length == 3)
                    ternary = new Ctable(new Lop[] { group1, getInput().get(1).constructLops(), group3 }, ternaryOp, getDataType(), getValueType(), et);
                else
                    ternary = new Ctable(new Lop[] { group1, getInput().get(1).constructLops(), group3, inputLops[3], inputLops[4] }, ternaryOp, getDataType(), getValueType(), et);
                break;

            default:
                // report the unsupported ctable operation type; _op is always CTABLE here
                // (checked at method entry) and would not identify the problem
                throw new HopsException("Invalid ternary operator type: " + ternaryOp);
        }

        // output dimensions are not known at compilation time
        ternary.getOutputParameters().setDimensions(_dim1, _dim2, (_dimInputsPresent ? getRowsInBlock() : -1), (_dimInputsPresent ? getColsInBlock() : -1), -1);
        setLineNumbers(ternary);

        Lop lctable = ternary;
        if (!(_disjointInputs || ternaryOp == Ctable.OperationTypes.CTABLE_EXPAND_SCALAR_WEIGHT)) {
            // no need for aggregation if (1) input indexed disjoint or one side is sequence w/ 1 increment
            group4 = new Group(ternary, Group.OperationTypes.Sort, getDataType(), getValueType());
            group4.getOutputParameters().setDimensions(_dim1, _dim2, (_dimInputsPresent ? getRowsInBlock() : -1), (_dimInputsPresent ? getColsInBlock() : -1), -1);
            setLineNumbers(group4);

            Aggregate agg1 = new Aggregate(group4, HopsAgg2Lops.get(AggOp.SUM), getDataType(), getValueType(), ExecType.MR);
            agg1.getOutputParameters().setDimensions(_dim1, _dim2, (_dimInputsPresent ? getRowsInBlock() : -1), (_dimInputsPresent ? getColsInBlock() : -1), -1);
            setLineNumbers(agg1);

            // kahanSum is used for aggregation but inputs do not have
            // correction values
            agg1.setupCorrectionLocation(CorrectionLocationType.NONE);
            lctable = agg1;
        }
        setLops(lctable);

        // to introduce reblock lop since table itself outputs in blocked format if dims known.
        if (!dimsKnown() && !_dimInputsPresent) {
            setRequiresReblock(true);
        }
    }
}
use of org.apache.sysml.lops.Aggregate in project systemml by apache.
the class AggBinaryOp method constructMRLopsCPMMWithLeftTransposeRewrite.
/**
 * Constructs the MR lops for a matrix multiply with left transpose rewrite: Y is
 * transposed in CP, multiplied with X via an MMCJ lop in MR, grouped and aggregated, and
 * the aggregate is transposed again in CP.
 *
 * X is the input of this hop's first input, Y is this hop's second input. The lops are
 * not registered on this hop; this is left to the caller.
 *
 * @return the final (CP transpose) lop of the constructed chain
 */
private Lop constructMRLopsCPMMWithLeftTransposeRewrite() {
    // guaranteed to exists
    Hop X = getInput().get(0).getInput().get(0);
    Hop Y = getInput().get(1);

    // right vector transpose CP
    Lop tY = new Transform(Y.constructLops(), OperationTypes.Transpose, getDataType(), getValueType(), ExecType.CP);
    tY.getOutputParameters().setDimensions(Y.getDim2(), Y.getDim1(), getRowsInBlock(), getColsInBlock(), Y.getNnz());
    setLineNumbers(tY);

    // matrix multiply
    MMCJType type = getMMCJAggregationType(X, Y);
    MMCJ mmcj = new MMCJ(tY, X.constructLops(), getDataType(), getValueType(), type, ExecType.MR);
    setOutputDimensions(mmcj);
    setLineNumbers(mmcj);

    Group grp = new Group(mmcj, Group.OperationTypes.Sort, getDataType(), getValueType());
    setOutputDimensions(grp);
    setLineNumbers(grp);

    Aggregate agg1 = new Aggregate(grp, HopsAgg2Lops.get(outerOp), getDataType(), getValueType(), ExecType.MR);
    setOutputDimensions(agg1);
    setLineNumbers(agg1);
    // aggregation uses kahanSum but the inputs do not have correction values
    agg1.setupCorrectionLocation(CorrectionLocationType.NONE);

    // result transpose CP
    Lop out = new Transform(agg1, OperationTypes.Transpose, getDataType(), getValueType(), ExecType.CP);
    out.getOutputParameters().setDimensions(X.getDim2(), Y.getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
    // the returned lop carries line numbers like every other lop created in this method
    setLineNumbers(out);
    return out;
}
Aggregations