Use of org.apache.sysml.lops.Transform in project incubator-systemml by Apache.
From class QuaternaryOp, method constructMRLopsWeightedDivMM.
/**
 * Builds the MR lops for weighted divide matrix multiplication (wdivmm) and registers
 * the final aggregate through {@code setLops}.
 *
 * <p>The four inputs are read positionally as W, U, V and X. Two plans are possible:
 * a map-side {@code WeightedDivMM} that receives U and V directly (partitioned when
 * their dimensions are unknown or their cell count exceeds
 * {@code DistributedCacheInput.PARTITION_SIZE}), or a reduce-side
 * {@code WeightedDivMMR} where each factor is either handled the same way or
 * replicated via {@code RepMat} and sorted via {@code Group}. In both plans the
 * operator output is subsequently grouped and summed.
 *
 * @param wtype wdivmm variant; queried through {@code hasFourInputs()} and
 *              {@code hasScalar()} and forwarded to the constructed lop
 * @throws HopsException declared; presumably raised while constructing input lops
 * @throws LopsException declared; presumably raised by lop construction
 */
private void constructMRLopsWeightedDivMM(WDivMMType wtype) throws HopsException, LopsException {
    // NOTE: the common case for wdivmm are factors U/V with a rank of 10s to 100s; the
    // current runtime only supports single block outer products (U/V rank <= blocksize,
    // i.e., 1000 by default); this is enforced by applying the hop rewrite for Weighted
    // DivMM only if this constraint holds.
    Hop W = getInput().get(0);
    Hop U = getInput().get(1);
    Hop V = getInput().get(2);
    Hop X = getInput().get(3);

    // MR operator selection, part 1: size estimates for U and V
    double sizeU = OptimizerUtils.estimateSize(U.getDim1(), U.getDim2());
    double sizeV = OptimizerUtils.estimateSize(V.getDim1(), V.getDim2());
    boolean mapSide = ((!wtype.hasFourInputs() || wtype.hasScalar())
        && sizeU + sizeV < OptimizerUtils.getRemoteMemBudgetMap(true));

    Lop wdivmm;
    if (!FORCE_REPLICATION && mapSide) {
        // ---- broadcast plan ----
        // partitioning of U
        boolean partU = !U.dimsKnown()
            || U.getDim1() * U.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lopU = U.constructLops();
        if (partU) {
            // partition in MR only if U exceeds the local memory budget
            ExecType etU = (sizeU > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP;
            lopU = new DataPartition(lopU, DataType.MATRIX, ValueType.DOUBLE, etU,
                PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lopU.getOutputParameters().setDimensions(
                U.getDim1(), U.getDim2(), getRowsInBlock(), getColsInBlock(), U.getNnz());
            setLineNumbers(lopU);
        }

        // partitioning of V
        boolean partV = !V.dimsKnown()
            || V.getDim1() * V.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lopV = V.constructLops();
        if (partV) {
            // partition in MR only if V exceeds the local memory budget
            ExecType etV = (sizeV > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP;
            lopV = new DataPartition(lopV, DataType.MATRIX, ValueType.DOUBLE, etV,
                PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lopV.getOutputParameters().setDimensions(
                V.getDim1(), V.getDim2(), getRowsInBlock(), getColsInBlock(), V.getNnz());
            setLineNumbers(lopV);
        }

        // map-side wdivmm always with broadcast
        Lop lopW = W.constructLops();
        Lop lopX = X.constructLops();
        wdivmm = new WeightedDivMM(lopW, lopU, lopV, lopX,
            DataType.MATRIX, ValueType.DOUBLE, wtype, ExecType.MR);
    } else {
        // ---- general case ----
        // MR operator selection, part 2 (both cannot happen for wdivmm, otherwise mapwdivmm)
        boolean cacheU = !FORCE_REPLICATION && (sizeU < OptimizerUtils.getRemoteMemBudgetReduce());
        boolean cacheV = !FORCE_REPLICATION
            && (cacheU
                ? sizeU + sizeV < OptimizerUtils.getRemoteMemBudgetReduce()
                : sizeV < OptimizerUtils.getRemoteMemBudgetReduce());

        Group sortedW = new Group(W.constructLops(), Group.OperationTypes.Sort,
            DataType.MATRIX, ValueType.DOUBLE);
        sortedW.getOutputParameters().setDimensions(
            W.getDim1(), W.getDim2(), W.getRowsInBlock(), W.getColsInBlock(), W.getNnz());
        setLineNumbers(sortedW);

        // X is wrapped in a sort group only for four-input variants with non-scalar X
        Lop lopX = X.constructLops();
        if (wtype.hasFourInputs() && (X.getDataType() != DataType.SCALAR)) {
            lopX = new Group(lopX, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        }
        // dimensions and line numbers are set on lopX whether or not it was wrapped
        lopX.getOutputParameters().setDimensions(
            X.getDim1(), X.getDim2(), X.getRowsInBlock(), X.getColsInBlock(), X.getNnz());
        setLineNumbers(lopX);

        Lop lopU;
        if (cacheU) {
            // partitioning of U for read through distributed cache
            boolean partU = !U.dimsKnown()
                || U.getDim1() * U.getDim2() > DistributedCacheInput.PARTITION_SIZE;
            lopU = U.constructLops();
            if (partU) {
                ExecType etU = (sizeU > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP;
                lopU = new DataPartition(lopU, DataType.MATRIX, ValueType.DOUBLE, etU,
                    PDataPartitionFormat.ROW_BLOCK_WISE_N);
                lopU.getOutputParameters().setDimensions(
                    U.getDim1(), U.getDim2(), getRowsInBlock(), getColsInBlock(), U.getNnz());
                setLineNumbers(lopU);
            }
        } else {
            // replication of U for shuffle to target block;
            // ncol of t(V) -> nrow of V determines num replicates
            Lop numRepsU = createOffsetLop(V, false);
            Lop repU = new RepMat(U.constructLops(), numRepsU, true, V.getDataType(), V.getValueType());
            repU.getOutputParameters().setDimensions(
                U.getDim1(), U.getDim2(), U.getRowsInBlock(), U.getColsInBlock(), U.getNnz());
            setLineNumbers(repU);

            Group sortedU = new Group(repU, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
            sortedU.getOutputParameters().setDimensions(
                U.getDim1(), U.getDim2(), U.getRowsInBlock(), U.getColsInBlock(), -1);
            setLineNumbers(sortedU);
            lopU = sortedU;
        }

        Lop lopV;
        if (cacheV) {
            // partitioning of V for read through distributed cache
            boolean partV = !V.dimsKnown()
                || V.getDim1() * V.getDim2() > DistributedCacheInput.PARTITION_SIZE;
            lopV = V.constructLops();
            if (partV) {
                ExecType etV = (sizeV > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP;
                lopV = new DataPartition(lopV, DataType.MATRIX, ValueType.DOUBLE, etV,
                    PDataPartitionFormat.ROW_BLOCK_WISE_N);
                lopV.getOutputParameters().setDimensions(
                    V.getDim1(), V.getDim2(), getRowsInBlock(), getColsInBlock(), V.getNnz());
                setLineNumbers(lopV);
            }
        } else {
            // replication of t(V) for shuffle to target block
            // NOTE(review): the transpose lop is typed with this hop's data/value type, not V's - confirm intended
            Transform transposedV = new Transform(V.constructLops(),
                HopsTransf2Lops.get(ReOrgOp.TRANSPOSE), getDataType(), getValueType(), ExecType.MR);
            transposedV.getOutputParameters().setDimensions(
                V.getDim2(), V.getDim1(), V.getColsInBlock(), V.getRowsInBlock(), V.getNnz());
            setLineNumbers(transposedV);

            // nrow of U determines num replicates
            Lop numRepsV = createOffsetLop(U, false);
            Lop repV = new RepMat(transposedV, numRepsV, false, V.getDataType(), V.getValueType());
            repV.getOutputParameters().setDimensions(
                V.getDim2(), V.getDim1(), V.getColsInBlock(), V.getRowsInBlock(), V.getNnz());
            setLineNumbers(repV);

            Group sortedV = new Group(repV, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
            sortedV.getOutputParameters().setDimensions(
                V.getDim2(), V.getDim1(), V.getColsInBlock(), V.getRowsInBlock(), -1);
            setLineNumbers(sortedV);
            lopV = sortedV;
        }

        // reduce-side wdivmm w/ or without broadcast
        wdivmm = new WeightedDivMMR(sortedW, lopU, lopV, lopX,
            DataType.MATRIX, ValueType.DOUBLE, wtype, cacheU, cacheV, ExecType.MR);
    }

    // common to both plans: finalize the wdivmm operator lop
    setOutputDimensions(wdivmm);
    setLineNumbers(wdivmm);
    setLops(wdivmm);

    // in contrast to wsloss/wsigmoid, wdivmm requires partial aggregation (for the final mm)
    Group partials = new Group(getLops(), Group.OperationTypes.Sort, getDataType(), getValueType());
    setOutputDimensions(partials);
    setLineNumbers(partials);

    Aggregate summed = new Aggregate(partials, HopsAgg2Lops.get(AggOp.SUM),
        getDataType(), getValueType(), ExecType.MR);
    // aggregation uses kahanSum but the inputs do not have correction values
    summed.setupCorrectionLocation(CorrectionLocationType.NONE);
    setOutputDimensions(summed);
    setLineNumbers(summed);
    setLops(summed);
}
Use of org.apache.sysml.lops.Transform in project incubator-systemml by Apache.
From class QuaternaryOp, method constructMRLopsWeightedCeMM.
/**
 * Builds the MR lops for weighted cross entropy (wcemm) and registers the final
 * cast-to-scalar lop through {@code setLops}.
 *
 * <p>The four inputs are read positionally as X, U, V and eps. Depending on the
 * estimated sizes of U and V, either a map-side {@code WeightedCrossEntropy} or a
 * reduce-side {@code WeightedCrossEntropyR} is created. In both cases the 1x1
 * operator output is grouped, summed and finally cast to a scalar.
 *
 * @param wtype wcemm variant, forwarded to the constructed lop
 * @throws HopsException declared; presumably raised while constructing input lops
 * @throws LopsException declared; presumably raised by lop construction
 */
private void constructMRLopsWeightedCeMM(WCeMMType wtype) throws HopsException, LopsException {
    // NOTE: the common case for wcemm are factors U/V with a rank of 10s to 100s; the
    // current runtime only supports single block outer products (U/V rank <= blocksize,
    // i.e., 1000 by default); this is enforced by applying the hop rewrite for Weighted
    // Cross Entropy only if this constraint holds.
    Hop X = getInput().get(0);
    Hop U = getInput().get(1);
    Hop V = getInput().get(2);
    Hop eps = getInput().get(3);

    // MR operator selection, part 1: size estimates for U and V
    double sizeU = OptimizerUtils.estimateSize(U.getDim1(), U.getDim2());
    double sizeV = OptimizerUtils.estimateSize(V.getDim1(), V.getDim2());
    boolean mapSide = (sizeU + sizeV < OptimizerUtils.getRemoteMemBudgetMap(true));

    Lop wcemm;
    if (!FORCE_REPLICATION && mapSide) {
        // ---- broadcast plan ----
        // partitioning of U
        boolean partU = !U.dimsKnown()
            || U.getDim1() * U.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lopU = U.constructLops();
        if (partU) {
            // partition in MR only if U exceeds the local memory budget
            ExecType etU = (sizeU > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP;
            lopU = new DataPartition(lopU, DataType.MATRIX, ValueType.DOUBLE, etU,
                PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lopU.getOutputParameters().setDimensions(
                U.getDim1(), U.getDim2(), getRowsInBlock(), getColsInBlock(), U.getNnz());
            setLineNumbers(lopU);
        }

        // partitioning of V
        boolean partV = !V.dimsKnown()
            || V.getDim1() * V.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lopV = V.constructLops();
        if (partV) {
            // partition in MR only if V exceeds the local memory budget
            ExecType etV = (sizeV > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP;
            lopV = new DataPartition(lopV, DataType.MATRIX, ValueType.DOUBLE, etV,
                PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lopV.getOutputParameters().setDimensions(
                V.getDim1(), V.getDim2(), getRowsInBlock(), getColsInBlock(), V.getNnz());
            setLineNumbers(lopV);
        }

        // map-side wcemm always with broadcast
        Lop lopX = X.constructLops();
        Lop lopEps = eps.constructLops();
        wcemm = new WeightedCrossEntropy(lopX, lopU, lopV, lopEps,
            DataType.MATRIX, ValueType.DOUBLE, wtype, ExecType.MR);
    } else {
        // ---- general case ----
        // MR operator selection, part 2
        boolean cacheU = !FORCE_REPLICATION && (sizeU < OptimizerUtils.getRemoteMemBudgetReduce());
        boolean cacheV = !FORCE_REPLICATION
            && (cacheU
                ? sizeU + sizeV < OptimizerUtils.getRemoteMemBudgetReduce()
                : sizeV < OptimizerUtils.getRemoteMemBudgetReduce());

        Group sortedX = new Group(X.constructLops(), Group.OperationTypes.Sort,
            DataType.MATRIX, ValueType.DOUBLE);
        sortedX.getOutputParameters().setDimensions(
            X.getDim1(), X.getDim2(), X.getRowsInBlock(), X.getColsInBlock(), -1);
        setLineNumbers(sortedX);

        Lop lopU;
        if (cacheU) {
            // partitioning of U for read through distributed cache
            boolean partU = !U.dimsKnown()
                || U.getDim1() * U.getDim2() > DistributedCacheInput.PARTITION_SIZE;
            lopU = U.constructLops();
            if (partU) {
                ExecType etU = (sizeU > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP;
                lopU = new DataPartition(lopU, DataType.MATRIX, ValueType.DOUBLE, etU,
                    PDataPartitionFormat.ROW_BLOCK_WISE_N);
                lopU.getOutputParameters().setDimensions(
                    U.getDim1(), U.getDim2(), getRowsInBlock(), getColsInBlock(), U.getNnz());
                setLineNumbers(lopU);
            }
        } else {
            // replication of U for shuffle to target block;
            // ncol of t(V) -> nrow of V determines num replicates
            Lop numRepsU = createOffsetLop(V, false);
            Lop repU = new RepMat(U.constructLops(), numRepsU, true, V.getDataType(), V.getValueType());
            repU.getOutputParameters().setDimensions(
                U.getDim1(), U.getDim2(), U.getRowsInBlock(), U.getColsInBlock(), U.getNnz());
            setLineNumbers(repU);

            Group sortedU = new Group(repU, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
            sortedU.getOutputParameters().setDimensions(
                U.getDim1(), U.getDim2(), U.getRowsInBlock(), U.getColsInBlock(), -1);
            setLineNumbers(sortedU);
            lopU = sortedU;
        }

        Lop lopV;
        if (cacheV) {
            // partitioning of V for read through distributed cache
            boolean partV = !V.dimsKnown()
                || V.getDim1() * V.getDim2() > DistributedCacheInput.PARTITION_SIZE;
            lopV = V.constructLops();
            if (partV) {
                ExecType etV = (sizeV > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP;
                lopV = new DataPartition(lopV, DataType.MATRIX, ValueType.DOUBLE, etV,
                    PDataPartitionFormat.ROW_BLOCK_WISE_N);
                lopV.getOutputParameters().setDimensions(
                    V.getDim1(), V.getDim2(), getRowsInBlock(), getColsInBlock(), V.getNnz());
                setLineNumbers(lopV);
            }
        } else {
            // replication of t(V) for shuffle to target block
            // NOTE(review): the transpose lop is typed with this hop's data/value type, not V's - confirm intended
            Transform transposedV = new Transform(V.constructLops(),
                HopsTransf2Lops.get(ReOrgOp.TRANSPOSE), getDataType(), getValueType(), ExecType.MR);
            transposedV.getOutputParameters().setDimensions(
                V.getDim2(), V.getDim1(), V.getColsInBlock(), V.getRowsInBlock(), V.getNnz());
            setLineNumbers(transposedV);

            // nrow of U determines num replicates
            Lop numRepsV = createOffsetLop(U, false);
            Lop repV = new RepMat(transposedV, numRepsV, false, V.getDataType(), V.getValueType());
            repV.getOutputParameters().setDimensions(
                V.getDim2(), V.getDim1(), V.getColsInBlock(), V.getRowsInBlock(), V.getNnz());
            setLineNumbers(repV);

            Group sortedV = new Group(repV, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
            sortedV.getOutputParameters().setDimensions(
                V.getDim2(), V.getDim1(), V.getColsInBlock(), V.getRowsInBlock(), -1);
            setLineNumbers(sortedV);
            lopV = sortedV;
        }

        // reduce-side wcemm w/ or without broadcast
        wcemm = new WeightedCrossEntropyR(sortedX, lopU, lopV, eps.constructLops(),
            DataType.MATRIX, ValueType.DOUBLE, wtype, cacheU, cacheV, ExecType.MR);
    }

    // common to both plans: 1x1 operator output, then sort + sum + cast to scalar
    wcemm.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
    setLineNumbers(wcemm);

    Group partials = new Group(wcemm, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
    partials.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
    setLineNumbers(partials);

    Aggregate summed = new Aggregate(partials, HopsAgg2Lops.get(AggOp.SUM),
        DataType.MATRIX, ValueType.DOUBLE, ExecType.MR);
    // aggregation uses kahanSum
    summed.setupCorrectionLocation(CorrectionLocationType.NONE);
    summed.getOutputParameters().setDimensions(1, 1, X.getRowsInBlock(), X.getColsInBlock(), -1);
    setLineNumbers(summed);

    UnaryCP asScalar = new UnaryCP(summed, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR),
        getDataType(), getValueType());
    asScalar.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
    setLineNumbers(asScalar);
    setLops(asScalar);
}
Use of org.apache.sysml.lops.Transform in project incubator-systemml by Apache.
From class AggBinaryOp, method constructSparkLopsMapMMWithLeftTransposeRewrite.
/**
 * Builds the Spark lops for a map-side matrix multiply using the left-transpose
 * rewrite: Y is transposed in CP, multiplied with X through {@code MapMult}, and the
 * product is transposed again in CP.
 *
 * <p>X is taken as the input of this hop's first input (presumably a transpose hop,
 * per the method name - confirm against the caller). Output dimensions of the
 * returned lop are not set here. As a side effect, {@code _outputEmptyBlocks} is
 * updated.
 *
 * @return the final transpose lop, without output dimensions
 * @throws HopsException declared; presumably raised while constructing input lops
 * @throws LopsException declared; presumably raised by lop construction
 */
private Lop constructSparkLopsMapMMWithLeftTransposeRewrite() throws HopsException, LopsException {
    // the nested input is guaranteed to exist
    Hop leftInput = getInput().get(0);
    Hop X = leftInput.getInput().get(0);
    Hop Y = getInput().get(1);

    // right vector transpose
    Lop transposedY = new Transform(Y.constructLops(), OperationTypes.Transpose,
        getDataType(), getValueType(), ExecType.CP);
    transposedY.getOutputParameters().setDimensions(
        Y.getDim2(), Y.getDim1(), getRowsInBlock(), getColsInBlock(), Y.getNnz());
    setLineNumbers(transposedY);

    // matrix mult spark
    SparkAggType aggtype = getSparkMMAggregationType(requiresAggregation(MMultMethod.MAPMM_R));
    _outputEmptyBlocks = !OptimizerUtils.allowsToFilterEmptyBlockOutputs(this);
    Lop lopX = X.constructLops();
    Lop product = new MapMult(transposedY, lopX, getDataType(), getValueType(),
        false, false, _outputEmptyBlocks, aggtype);
    product.getOutputParameters().setDimensions(
        Y.getDim2(), X.getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
    setLineNumbers(product);

    // result transpose (dimensions set outside)
    return new Transform(product, OperationTypes.Transpose, getDataType(), getValueType(), ExecType.CP);
}
Use of org.apache.sysml.lops.Transform in project incubator-systemml by Apache.
From class AggBinaryOp, method constructMRLopsMapMMWithLeftTransposeRewrite.
/**
 * Builds the MR lops for a map-side matrix multiply using the left-transpose rewrite:
 * Y is transposed in CP, optionally partitioned, multiplied with X through
 * {@code MapMult}, optionally sorted and aggregated, and finally transposed back in CP.
 *
 * <p>X is taken as the input of this hop's first input (presumably a transpose hop,
 * per the method name - confirm against the caller). Unlike the intermediate lops,
 * {@code setLineNumbers} is not called on the returned lop here.
 *
 * @return the final transpose lop with dimensions {@code X.getDim2() x Y.getDim2()}
 * @throws HopsException declared; presumably raised while constructing input lops
 * @throws LopsException declared; presumably raised by lop construction
 */
private Lop constructMRLopsMapMMWithLeftTransposeRewrite() throws HopsException, LopsException {
    // the nested input is guaranteed to exist
    Hop leftInput = getInput().get(0);
    Hop X = leftInput.getInput().get(0);
    Hop Y = getInput().get(1);

    // right vector transpose CP
    Lop transposedY = new Transform(Y.constructLops(), OperationTypes.Transpose,
        getDataType(), getValueType(), ExecType.CP);
    transposedY.getOutputParameters().setDimensions(
        Y.getDim2(), Y.getDim1(), getRowsInBlock(), getColsInBlock(), Y.getNnz());
    setLineNumbers(transposedY);

    // matrix mult
    // Explicit aggregation can be skipped only if X has a positive row count that fits
    // into a single row block, i.e., the entire multiplication is done in the mappers.
    boolean needAgg = !(X.getDim1() > 0 && X.getDim1() <= X.getRowsInBlock());
    // R disregarding transpose rewrite
    boolean needPart = requiresPartitioning(MMultMethod.MAPMM_R, true);

    // pre partitioning: by default the transposed Y is passed to the multiply as is
    Lop dcinput = transposedY;
    if (needPart) {
        // operator selection: partition in CP only if t(Y) is below the local memory budget
        ExecType etPart = ExecType.MR;
        if (OptimizerUtils.estimateSizeExactSparsity(Y.getDim2(), Y.getDim1(),
                OptimizerUtils.getSparsity(Y.getDim2(), Y.getDim1(), Y.getNnz()))
            < OptimizerUtils.getLocalMemBudget()) {
            etPart = ExecType.CP;
        }
        dcinput = new DataPartition(transposedY, DataType.MATRIX, ValueType.DOUBLE, etPart,
            PDataPartitionFormat.COLUMN_BLOCK_WISE_N);
        dcinput.getOutputParameters().setDimensions(
            Y.getDim2(), Y.getDim1(), getRowsInBlock(), getColsInBlock(), Y.getNnz());
        setLineNumbers(dcinput);
    }

    Lop lopX = X.constructLops();
    MapMult mapmult = new MapMult(dcinput, lopX, getDataType(), getValueType(), false, needPart, false);
    mapmult.getOutputParameters().setDimensions(
        Y.getDim2(), X.getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
    setLineNumbers(mapmult);

    // post aggregation: by default the map-side product is already the final product
    Lop product = mapmult;
    if (needAgg) {
        Group partials = new Group(mapmult, Group.OperationTypes.Sort, getDataType(), getValueType());
        partials.getOutputParameters().setDimensions(
            Y.getDim2(), X.getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
        setLineNumbers(partials);

        Aggregate aggregated = new Aggregate(partials, HopsAgg2Lops.get(outerOp),
            getDataType(), getValueType(), ExecType.MR);
        aggregated.getOutputParameters().setDimensions(
            Y.getDim2(), X.getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
        setLineNumbers(aggregated);
        aggregated.setupCorrectionLocation(CorrectionLocationType.NONE);
        product = aggregated;
    }

    // result transpose CP
    Lop result = new Transform(product, OperationTypes.Transpose, getDataType(), getValueType(), ExecType.CP);
    result.getOutputParameters().setDimensions(
        X.getDim2(), Y.getDim2(), getRowsInBlock(), getColsInBlock(), getNnz());
    return result;
}
Use of org.apache.sysml.lops.Transform in project incubator-systemml by Apache.
From class QuaternaryOp, method constructMRLopsWeightedUMM.
/**
 * Builds the MR lops for weighted unary matrix multiplication (wumm) and registers
 * the operator lop through {@code setLops}.
 *
 * <p>The three inputs are read positionally as X, U and V. The unary operation is
 * derived from {@code _uop} when set; otherwise {@code POW2} is used if {@code _sop}
 * is {@code OpOp2.POW} and {@code MULTIPLY2} in all other cases. Depending on the
 * estimated sizes of U and V, either a map-side {@code WeightedUnaryMM} or a
 * reduce-side {@code WeightedUnaryMMR} is created. No aggregation is appended.
 *
 * @param wtype wumm variant, forwarded to the constructed lop
 * @throws HopsException declared; presumably raised while constructing input lops
 * @throws LopsException declared; presumably raised by lop construction
 */
private void constructMRLopsWeightedUMM(WUMMType wtype) throws HopsException, LopsException {
    // NOTE: the common case for wumm are factors U/V with a rank of 10s to 100s; the
    // current runtime only supports single block outer products (U/V rank <= blocksize,
    // i.e., 1000 by default); this is enforced by applying the hop rewrite for Weighted
    // UnaryMM only if this constraint holds.
    Unary.OperationTypes unaryOp;
    if (_uop != null) {
        unaryOp = HopsOpOp1LopsU.get(_uop);
    } else if (_sop == OpOp2.POW) {
        unaryOp = Unary.OperationTypes.POW2;
    } else {
        unaryOp = Unary.OperationTypes.MULTIPLY2;
    }

    Hop X = getInput().get(0);
    Hop U = getInput().get(1);
    Hop V = getInput().get(2);

    // MR operator selection, part 1: size estimates for U and V
    double sizeU = OptimizerUtils.estimateSize(U.getDim1(), U.getDim2());
    double sizeV = OptimizerUtils.estimateSize(V.getDim1(), V.getDim2());
    boolean mapSide = (sizeU + sizeV < OptimizerUtils.getRemoteMemBudgetMap(true));

    Lop wumm;
    if (!FORCE_REPLICATION && mapSide) {
        // ---- broadcast plan ----
        // partitioning of U
        boolean partU = !U.dimsKnown()
            || U.getDim1() * U.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lopU = U.constructLops();
        if (partU) {
            // partition in MR only if U exceeds the local memory budget
            ExecType etU = (sizeU > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP;
            lopU = new DataPartition(lopU, DataType.MATRIX, ValueType.DOUBLE, etU,
                PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lopU.getOutputParameters().setDimensions(
                U.getDim1(), U.getDim2(), getRowsInBlock(), getColsInBlock(), U.getNnz());
            setLineNumbers(lopU);
        }

        // partitioning of V
        boolean partV = !V.dimsKnown()
            || V.getDim1() * V.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lopV = V.constructLops();
        if (partV) {
            // partition in MR only if V exceeds the local memory budget
            ExecType etV = (sizeV > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP;
            lopV = new DataPartition(lopV, DataType.MATRIX, ValueType.DOUBLE, etV,
                PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lopV.getOutputParameters().setDimensions(
                V.getDim1(), V.getDim2(), getRowsInBlock(), getColsInBlock(), V.getNnz());
            setLineNumbers(lopV);
        }

        // map-side wumm always with broadcast
        Lop lopX = X.constructLops();
        wumm = new WeightedUnaryMM(lopX, lopU, lopV,
            DataType.MATRIX, ValueType.DOUBLE, wtype, unaryOp, ExecType.MR);
    } else {
        // ---- general case ----
        // MR operator selection, part 2
        boolean cacheU = !FORCE_REPLICATION && (sizeU < OptimizerUtils.getRemoteMemBudgetReduce());
        boolean cacheV = !FORCE_REPLICATION
            && (cacheU
                ? sizeU + sizeV < OptimizerUtils.getRemoteMemBudgetReduce()
                : sizeV < OptimizerUtils.getRemoteMemBudgetReduce());

        Group sortedX = new Group(X.constructLops(), Group.OperationTypes.Sort,
            DataType.MATRIX, ValueType.DOUBLE);
        sortedX.getOutputParameters().setDimensions(
            X.getDim1(), X.getDim2(), X.getRowsInBlock(), X.getColsInBlock(), X.getNnz());
        setLineNumbers(sortedX);

        Lop lopU;
        if (cacheU) {
            // partitioning of U for read through distributed cache
            boolean partU = !U.dimsKnown()
                || U.getDim1() * U.getDim2() > DistributedCacheInput.PARTITION_SIZE;
            lopU = U.constructLops();
            if (partU) {
                ExecType etU = (sizeU > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP;
                lopU = new DataPartition(lopU, DataType.MATRIX, ValueType.DOUBLE, etU,
                    PDataPartitionFormat.ROW_BLOCK_WISE_N);
                lopU.getOutputParameters().setDimensions(
                    U.getDim1(), U.getDim2(), getRowsInBlock(), getColsInBlock(), U.getNnz());
                setLineNumbers(lopU);
            }
        } else {
            // replication of U for shuffle to target block;
            // ncol of t(V) -> nrow of V determines num replicates
            Lop numRepsU = createOffsetLop(V, false);
            Lop repU = new RepMat(U.constructLops(), numRepsU, true, V.getDataType(), V.getValueType());
            repU.getOutputParameters().setDimensions(
                U.getDim1(), U.getDim2(), U.getRowsInBlock(), U.getColsInBlock(), U.getNnz());
            setLineNumbers(repU);

            Group sortedU = new Group(repU, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
            sortedU.getOutputParameters().setDimensions(
                U.getDim1(), U.getDim2(), U.getRowsInBlock(), U.getColsInBlock(), -1);
            setLineNumbers(sortedU);
            lopU = sortedU;
        }

        Lop lopV;
        if (cacheV) {
            // partitioning of V for read through distributed cache
            boolean partV = !V.dimsKnown()
                || V.getDim1() * V.getDim2() > DistributedCacheInput.PARTITION_SIZE;
            lopV = V.constructLops();
            if (partV) {
                ExecType etV = (sizeV > OptimizerUtils.getLocalMemBudget()) ? ExecType.MR : ExecType.CP;
                lopV = new DataPartition(lopV, DataType.MATRIX, ValueType.DOUBLE, etV,
                    PDataPartitionFormat.ROW_BLOCK_WISE_N);
                lopV.getOutputParameters().setDimensions(
                    V.getDim1(), V.getDim2(), getRowsInBlock(), getColsInBlock(), V.getNnz());
                setLineNumbers(lopV);
            }
        } else {
            // replication of t(V) for shuffle to target block
            // NOTE(review): the transpose lop is typed with this hop's data/value type, not V's - confirm intended
            Transform transposedV = new Transform(V.constructLops(),
                HopsTransf2Lops.get(ReOrgOp.TRANSPOSE), getDataType(), getValueType(), ExecType.MR);
            transposedV.getOutputParameters().setDimensions(
                V.getDim2(), V.getDim1(), V.getColsInBlock(), V.getRowsInBlock(), V.getNnz());
            setLineNumbers(transposedV);

            // nrow of U determines num replicates
            Lop numRepsV = createOffsetLop(U, false);
            Lop repV = new RepMat(transposedV, numRepsV, false, V.getDataType(), V.getValueType());
            repV.getOutputParameters().setDimensions(
                V.getDim2(), V.getDim1(), V.getColsInBlock(), V.getRowsInBlock(), V.getNnz());
            setLineNumbers(repV);

            Group sortedV = new Group(repV, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
            sortedV.getOutputParameters().setDimensions(
                V.getDim2(), V.getDim1(), V.getColsInBlock(), V.getRowsInBlock(), -1);
            setLineNumbers(sortedV);
            lopV = sortedV;
        }

        // reduce-side wumm w/ or without broadcast
        wumm = new WeightedUnaryMMR(sortedX, lopU, lopV,
            DataType.MATRIX, ValueType.DOUBLE, wtype, unaryOp, cacheU, cacheV, ExecType.MR);
    }

    // common to both plans; in contrast to wsloss no aggregation required
    setOutputDimensions(wumm);
    setLineNumbers(wumm);
    setLops(wumm);
}
Aggregations