Use of org.apache.sysml.lops.WeightedSquaredLoss in project systemml by Apache.
Class QuaternaryOp, method constructSparkLopsWeightedSquaredLoss.
/**
 * Builds the Spark lop for the weighted squared loss (wsloss) and registers it as the lop
 * of this hop.
 *
 * <p>The map-side {@code WeightedSquaredLoss} is selected when replication is not forced, the
 * weights type does not have four inputs, and both factors pass the memory checks below.
 * Otherwise the reduce-side {@code WeightedSquaredLossR} is created, with per-factor cache
 * flags derived from the same budgets.
 *
 * @param wtype weights type that is handed through to the constructed lop
 */
private void constructSparkLopsWeightedSquaredLoss(WeightsType wtype) {
    // NOTE: wsloss factors U/V commonly have a rank of 10s to 100s. The current runtime only
    // supports single block outer products (U/V rank <= blocksize, i.e., 1000 by default);
    // this is enforced by applying the Weighted Squared Loss hop rewrite only if that holds.
    // Any broadcast needs to fit twice in local memory (the input is partitioned in cp) and
    // once in executor broadcast memory. The 2GB broadcast constraint is no longer required
    // because the max_int byte buffer constraint has been fixed in Spark 1.4.
    final double execBudget = SparkExecutionContext.getBroadcastMemoryBudget();
    final double localBudget = OptimizerUtils.getLocalMemBudget();

    final Hop hopX = getInput().get(0);
    final Hop hopU = getInput().get(1);
    final Hop hopV = getInput().get(2);
    final Hop hopW = getInput().get(3);

    // operator selection, part 1: estimated sizes of the two factors
    final double sizeU = OptimizerUtils.estimateSize(hopU.getDim1(), hopU.getDim2());
    final double sizeV = OptimizerUtils.estimateSize(hopV.getDim1(), hopV.getDim2());

    // "fits twice in local memory" checks, shared by both operator variants
    final boolean fitsLocalU = 2 * sizeU < localBudget;
    final boolean fitsLocalV = 2 * sizeV < localBudget;
    final boolean mapSide = !wtype.hasFourInputs()
        && sizeU + sizeV < execBudget
        && fitsLocalU
        && fitsLocalV;

    final Lop wsloss;
    if (!FORCE_REPLICATION && mapSide) {
        // map-side wsloss, always with broadcast
        wsloss = new WeightedSquaredLoss(hopX.constructLops(), hopU.constructLops(), hopV.constructLops(), hopW.constructLops(), DataType.SCALAR, ValueType.DOUBLE, wtype, ExecType.SPARK);
    } else {
        // operator selection, part 2: decide per factor whether it can be cached
        final boolean cacheU = !FORCE_REPLICATION && sizeU < execBudget && fitsLocalU;
        // V shares the executor budget with U whenever U is cached
        final double execNeededForV = cacheU ? sizeU + sizeV : sizeV;
        final boolean cacheV = !FORCE_REPLICATION && execNeededForV < execBudget && fitsLocalV;

        // reduce-side wsloss, with or without broadcast
        wsloss = new WeightedSquaredLossR(hopX.constructLops(), hopU.constructLops(), hopV.constructLops(), hopW.constructLops(), DataType.SCALAR, ValueType.DOUBLE, wtype, cacheU, cacheV, ExecType.SPARK);
    }

    // registration is identical for both operator variants
    setOutputDimensions(wsloss);
    setLineNumbers(wsloss);
    setLops(wsloss);
}
Use of org.apache.sysml.lops.WeightedSquaredLoss in project systemml by Apache.
Class QuaternaryOp, method constructMRLopsWeightedSquaredLoss.
/**
 * Builds the MR lop chain for the weighted squared loss (wsloss) and registers the final
 * lop of that chain as the lop of this hop.
 *
 * <p>The map-side {@code WeightedSquaredLoss} is selected when replication is not forced, the
 * weights type does not have four inputs, and the estimated sizes of both factors together
 * are below the remote map memory budget. Otherwise the reduce-side
 * {@code WeightedSquaredLossR} is created. In both cases the wsloss lop is followed by a
 * group, a sum aggregate, and a cast to scalar.
 *
 * @param wtype weights type that is handed through to the constructed lop
 */
private void constructMRLopsWeightedSquaredLoss(WeightsType wtype) {
    // NOTE: wsloss factors U/V commonly have a rank of 10s to 100s. The current runtime only
    // supports single block outer products (U/V rank <= blocksize, i.e., 1000 by default);
    // this is enforced by applying the Weighted Squared Loss hop rewrite only if that holds.
    final Hop hopX = getInput().get(0);
    final Hop hopU = getInput().get(1);
    final Hop hopV = getInput().get(2);
    final Hop hopW = getInput().get(3);

    // MR operator selection, part 1: estimated sizes of the two factors
    final double sizeU = OptimizerUtils.estimateSize(hopU.getDim1(), hopU.getDim2());
    final double sizeV = OptimizerUtils.estimateSize(hopV.getDim1(), hopV.getDim2());
    final boolean mapSide = !wtype.hasFourInputs()
        && sizeU + sizeV < OptimizerUtils.getRemoteMemBudgetMap(true);

    final Lop wsloss;
    if (!FORCE_REPLICATION && mapSide) {
        // U is partitioned if its dims are unknown or its cell count exceeds the partition size
        boolean partitionU = !hopU.dimsKnown() || hopU.getDim1() * hopU.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lopU = hopU.constructLops();
        if (partitionU) {
            // partition in MR only if U exceeds the local memory budget, otherwise in CP
            ExecType partExecU;
            if (sizeU > OptimizerUtils.getLocalMemBudget()) {
                partExecU = ExecType.MR;
            } else {
                partExecU = ExecType.CP;
            }
            lopU = new DataPartition(lopU, DataType.MATRIX, ValueType.DOUBLE, partExecU, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lopU.getOutputParameters().setDimensions(hopU.getDim1(), hopU.getDim2(), getRowsInBlock(), getColsInBlock(), hopU.getNnz());
            setLineNumbers(lopU);
        }

        // V is handled the same way as U
        boolean partitionV = !hopV.dimsKnown() || hopV.getDim1() * hopV.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lopV = hopV.constructLops();
        if (partitionV) {
            ExecType partExecV;
            if (sizeV > OptimizerUtils.getLocalMemBudget()) {
                partExecV = ExecType.MR;
            } else {
                partExecV = ExecType.CP;
            }
            lopV = new DataPartition(lopV, DataType.MATRIX, ValueType.DOUBLE, partExecV, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lopV.getOutputParameters().setDimensions(hopV.getDim1(), hopV.getDim2(), getRowsInBlock(), getColsInBlock(), hopV.getNnz());
            setLineNumbers(lopV);
        }

        // map-side wsloss, always with broadcast
        wsloss = new WeightedSquaredLoss(hopX.constructLops(), lopU, lopV, hopW.constructLops(), DataType.MATRIX, ValueType.DOUBLE, wtype, ExecType.MR);
    } else {
        // MR operator selection, part 2: decide per factor whether it can be cached
        boolean cacheU = false;
        boolean cacheV = false;
        if (!FORCE_REPLICATION) {
            cacheU = sizeU < OptimizerUtils.getRemoteMemBudgetReduce();
            // V shares the reduce budget with U whenever U is cached
            double reduceNeededForV = cacheU ? sizeU + sizeV : sizeV;
            cacheV = reduceNeededForV < OptimizerUtils.getRemoteMemBudgetReduce();
        }

        Group grpX = new Group(hopX.constructLops(), Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        grpX.getOutputParameters().setDimensions(hopX.getDim1(), hopX.getDim2(), hopX.getRowsInBlock(), hopX.getColsInBlock(), -1);
        setLineNumbers(grpX);

        // W is wrapped in a group only when its lop is a matrix
        Lop grpW = hopW.constructLops();
        if (grpW.getDataType() == DataType.MATRIX) {
            grpW = new Group(hopW.constructLops(), Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
            grpW.getOutputParameters().setDimensions(hopW.getDim1(), hopW.getDim2(), hopW.getRowsInBlock(), hopW.getColsInBlock(), -1);
            setLineNumbers(grpW);
        }

        Lop lopU = constructLeftFactorMRLop(hopU, hopV, cacheU, sizeU);
        Lop lopV = constructRightFactorMRLop(hopU, hopV, cacheV, sizeV);

        // reduce-side wsloss, with or without broadcast
        wsloss = new WeightedSquaredLossR(grpX, lopU, lopV, grpW, DataType.MATRIX, ValueType.DOUBLE, wtype, cacheU, cacheV, ExecType.MR);
    }

    // From here on both variants share the same chain: wsloss -> group -> sum -> scalar cast.
    wsloss.getOutputParameters().setDimensions(1, 1, hopX.getRowsInBlock(), hopX.getColsInBlock(), -1);
    setLineNumbers(wsloss);

    Group grpLoss = new Group(wsloss, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
    grpLoss.getOutputParameters().setDimensions(1, 1, hopX.getRowsInBlock(), hopX.getColsInBlock(), -1);
    setLineNumbers(grpLoss);

    Aggregate sumAgg = new Aggregate(grpLoss, HopsAgg2Lops.get(AggOp.SUM), DataType.MATRIX, ValueType.DOUBLE, ExecType.MR);
    // aggregation uses kahanSum
    sumAgg.setupCorrectionLocation(CorrectionLocationType.NONE);
    sumAgg.getOutputParameters().setDimensions(1, 1, hopX.getRowsInBlock(), hopX.getColsInBlock(), -1);
    setLineNumbers(sumAgg);

    UnaryCP toScalar = new UnaryCP(sumAgg, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR), getDataType(), getValueType());
    toScalar.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
    setLineNumbers(toScalar);
    setLops(toScalar);
}
Use of org.apache.sysml.lops.WeightedSquaredLoss in project incubator-systemml by Apache.
Class QuaternaryOp, method constructMRLopsWeightedSquaredLoss.
/**
 * Builds the MR lop chain for the weighted squared loss (wsloss) and registers the final
 * lop of that chain as the lop of this hop.
 *
 * <p>The map-side {@code WeightedSquaredLoss} is selected when replication is not forced, the
 * weights type does not have four inputs, and the estimated sizes of both factors together
 * are below the remote map memory budget. Otherwise the reduce-side
 * {@code WeightedSquaredLossR} is created. In both cases the wsloss lop is followed by a
 * group, a sum aggregate, and a cast to scalar.
 *
 * @param wtype weights type that is handed through to the constructed lop
 */
private void constructMRLopsWeightedSquaredLoss(WeightsType wtype) {
    // NOTE: wsloss factors U/V commonly have a rank of 10s to 100s. The current runtime only
    // supports single block outer products (U/V rank <= blocksize, i.e., 1000 by default);
    // this is enforced by applying the Weighted Squared Loss hop rewrite only if that holds.
    final Hop hopX = getInput().get(0);
    final Hop hopU = getInput().get(1);
    final Hop hopV = getInput().get(2);
    final Hop hopW = getInput().get(3);

    // MR operator selection, part 1: estimated sizes of the two factors
    final double sizeU = OptimizerUtils.estimateSize(hopU.getDim1(), hopU.getDim2());
    final double sizeV = OptimizerUtils.estimateSize(hopV.getDim1(), hopV.getDim2());
    final boolean mapSide = !wtype.hasFourInputs()
        && sizeU + sizeV < OptimizerUtils.getRemoteMemBudgetMap(true);

    final Lop wsloss;
    if (!FORCE_REPLICATION && mapSide) {
        // U is partitioned if its dims are unknown or its cell count exceeds the partition size
        boolean partitionU = !hopU.dimsKnown() || hopU.getDim1() * hopU.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lopU = hopU.constructLops();
        if (partitionU) {
            // partition in MR only if U exceeds the local memory budget, otherwise in CP
            ExecType partExecU;
            if (sizeU > OptimizerUtils.getLocalMemBudget()) {
                partExecU = ExecType.MR;
            } else {
                partExecU = ExecType.CP;
            }
            lopU = new DataPartition(lopU, DataType.MATRIX, ValueType.DOUBLE, partExecU, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lopU.getOutputParameters().setDimensions(hopU.getDim1(), hopU.getDim2(), getRowsInBlock(), getColsInBlock(), hopU.getNnz());
            setLineNumbers(lopU);
        }

        // V is handled the same way as U
        boolean partitionV = !hopV.dimsKnown() || hopV.getDim1() * hopV.getDim2() > DistributedCacheInput.PARTITION_SIZE;
        Lop lopV = hopV.constructLops();
        if (partitionV) {
            ExecType partExecV;
            if (sizeV > OptimizerUtils.getLocalMemBudget()) {
                partExecV = ExecType.MR;
            } else {
                partExecV = ExecType.CP;
            }
            lopV = new DataPartition(lopV, DataType.MATRIX, ValueType.DOUBLE, partExecV, PDataPartitionFormat.ROW_BLOCK_WISE_N);
            lopV.getOutputParameters().setDimensions(hopV.getDim1(), hopV.getDim2(), getRowsInBlock(), getColsInBlock(), hopV.getNnz());
            setLineNumbers(lopV);
        }

        // map-side wsloss, always with broadcast
        wsloss = new WeightedSquaredLoss(hopX.constructLops(), lopU, lopV, hopW.constructLops(), DataType.MATRIX, ValueType.DOUBLE, wtype, ExecType.MR);
    } else {
        // MR operator selection, part 2: decide per factor whether it can be cached
        boolean cacheU = false;
        boolean cacheV = false;
        if (!FORCE_REPLICATION) {
            cacheU = sizeU < OptimizerUtils.getRemoteMemBudgetReduce();
            // V shares the reduce budget with U whenever U is cached
            double reduceNeededForV = cacheU ? sizeU + sizeV : sizeV;
            cacheV = reduceNeededForV < OptimizerUtils.getRemoteMemBudgetReduce();
        }

        Group grpX = new Group(hopX.constructLops(), Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
        grpX.getOutputParameters().setDimensions(hopX.getDim1(), hopX.getDim2(), hopX.getRowsInBlock(), hopX.getColsInBlock(), -1);
        setLineNumbers(grpX);

        // W is wrapped in a group only when its lop is a matrix
        Lop grpW = hopW.constructLops();
        if (grpW.getDataType() == DataType.MATRIX) {
            grpW = new Group(hopW.constructLops(), Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
            grpW.getOutputParameters().setDimensions(hopW.getDim1(), hopW.getDim2(), hopW.getRowsInBlock(), hopW.getColsInBlock(), -1);
            setLineNumbers(grpW);
        }

        Lop lopU = constructLeftFactorMRLop(hopU, hopV, cacheU, sizeU);
        Lop lopV = constructRightFactorMRLop(hopU, hopV, cacheV, sizeV);

        // reduce-side wsloss, with or without broadcast
        wsloss = new WeightedSquaredLossR(grpX, lopU, lopV, grpW, DataType.MATRIX, ValueType.DOUBLE, wtype, cacheU, cacheV, ExecType.MR);
    }

    // From here on both variants share the same chain: wsloss -> group -> sum -> scalar cast.
    wsloss.getOutputParameters().setDimensions(1, 1, hopX.getRowsInBlock(), hopX.getColsInBlock(), -1);
    setLineNumbers(wsloss);

    Group grpLoss = new Group(wsloss, Group.OperationTypes.Sort, DataType.MATRIX, ValueType.DOUBLE);
    grpLoss.getOutputParameters().setDimensions(1, 1, hopX.getRowsInBlock(), hopX.getColsInBlock(), -1);
    setLineNumbers(grpLoss);

    Aggregate sumAgg = new Aggregate(grpLoss, HopsAgg2Lops.get(AggOp.SUM), DataType.MATRIX, ValueType.DOUBLE, ExecType.MR);
    // aggregation uses kahanSum
    sumAgg.setupCorrectionLocation(CorrectionLocationType.NONE);
    sumAgg.getOutputParameters().setDimensions(1, 1, hopX.getRowsInBlock(), hopX.getColsInBlock(), -1);
    setLineNumbers(sumAgg);

    UnaryCP toScalar = new UnaryCP(sumAgg, HopsOpOp1LopsUS.get(OpOp1.CAST_AS_SCALAR), getDataType(), getValueType());
    toScalar.getOutputParameters().setDimensions(0, 0, 0, 0, -1);
    setLineNumbers(toScalar);
    setLops(toScalar);
}
Use of org.apache.sysml.lops.WeightedSquaredLoss in project incubator-systemml by Apache.
Class QuaternaryOp, method constructCPLopsWeightedSquaredLoss.
/**
 * Builds the CP lop for the weighted squared loss (wsloss) from the first four inputs of
 * this hop and registers it as the lop of this hop.
 *
 * @param wtype weights type that is handed through to the constructed lop
 */
private void constructCPLopsWeightedSquaredLoss(WeightsType wtype) {
    // inputs are passed in positional order 0..3; output types are taken from this hop
    WeightedSquaredLoss lossLop = new WeightedSquaredLoss(
        getInput().get(0).constructLops(),
        getInput().get(1).constructLops(),
        getInput().get(2).constructLops(),
        getInput().get(3).constructLops(),
        getDataType(),
        getValueType(),
        wtype,
        ExecType.CP);

    // degree of parallelism: thread count constrained from _maxNumThreads
    lossLop.setNumThreads(OptimizerUtils.getConstrainedNumThreads(_maxNumThreads));

    setOutputDimensions(lossLop);
    setLineNumbers(lossLop);
    setLops(lossLop);
}
Use of org.apache.sysml.lops.WeightedSquaredLoss in project incubator-systemml by Apache.
Class QuaternaryOp, method constructSparkLopsWeightedSquaredLoss.
/**
 * Builds the Spark lop for the weighted squared loss (wsloss) and registers it as the lop
 * of this hop.
 *
 * <p>The map-side {@code WeightedSquaredLoss} is selected when replication is not forced, the
 * weights type does not have four inputs, and both factors pass the memory checks below.
 * Otherwise the reduce-side {@code WeightedSquaredLossR} is created, with per-factor cache
 * flags derived from the same budgets.
 *
 * @param wtype weights type that is handed through to the constructed lop
 */
private void constructSparkLopsWeightedSquaredLoss(WeightsType wtype) {
    // NOTE: wsloss factors U/V commonly have a rank of 10s to 100s. The current runtime only
    // supports single block outer products (U/V rank <= blocksize, i.e., 1000 by default);
    // this is enforced by applying the Weighted Squared Loss hop rewrite only if that holds.
    // Any broadcast needs to fit twice in local memory (the input is partitioned in cp) and
    // once in executor broadcast memory. The 2GB broadcast constraint is no longer required
    // because the max_int byte buffer constraint has been fixed in Spark 1.4.
    final double execBudget = SparkExecutionContext.getBroadcastMemoryBudget();
    final double localBudget = OptimizerUtils.getLocalMemBudget();

    final Hop hopX = getInput().get(0);
    final Hop hopU = getInput().get(1);
    final Hop hopV = getInput().get(2);
    final Hop hopW = getInput().get(3);

    // operator selection, part 1: estimated sizes of the two factors
    final double sizeU = OptimizerUtils.estimateSize(hopU.getDim1(), hopU.getDim2());
    final double sizeV = OptimizerUtils.estimateSize(hopV.getDim1(), hopV.getDim2());

    // "fits twice in local memory" checks, shared by both operator variants
    final boolean fitsLocalU = 2 * sizeU < localBudget;
    final boolean fitsLocalV = 2 * sizeV < localBudget;
    final boolean mapSide = !wtype.hasFourInputs()
        && sizeU + sizeV < execBudget
        && fitsLocalU
        && fitsLocalV;

    final Lop wsloss;
    if (!FORCE_REPLICATION && mapSide) {
        // map-side wsloss, always with broadcast
        wsloss = new WeightedSquaredLoss(hopX.constructLops(), hopU.constructLops(), hopV.constructLops(), hopW.constructLops(), DataType.SCALAR, ValueType.DOUBLE, wtype, ExecType.SPARK);
    } else {
        // operator selection, part 2: decide per factor whether it can be cached
        final boolean cacheU = !FORCE_REPLICATION && sizeU < execBudget && fitsLocalU;
        // V shares the executor budget with U whenever U is cached
        final double execNeededForV = cacheU ? sizeU + sizeV : sizeV;
        final boolean cacheV = !FORCE_REPLICATION && execNeededForV < execBudget && fitsLocalV;

        // reduce-side wsloss, with or without broadcast
        wsloss = new WeightedSquaredLossR(hopX.constructLops(), hopU.constructLops(), hopV.constructLops(), hopW.constructLops(), DataType.SCALAR, ValueType.DOUBLE, wtype, cacheU, cacheV, ExecType.SPARK);
    }

    // registration is identical for both operator variants
    setOutputDimensions(wsloss);
    setLineNumbers(wsloss);
    setLops(wsloss);
}
Aggregations