Use of org.apache.sysml.runtime.instructions.cp.ScalarObject in project systemml by apache.
The class Recompiler, method extractDAGOutputStatistics.
public static void extractDAGOutputStatistics(Hop hop, LocalVariableMap vars, boolean overwrite) {
	// for all writes to symbol table
	if (hop instanceof DataOp && ((DataOp) hop).getDataOpType() == DataOpTypes.TRANSIENTWRITE) {
		String varName = hop.getName();
		// not existing so far
		if (!vars.keySet().contains(varName) || overwrite) {
			// extract matrix sizes for size propagation
			if (hop.getDataType() == DataType.MATRIX) {
				MatrixObject mo = new MatrixObject(ValueType.DOUBLE, null);
				MatrixCharacteristics mc = new MatrixCharacteristics(hop.getDim1(), hop.getDim2(),
					ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize(), hop.getNnz());
				MetaDataFormat meta = new MetaDataFormat(mc, null, null);
				mo.setMetaData(meta);
				vars.put(varName, mo);
			}
			// extract scalar constants for second constant propagation
			else if (hop.getDataType() == DataType.SCALAR) {
				// extract literal assignments
				if (hop.getInput().size() == 1 && hop.getInput().get(0) instanceof LiteralOp) {
					ScalarObject constant = HopRewriteUtils.getScalarObject((LiteralOp) hop.getInput().get(0));
					if (constant != null)
						vars.put(varName, constant);
				}
				// extract constant variable assignments
				else if (hop.getInput().size() == 1 && hop.getInput().get(0) instanceof DataOp) {
					DataOp dop = (DataOp) hop.getInput().get(0);
					String dopvarname = dop.getName();
					if (dop.isRead() && vars.keySet().contains(dopvarname)) {
						ScalarObject constant = (ScalarObject) vars.get(dopvarname);
						// no clone because constant
						vars.put(varName, constant);
					}
				}
				// extract ncol/nrow variable assignments
				else if (hop.getInput().size() == 1 && hop.getInput().get(0) instanceof UnaryOp
					&& (((UnaryOp) hop.getInput().get(0)).getOp() == OpOp1.NROW
						|| ((UnaryOp) hop.getInput().get(0)).getOp() == OpOp1.NCOL)) {
					UnaryOp uop = (UnaryOp) hop.getInput().get(0);
					if (uop.getOp() == OpOp1.NROW && uop.getInput().get(0).getDim1() > 0)
						vars.put(varName, new IntObject(uop.getInput().get(0).getDim1()));
					else if (uop.getOp() == OpOp1.NCOL && uop.getInput().get(0).getDim2() > 0)
						vars.put(varName, new IntObject(uop.getInput().get(0).getDim2()));
				}
				// remove other updated scalars in order to ensure result
				// correctness of recompilation w/o being too conservative
				else {
					vars.remove(varName);
				}
			}
		}
		// already existing: take largest
		else {
			Data dat = vars.get(varName);
			if (dat instanceof MatrixObject) {
				MatrixObject mo = (MatrixObject) dat;
				MatrixCharacteristics mc = mo.getMatrixCharacteristics();
				double sparsity = (mc.getNonZeros() >= 0) ?
					((double) mc.getNonZeros()) / mc.getRows() / mc.getCols() : 1.0;
				// update statistics if necessary
				if (OptimizerUtils.estimateSizeExactSparsity(mc.getRows(), mc.getCols(), sparsity)
					< OptimizerUtils.estimateSize(hop.getDim1(), hop.getDim2())) {
					mc.setDimension(hop.getDim1(), hop.getDim2());
					mc.setNonZeros(hop.getNnz());
				}
			}
			// scalar (just overwrite)
			else if (hop.getInput().size() == 1 && hop.getInput().get(0) instanceof LiteralOp) {
				ScalarObject constant = HopRewriteUtils.getScalarObject((LiteralOp) hop.getInput().get(0));
				if (constant != null)
					vars.put(varName, constant);
			}
		}
	}
}
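A minimal usage sketch, not from the SystemML sources: during dynamic recompilation this method would typically be applied to each root hop of a DAG to seed a symbol table for subsequent size propagation. The helper name seedSymbolTable is hypothetical; Recompiler.extractDAGOutputStatistics and LocalVariableMap are the real classes shown above.

// Hypothetical helper, not SystemML API: seed a fresh symbol table with the
// output statistics of all root hops of a DAG.
private static LocalVariableMap seedSymbolTable(ArrayList<Hop> roots) {
	LocalVariableMap vars = new LocalVariableMap();
	for (Hop root : roots)
		Recompiler.extractDAGOutputStatistics(root, vars, true); // overwrite stale entries
	return vars;
}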
Use of org.apache.sysml.runtime.instructions.cp.ScalarObject in project systemml by apache.
The class SpoofSPInstruction, method processInstruction.
@Override
public void processInstruction(ExecutionContext ec) {
	SparkExecutionContext sec = (SparkExecutionContext) ec;
	// decide upon broadcast side inputs
	boolean[] bcVect = determineBroadcastInputs(sec, _in);
	boolean[] bcVect2 = getMatrixBroadcastVector(sec, _in, bcVect);
	int main = getMainInputIndex(_in, bcVect);
	// create joined input rdd w/ replication if needed
	MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(_in[main].getName());
	JavaPairRDD<MatrixIndexes, MatrixBlock[]> in = createJoinedInputRDD(
		sec, _in, bcVect, (_class.getSuperclass() == SpoofOuterProduct.class));
	JavaPairRDD<MatrixIndexes, MatrixBlock> out = null;
	// create lists of input broadcasts and scalars
	ArrayList<PartitionedBroadcast<MatrixBlock>> bcMatrices = new ArrayList<>();
	ArrayList<ScalarObject> scalars = new ArrayList<>();
	for (int i = 0; i < _in.length; i++) {
		if (_in[i].getDataType() == DataType.MATRIX && bcVect[i]) {
			bcMatrices.add(sec.getBroadcastForVariable(_in[i].getName()));
		} else if (_in[i].getDataType() == DataType.SCALAR) {
			// note: even if literal, it might be compiled as scalar placeholder
			scalars.add(sec.getScalarInput(_in[i].getName(), _in[i].getValueType(), _in[i].isLiteral()));
		}
	}
	// execute generated operator
	if (_class.getSuperclass() == SpoofCellwise.class) { // CELL
		SpoofCellwise op = (SpoofCellwise) CodegenUtils.createInstance(_class);
		AggregateOperator aggop = getAggregateOperator(op.getAggOp());
		if (_out.getDataType() == DataType.MATRIX) {
			// execute codegen block operation
			out = in.mapPartitionsToPair(new CellwiseFunction(
				_class.getName(), _classBytes, bcVect2, bcMatrices, scalars), true);
			if ((op.getCellType() == CellType.ROW_AGG && mcIn.getCols() > mcIn.getColsPerBlock())
				|| (op.getCellType() == CellType.COL_AGG && mcIn.getRows() > mcIn.getRowsPerBlock())) {
				long numBlocks = (op.getCellType() == CellType.ROW_AGG) ?
					mcIn.getNumRowBlocks() : mcIn.getNumColBlocks();
				out = RDDAggregateUtils.aggByKeyStable(out, aggop,
					(int) Math.min(out.getNumPartitions(), numBlocks), false);
			}
			sec.setRDDHandleForVariable(_out.getName(), out);
			// maintain lineage info and output characteristics
			maintainLineageInfo(sec, _in, bcVect, _out);
			updateOutputMatrixCharacteristics(sec, op);
		} else { // SCALAR
			out = in.mapPartitionsToPair(new CellwiseFunction(
				_class.getName(), _classBytes, bcVect2, bcMatrices, scalars), true);
			MatrixBlock tmpMB = RDDAggregateUtils.aggStable(out, aggop);
			sec.setVariable(_out.getName(), new DoubleObject(tmpMB.getValue(0, 0)));
		}
	} else if (_class.getSuperclass() == SpoofMultiAggregate.class) { // MAGG
		SpoofMultiAggregate op = (SpoofMultiAggregate) CodegenUtils.createInstance(_class);
		AggOp[] aggOps = op.getAggOps();
		MatrixBlock tmpMB = in
			.mapToPair(new MultiAggregateFunction(_class.getName(), _classBytes, bcVect2, bcMatrices, scalars))
			.values().fold(new MatrixBlock(), new MultiAggAggregateFunction(aggOps));
		sec.setMatrixOutput(_out.getName(), tmpMB, getExtendedOpcode());
	} else if (_class.getSuperclass() == SpoofOuterProduct.class) { // OUTER
		if (_out.getDataType() == DataType.MATRIX) {
			SpoofOperator op = (SpoofOperator) CodegenUtils.createInstance(_class);
			OutProdType type = ((SpoofOuterProduct) op).getOuterProdType();
			// update matrix characteristics
			updateOutputMatrixCharacteristics(sec, op);
			MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(_out.getName());
			out = in.mapPartitionsToPair(new OuterProductFunction(
				_class.getName(), _classBytes, bcVect2, bcMatrices, scalars), true);
			if (type == OutProdType.LEFT_OUTER_PRODUCT || type == OutProdType.RIGHT_OUTER_PRODUCT) {
				long numBlocks = mcOut.getNumRowBlocks() * mcOut.getNumColBlocks();
				out = RDDAggregateUtils.sumByKeyStable(out,
					(int) Math.min(out.getNumPartitions(), numBlocks), false);
			}
			sec.setRDDHandleForVariable(_out.getName(), out);
			// maintain lineage info and output characteristics
			maintainLineageInfo(sec, _in, bcVect, _out);
		} else {
			out = in.mapPartitionsToPair(new OuterProductFunction(
				_class.getName(), _classBytes, bcVect2, bcMatrices, scalars), true);
			MatrixBlock tmp = RDDAggregateUtils.sumStable(out);
			sec.setVariable(_out.getName(), new DoubleObject(tmp.getValue(0, 0)));
		}
	} else if (_class.getSuperclass() == SpoofRowwise.class) { // ROW
		if (mcIn.getCols() > mcIn.getColsPerBlock()) {
			throw new DMLRuntimeException("Invalid spark rowwise operator w/ ncol="
				+ mcIn.getCols() + ", ncolpb=" + mcIn.getColsPerBlock() + ".");
		}
		SpoofRowwise op = (SpoofRowwise) CodegenUtils.createInstance(_class);
		long clen2 = op.getRowType().isConstDim2(op.getConstDim2()) ? op.getConstDim2() :
			op.getRowType().isRowTypeB1() ? sec.getMatrixCharacteristics(_in[1].getName()).getCols() : -1;
		RowwiseFunction fmmc = new RowwiseFunction(_class.getName(), _classBytes,
			bcVect2, bcMatrices, scalars, (int) mcIn.getCols(), (int) clen2);
		out = in.mapPartitionsToPair(fmmc,
			op.getRowType() == RowType.ROW_AGG || op.getRowType() == RowType.NO_AGG);
		if (op.getRowType().isColumnAgg() || op.getRowType() == RowType.FULL_AGG) {
			MatrixBlock tmpMB = RDDAggregateUtils.sumStable(out);
			if (op.getRowType().isColumnAgg())
				sec.setMatrixOutput(_out.getName(), tmpMB, getExtendedOpcode());
			else
				sec.setScalarOutput(_out.getName(), new DoubleObject(tmpMB.quickGetValue(0, 0)));
		} else { // row-agg or no-agg
			if (op.getRowType() == RowType.ROW_AGG && mcIn.getCols() > mcIn.getColsPerBlock()) {
				out = RDDAggregateUtils.sumByKeyStable(out,
					(int) Math.min(out.getNumPartitions(), mcIn.getNumRowBlocks()), false);
			}
			sec.setRDDHandleForVariable(_out.getName(), out);
			// maintain lineage info and output characteristics
			maintainLineageInfo(sec, _in, bcVect, _out);
			updateOutputMatrixCharacteristics(sec, op);
		}
	} else {
		throw new DMLRuntimeException("Operator " + _class.getSuperclass() + " is not supported on Spark");
	}
}
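For context, a minimal sketch of how a list of ScalarObject side inputs like the one built above can be unboxed before a generated operator consumes it. The helper name unboxScalars is hypothetical; ScalarObject.getDoubleValue() is the standard accessor on the class this page documents.

// Hypothetical helper, not SystemML API: unbox ScalarObject side inputs into
// the primitive double[] form that generated operators work with internally.
private static double[] unboxScalars(ArrayList<ScalarObject> scalars) {
	double[] values = new double[scalars.size()];
	for (int i = 0; i < scalars.size(); i++)
		values[i] = scalars.get(i).getDoubleValue();
	return values;
}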