use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project incubator-systemml by apache.
the class MatrixAppendRSPInstruction method processInstruction.
/**
 * Performs a reduce-side append of the two binary-block inputs and registers the
 * result RDD under the output variable.
 * <p>
 * The two input RDDs are joined on their block index and each joined pair is passed
 * through {@code ReduceSideAppendFunction}; {@code _cbind} is forwarded to that
 * function and to the input check and output-characteristics update.
 *
 * @param ec execution context; must be a {@code SparkExecutionContext}
 */
@Override
public void processInstruction(ExecutionContext ec) {
    // reduce-only append (output must have at most one column block)
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    checkBinaryAppendInputCharacteristics(sec, _cbind, true, false);

    // fetch both sides as binary-block RDDs
    final String leftName = input1.getName();
    final String rightName = input2.getName();
    JavaPairRDD<MatrixIndexes, MatrixBlock> left = sec.getBinaryBlockRDDHandleForVariable(leftName);
    JavaPairRDD<MatrixIndexes, MatrixBlock> right = sec.getBinaryBlockRDDHandleForVariable(rightName);

    // join + mapValues keeps the keys untouched (partitioning preserving)
    JavaPairRDD<MatrixIndexes, MatrixBlock> appended = left
        .join(right)
        .mapValues(new ReduceSideAppendFunction(_cbind));

    // publish the output RDD handle and record both inputs as its lineage
    updateBinaryAppendOutputMatrixCharacteristics(sec, _cbind);
    final String outName = output.getName();
    sec.setRDDHandleForVariable(outName, appended);
    sec.addLineageRDD(outName, leftName);
    sec.addLineageRDD(outName, rightName);
}
use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project incubator-systemml by apache.
the class PmmSPInstruction method processInstruction.
/**
 * Executes the pmm instruction on Spark: one input is read as a binary-block RDD,
 * the other as a partitioned broadcast, and the per-block results are summed by key.
 * <p>
 * Which input is broadcast depends on {@code _type}: for {@code CacheType.LEFT} the
 * first input is broadcast and the second is the RDD; otherwise the roles are swapped.
 *
 * @param ec execution context; must be a {@code SparkExecutionContext}
 */
@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;

    // pick the RDD side and the broadcast side according to the cache type
    final String rddName;
    final String broadcastName;
    if (_type == CacheType.LEFT) {
        rddName = input2.getName();
        broadcastName = input1.getName();
    } else {
        rddName = input1.getName();
        broadcastName = input2.getName();
    }

    MatrixCharacteristics outChars = sec.getMatrixCharacteristics(output.getName());
    long numRows = sec
        .getScalarInput(_nrow.getName(), _nrow.getValueType(), _nrow.isLiteral())
        .getLongValue();

    // get inputs
    JavaPairRDD<MatrixIndexes, MatrixBlock> rddInput = sec.getBinaryBlockRDDHandleForVariable(rddName);
    PartitionedBroadcast<MatrixBlock> bcInput = sec.getBroadcastForVariable(broadcastName);

    // execute pmm instruction: per-block map followed by a stable sum over equal keys
    RDDPMMFunction pmmFunction = new RDDPMMFunction(_type, bcInput, numRows, outChars.getRowsPerBlock());
    JavaPairRDD<MatrixIndexes, MatrixBlock> partials = rddInput.flatMapToPair(pmmFunction);
    JavaPairRDD<MatrixIndexes, MatrixBlock> result = RDDAggregateUtils.sumByKeyStable(partials, false);

    // put output RDD handle into symbol table, with lineage to both inputs
    final String outName = output.getName();
    sec.setRDDHandleForVariable(outName, result);
    sec.addLineageRDD(outName, rddName);
    sec.addLineageBroadcast(outName, broadcastName);

    // update output statistics if not inferred
    updateBinaryMMOutputMatrixCharacteristics(sec, false);
}
use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project incubator-systemml by apache.
the class QuantilePickSPInstruction method processInstruction.
/**
 * Computes a quantile-based scalar (value pick, median, or IQM, selected by
 * {@code _type}) from the first input and stores it as the scalar output.
 * <p>
 * All three variants start from {@code getWeightedQuantileSummary}; the meaning of the
 * individual entries of the returned array is defined by that helper and is not
 * restated here.
 *
 * @param ec execution context; must be a {@code SparkExecutionContext}
 * @throws DMLRuntimeException if {@code _type} is not one of the handled operation types
 */
@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;

    // get input rdd and its characteristics
    JavaPairRDD<MatrixIndexes, MatrixBlock> in = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    MatrixCharacteristics mc = sec.getMatrixCharacteristics(input1.getName());

    // (in contrast to cp instructions, the case w/o weights does not materialize weights of 1)
    switch (_type) {
        case VALUEPICK: {
            // quantile is supplied by the second operand
            double requested = ec.getScalarInput(input2).getDoubleValue();
            double[] summary = getWeightedQuantileSummary(in, mc, requested);
            DoubleObject picked = new DoubleObject(summary[3]);
            ec.setScalarOutput(output.getName(), picked);
            break;
        }
        case MEDIAN: {
            // same as value pick with a fixed quantile of 0.5
            double[] summary = getWeightedQuantileSummary(in, mc, 0.5);
            DoubleObject median = new DoubleObject(summary[3]);
            ec.setScalarOutput(output.getName(), median);
            break;
        }
        case IQM: {
            double[] summary = getWeightedQuantileSummary(in, mc, 0.25, 0.75);
            long key25 = (long) Math.ceil(summary[1]);
            long key75 = (long) Math.ceil(summary[2]);
            long firstKey = key25 + 1;

            // keep only the blocks in [key25+1, key75], then extract and sum that range
            JavaPairRDD<MatrixIndexes, MatrixBlock> inRange =
                in.filter(new FilterFunction(firstKey, key75, mc.getRowsPerBlock()));
            JavaPairRDD<MatrixIndexes, MatrixBlock> partialSums =
                inRange.mapToPair(new ExtractAndSumFunction(firstKey, key75, mc.getRowsPerBlock()));
            double sum = RDDAggregateUtils.sumStable(partialSums).getValue(0, 0);

            // argument order follows MatrixBlock.computeIQMCorrection
            double iqm = MatrixBlock.computeIQMCorrection(
                sum, summary[0], summary[3], summary[5], summary[4], summary[6]);
            ec.setScalarOutput(output.getName(), new DoubleObject(iqm));
            break;
        }
        default:
            throw new DMLRuntimeException("Unsupported qpick operation type: " + _type);
    }
}
use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project incubator-systemml by apache.
the class SpoofSPInstruction method processInstruction.
/**
 * Runs the generated operator {@code _class} over its inputs on Spark.
 * <p>
 * Inputs are split into a joined input RDD, a list of broadcast matrices, and a list of
 * scalars. The operator template is then selected by the superclass of {@code _class}
 * (cellwise, multi-aggregate, outer-product, or rowwise). Depending on the template and
 * the output data type, the result is registered as an RDD handle (with lineage), as a
 * matrix output, or as a scalar variable.
 *
 * @param ec execution context; must be a {@code SparkExecutionContext}
 * @throws DMLRuntimeException if the operator template is not supported on Spark, or if a
 *         rowwise operator is applied to an input that has more columns than fit into
 *         one column block
 */
@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;

    // decide upon broadcast side inputs
    boolean[] bcVect = determineBroadcastInputs(sec, _in);
    boolean[] bcVect2 = getMatrixBroadcastVector(sec, _in, bcVect);
    int main = getMainInputIndex(_in, bcVect);

    // create joined input rdd w/ replication if needed
    MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(_in[main].getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock[]> in = createJoinedInputRDD(sec, _in, bcVect, (_class.getSuperclass() == SpoofOuterProduct.class));
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = null;

    // create lists of input broadcasts and scalars
    ArrayList<PartitionedBroadcast<MatrixBlock>> bcMatrices = new ArrayList<>();
    ArrayList<ScalarObject> scalars = new ArrayList<>();
    for (int i = 0; i < _in.length; i++) {
        if (_in[i].getDataType() == DataType.MATRIX && bcVect[i]) {
            bcMatrices.add(sec.getBroadcastForVariable(_in[i].getName()));
        } else if (_in[i].getDataType() == DataType.SCALAR) {
            // note: even if literal, it might be compiled as scalar placeholder
            scalars.add(sec.getScalarInput(_in[i].getName(), _in[i].getValueType(), _in[i].isLiteral()));
        }
    }

    // execute generated operator
    if (_class.getSuperclass() == SpoofCellwise.class) {
        // CELL
        SpoofCellwise op = (SpoofCellwise) CodegenUtils.createInstance(_class);
        AggregateOperator aggop = getAggregateOperator(op.getAggOp());

        // execute codegen block operation (shared by the matrix and scalar output cases)
        out = in.mapPartitionsToPair(new CellwiseFunction(_class.getName(), _classBytes, bcVect2, bcMatrices, scalars), true);

        if (_out.getDataType() == DataType.MATRIX) {
            // row/column aggregates spanning multiple blocks need a final aggregation by key
            if ((op.getCellType() == CellType.ROW_AGG && mcIn.getCols() > mcIn.getColsPerBlock())
                || (op.getCellType() == CellType.COL_AGG && mcIn.getRows() > mcIn.getRowsPerBlock())) {
                long numBlocks = (op.getCellType() == CellType.ROW_AGG) ? mcIn.getNumRowBlocks() : mcIn.getNumColBlocks();
                out = RDDAggregateUtils.aggByKeyStable(out, aggop, (int) Math.min(out.getNumPartitions(), numBlocks), false);
            }
            sec.setRDDHandleForVariable(_out.getName(), out);
            // maintain lineage info and output characteristics
            maintainLineageInfo(sec, _in, bcVect, _out);
            updateOutputMatrixCharacteristics(sec, op);
        } else {
            // SCALAR: aggregate all blocks into a single block and read its (0,0) cell
            MatrixBlock tmpMB = RDDAggregateUtils.aggStable(out, aggop);
            sec.setVariable(_out.getName(), new DoubleObject(tmpMB.getValue(0, 0)));
        }
    } else if (_class.getSuperclass() == SpoofMultiAggregate.class) {
        // MAGG
        SpoofMultiAggregate op = (SpoofMultiAggregate) CodegenUtils.createInstance(_class);
        AggOp[] aggOps = op.getAggOps();
        MatrixBlock tmpMB = in
            .mapToPair(new MultiAggregateFunction(_class.getName(), _classBytes, bcVect2, bcMatrices, scalars))
            .values()
            .fold(new MatrixBlock(), new MultiAggAggregateFunction(aggOps));
        sec.setMatrixOutput(_out.getName(), tmpMB, getExtendedOpcode());
    } else if (_class.getSuperclass() == SpoofOuterProduct.class) {
        // OUTER
        if (_out.getDataType() == DataType.MATRIX) {
            SpoofOperator op = (SpoofOperator) CodegenUtils.createInstance(_class);
            OutProdType type = ((SpoofOuterProduct) op).getOuterProdType();
            // update matrix characteristics
            updateOutputMatrixCharacteristics(sec, op);
            MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(_out.getName());
            out = in.mapPartitionsToPair(new OuterProductFunction(_class.getName(), _classBytes, bcVect2, bcMatrices, scalars), true);
            if (type == OutProdType.LEFT_OUTER_PRODUCT || type == OutProdType.RIGHT_OUTER_PRODUCT) {
                long numBlocks = mcOut.getNumRowBlocks() * mcOut.getNumColBlocks();
                out = RDDAggregateUtils.sumByKeyStable(out, (int) Math.min(out.getNumPartitions(), numBlocks), false);
            }
            sec.setRDDHandleForVariable(_out.getName(), out);
            // maintain lineage info and output characteristics
            maintainLineageInfo(sec, _in, bcVect, _out);
        } else {
            out = in.mapPartitionsToPair(new OuterProductFunction(_class.getName(), _classBytes, bcVect2, bcMatrices, scalars), true);
            MatrixBlock tmp = RDDAggregateUtils.sumStable(out);
            sec.setVariable(_out.getName(), new DoubleObject(tmp.getValue(0, 0)));
        }
    } else if (_class.getSuperclass() == SpoofRowwise.class) {
        // ROW: only inputs whose columns fit into a single column block are accepted
        if (mcIn.getCols() > mcIn.getColsPerBlock()) {
            throw new DMLRuntimeException("Invalid spark rowwise operator w/ ncol=" + mcIn.getCols() + ", ncolpb=" + mcIn.getColsPerBlock() + ".");
        }
        SpoofRowwise op = (SpoofRowwise) CodegenUtils.createInstance(_class);
        long clen2 = op.getRowType().isConstDim2(op.getConstDim2()) ? op.getConstDim2()
            : op.getRowType().isRowTypeB1() ? sec.getMatrixCharacteristics(_in[1].getName()).getCols() : -1;
        RowwiseFunction fmmc = new RowwiseFunction(_class.getName(), _classBytes, bcVect2, bcMatrices, scalars, (int) mcIn.getCols(), (int) clen2);
        out = in.mapPartitionsToPair(fmmc, op.getRowType() == RowType.ROW_AGG || op.getRowType() == RowType.NO_AGG);
        if (op.getRowType().isColumnAgg() || op.getRowType() == RowType.FULL_AGG) {
            MatrixBlock tmpMB = RDDAggregateUtils.sumStable(out);
            if (op.getRowType().isColumnAgg()) {
                sec.setMatrixOutput(_out.getName(), tmpMB, getExtendedOpcode());
            } else {
                sec.setScalarOutput(_out.getName(), new DoubleObject(tmpMB.quickGetValue(0, 0)));
            }
        } else {
            // row-agg or no-agg: the guard at the top of this branch already rejected inputs
            // with more than one column block, so no aggregation across blocks is required
            sec.setRDDHandleForVariable(_out.getName(), out);
            // maintain lineage info and output characteristics
            maintainLineageInfo(sec, _in, bcVect, _out);
            updateOutputMatrixCharacteristics(sec, op);
        }
    } else {
        throw new DMLRuntimeException("Operator " + _class.getSuperclass() + " is not supported on Spark");
    }
}
use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project incubator-systemml by apache.
the class UaggOuterChainSPInstruction method processInstruction.
/**
 * Executes the UAggOuterChain instruction on Spark: one input is processed as a
 * binary-block RDD against a broadcast of the other, optionally followed by a final
 * aggregation.
 * <p>
 * The second input is the broadcast side when the index function is {@code ReduceCol} or
 * {@code ReduceAll}, or when {@code LibMatrixOuterAgg.isSupportedUaggOp} rejects the
 * operator combination; otherwise the first input is broadcast. For a {@code ReduceAll}
 * index function the result is aggregated into a single block and set as matrix output;
 * in all other cases the output is registered as an RDD handle with lineage.
 *
 * @param ec execution context; must be a {@code SparkExecutionContext}
 */
@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;

    // choose which operand is the RDD and which one is broadcast
    final boolean rightCached = _uaggOp.indexFn instanceof ReduceCol
        || _uaggOp.indexFn instanceof ReduceAll
        || !LibMatrixOuterAgg.isSupportedUaggOp(_uaggOp, _bOp);
    final String rddVar;
    final String bcastVar;
    if (rightCached) {
        rddVar = input1.getName();
        bcastVar = input2.getName();
    } else {
        rddVar = input2.getName();
        bcastVar = input1.getName();
    }

    // get rdd input
    JavaPairRDD<MatrixIndexes, MatrixBlock> rddInput = sec.getBinaryBlockRDDHandleForVariable(rddVar);
    MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(rddVar);
    boolean noKeyChange = preservesPartitioning(mcIn, _uaggOp.indexFn);

    // execute UAggOuterChain instruction
    JavaPairRDD<MatrixIndexes, MatrixBlock> out;
    // name of the broadcast variable to record in the lineage, or null to record none
    String lineageBcastVar;
    if (LibMatrixOuterAgg.isSupportedUaggOp(_uaggOp, _bOp)) {
        // read the broadcast-side matrix locally and release it right away
        MatrixBlock localBlock = sec.getMatrixInput(bcastVar, getExtendedOpcode());
        sec.releaseMatrixInput(bcastVar, getExtendedOpcode());
        // prevent lineage tracking: the data is shipped via plain Spark broadcasts below
        lineageBcastVar = null;

        double[] values = DataConverter.convertToDoubleVector(localBlock);
        Broadcast<int[]> indexBroadcast = null;
        if (_uaggOp.aggOp.increOp.fn instanceof Builtin) {
            int[] rowIndices = LibMatrixOuterAgg.prepareRowIndices(localBlock.getNumColumns(), values, _bOp, _uaggOp);
            indexBroadcast = sec.getSparkContext().broadcast(rowIndices);
        } else {
            Arrays.sort(values);
        }
        Broadcast<double[]> valueBroadcast = sec.getSparkContext().broadcast(values);

        // partitioning-preserving map-to-pair (under constraints)
        RDDMapUAggOuterChainFunction mapFunction =
            new RDDMapUAggOuterChainFunction(valueBroadcast, indexBroadcast, _bOp, _uaggOp);
        out = rddInput.mapPartitionsToPair(mapFunction, noKeyChange);
    } else {
        lineageBcastVar = bcastVar;
        PartitionedBroadcast<MatrixBlock> partitionedBroadcast = sec.getBroadcastForVariable(bcastVar);

        // partitioning-preserving map-to-pair (under constraints)
        RDDMapGenUAggOuterChainFunction mapFunction =
            new RDDMapGenUAggOuterChainFunction(partitionedBroadcast, _uaggOp, _aggOp, _bOp, mcIn);
        out = rddInput.mapPartitionsToPair(mapFunction, noKeyChange);
    }

    // final aggregation if required
    if (_uaggOp.indexFn instanceof ReduceAll) {
        // RC AGG (output is scalar)
        MatrixBlock aggregated = RDDAggregateUtils.aggStable(out, _aggOp);
        // drop correction after aggregation
        aggregated.dropLastRowsOrColumns(_aggOp.correctionLocation);
        // put output block into symbol table (no lineage because single block)
        sec.setMatrixOutput(output.getName(), aggregated, getExtendedOpcode());
        return;
    }

    // R/C AGG (output is rdd): put output RDD handle into symbol table
    updateUnaryAggOutputMatrixCharacteristics(sec);
    if (_uaggOp.aggOp.correctionExists) {
        out = out.mapValues(new AggregateDropCorrectionFunction(_uaggOp.aggOp));
    }
    sec.setRDDHandleForVariable(output.getName(), out);
    sec.addLineageRDD(output.getName(), rddVar);
    if (lineageBcastVar != null) {
        sec.addLineageBroadcast(output.getName(), lineageBcastVar);
    }
}
Aggregations