Usage example from the Apache incubator-systemml project.
Class MatrixAppendMSPInstruction, method processInstruction.
public void processInstruction(ExecutionContext ec) {
// map-only append (rhs must be vector and fit in mapper mem)
SparkExecutionContext sec = (SparkExecutionContext) ec;
checkBinaryAppendInputCharacteristics(sec, _cbind, false, false);
MatrixCharacteristics mc1 = sec.getMatrixCharacteristics(input1.getName());
MatrixCharacteristics mc2 = sec.getMatrixCharacteristics(input2.getName());
int brlen = mc1.getRowsPerBlock();
int bclen = mc1.getColsPerBlock();
JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
PartitionedBroadcast<MatrixBlock> in2 = sec.getBroadcastForVariable(input2.getName());
long off = sec.getScalarInput(_offset.getName(), _offset.getValueType(), _offset.isLiteral()).getLongValue();
// execute map-append operations (partitioning preserving if #in-blocks = #out-blocks)
JavaPairRDD<MatrixIndexes, MatrixBlock> out = null;
if (preservesPartitioning(mc1, mc2, _cbind)) {
out = in1.mapPartitionsToPair(new MapSideAppendPartitionFunction(in2, _cbind, off, brlen, bclen), true);
} else {
out = in1.flatMapToPair(new MapSideAppendFunction(in2, _cbind, off, brlen, bclen));
// put output RDD handle into symbol table
updateBinaryAppendOutputMatrixCharacteristics(sec, _cbind);
sec.setRDDHandleForVariable(output.getName(), out);
sec.addLineageRDD(output.getName(), input1.getName());
sec.addLineageBroadcast(output.getName(), input2.getName());
Usage example from the Apache incubator-systemml project.
Class MatrixIndexingSPInstruction, method singleBlockIndexing.
private static MatrixBlock singleBlockIndexing(JavaPairRDD<MatrixIndexes, MatrixBlock> in1, MatrixCharacteristics mcIn, MatrixCharacteristics mcOut, IndexRange ixrange) {
// single block output via lookup (on partitioned inputs, this allows for single partition
// access to avoid a full scan of the input; note that this is especially important for
// out-of-core datasets as entire partitions are read, not just keys as in the in-memory setting.
long rix = UtilFunctions.computeBlockIndex(ixrange.rowStart, mcIn.getRowsPerBlock());
long cix = UtilFunctions.computeBlockIndex(ixrange.colStart, mcIn.getColsPerBlock());
List<MatrixBlock> list = in1.lookup(new MatrixIndexes(rix, cix));
if (list.size() != 1)
throw new DMLRuntimeException("Block lookup returned " + list.size() + " blocks (expected 1).");
MatrixBlock tmp = list.get(0);
MatrixBlock mbout = (tmp.getNumRows() == mcOut.getRows() && tmp.getNumColumns() == mcOut.getCols()) ? tmp : // reference full block or slice out sub-block
tmp.slice(UtilFunctions.computeCellInBlock(ixrange.rowStart, mcIn.getRowsPerBlock()), UtilFunctions.computeCellInBlock(ixrange.rowEnd, mcIn.getRowsPerBlock()), UtilFunctions.computeCellInBlock(ixrange.colStart, mcIn.getColsPerBlock()), UtilFunctions.computeCellInBlock(ixrange.colEnd, mcIn.getColsPerBlock()), new MatrixBlock());
return mbout;
Usage example from the Apache incubator-systemml project.
Class MatrixIndexingSPInstruction, method processInstruction.
public void processInstruction(ExecutionContext ec) {
SparkExecutionContext sec = (SparkExecutionContext) ec;
String opcode = getOpcode();
// get indexing range
long rl = ec.getScalarInput(rowLower.getName(), rowLower.getValueType(), rowLower.isLiteral()).getLongValue();
long ru = ec.getScalarInput(rowUpper.getName(), rowUpper.getValueType(), rowUpper.isLiteral()).getLongValue();
long cl = ec.getScalarInput(colLower.getName(), colLower.getValueType(), colLower.isLiteral()).getLongValue();
long cu = ec.getScalarInput(colUpper.getName(), colUpper.getValueType(), colUpper.isLiteral()).getLongValue();
IndexRange ixrange = new IndexRange(rl, ru, cl, cu);
// right indexing
if (opcode.equalsIgnoreCase(RightIndex.OPCODE)) {
// update and check output dimensions
MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(input1.getName());
MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
mcOut.set(ru - rl + 1, cu - cl + 1, mcIn.getRowsPerBlock(), mcIn.getColsPerBlock());
mcOut.setNonZerosBound(Math.min(mcOut.getLength(), mcIn.getNonZerosBound()));
// execute right indexing operation (partitioning-preserving if possible)
JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
if (isSingleBlockLookup(mcIn, ixrange)) {
sec.setMatrixOutput(output.getName(), singleBlockIndexing(in1, mcIn, mcOut, ixrange), getExtendedOpcode());
} else if (isMultiBlockLookup(in1, mcIn, mcOut, ixrange)) {
sec.setMatrixOutput(output.getName(), multiBlockIndexing(in1, mcIn, mcOut, ixrange), getExtendedOpcode());
} else {
// rdd output for general case
JavaPairRDD<MatrixIndexes, MatrixBlock> out = generalCaseRightIndexing(in1, mcIn, mcOut, ixrange, _aggType);
// put output RDD handle into symbol table
sec.setRDDHandleForVariable(output.getName(), out);
sec.addLineageRDD(output.getName(), input1.getName());
} else // left indexing
if (opcode.equalsIgnoreCase(LeftIndex.OPCODE) || opcode.equalsIgnoreCase("mapLeftIndex")) {
String rddVar = (_type == LixCacheType.LEFT) ? input2.getName() : input1.getName();
String bcVar = (_type == LixCacheType.LEFT) ? input1.getName() : input2.getName();
JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(rddVar);
PartitionedBroadcast<MatrixBlock> broadcastIn2 = null;
JavaPairRDD<MatrixIndexes, MatrixBlock> in2 = null;
JavaPairRDD<MatrixIndexes, MatrixBlock> out = null;
// update and check output dimensions
MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
MatrixCharacteristics mcLeft = ec.getMatrixCharacteristics(input1.getName());
mcOut.set(mcLeft.getRows(), mcLeft.getCols(), mcLeft.getRowsPerBlock(), mcLeft.getColsPerBlock());
// note: always matrix rhs, scalars are preprocessed via cast to 1x1 matrix
MatrixCharacteristics mcRight = ec.getMatrixCharacteristics(input2.getName());
// sanity check matching index range and rhs dimensions
if (!mcRight.dimsKnown()) {
throw new DMLRuntimeException("The right input matrix dimensions are not specified for MatrixIndexingSPInstruction");
if (!(ru - rl + 1 == mcRight.getRows() && cu - cl + 1 == mcRight.getCols())) {
throw new DMLRuntimeException("Invalid index range of leftindexing: [" + rl + ":" + ru + "," + cl + ":" + cu + "] vs [" + mcRight.getRows() + "x" + mcRight.getCols() + "].");
if (opcode.equalsIgnoreCase("mapLeftIndex")) {
broadcastIn2 = sec.getBroadcastForVariable(bcVar);
// partitioning-preserving mappartitions (key access required for broadcast loopkup)
out = in1.mapPartitionsToPair(new LeftIndexPartitionFunction(broadcastIn2, ixrange, _type, mcOut), true);
} else {
// general case
// zero-out lhs
in1 = in1.mapToPair(new ZeroOutLHS(false, ixrange, mcLeft));
// slice rhs, shift and merge with lhs
in2 = sec.getBinaryBlockRDDHandleForVariable(input2.getName()).flatMapToPair(new SliceRHSForLeftIndexing(ixrange, mcLeft));
out = RDDAggregateUtils.mergeByKey(in1.union(in2));
sec.setRDDHandleForVariable(output.getName(), out);
sec.addLineageRDD(output.getName(), rddVar);
if (broadcastIn2 != null)
sec.addLineageBroadcast(output.getName(), bcVar);
if (in2 != null)
sec.addLineageRDD(output.getName(), input2.getName());
} else
throw new DMLRuntimeException("Invalid opcode (" + opcode + ") encountered in MatrixIndexingSPInstruction.");
Usage example from the Apache incubator-systemml project.
Class MatrixIndexingSPInstruction, method createPartitionPruningRDD.
* Wraps the input RDD into a PartitionPruningRDD, which acts as a filter
* of required partitions. The distinct set of required partitions is determined
* via the partitioner of the input RDD.
* @param in input matrix as {@code JavaPairRDD<MatrixIndexes,MatrixBlock>}
* @param filter partition filter
* @return matrix as {@code JavaPairRDD<MatrixIndexes,MatrixBlock>}
private static JavaPairRDD<MatrixIndexes, MatrixBlock> createPartitionPruningRDD(JavaPairRDD<MatrixIndexes, MatrixBlock> in, List<MatrixIndexes> filter) {
// build hashset of required partition ids
HashSet<Integer> flags = new HashSet<>();
Partitioner partitioner = in.rdd().partitioner().get();
for (MatrixIndexes key : filter) flags.add(partitioner.getPartition(key));
// create partition pruning rdd
Function1<Object, Object> f = new PartitionPruningFunction(flags);
PartitionPruningRDD<Tuple2<MatrixIndexes, MatrixBlock>> ppRDD = PartitionPruningRDD.create(in.rdd(), f);
// wrap output into java pair rdd
return new JavaPairRDD<>(ppRDD, ClassManifestFactory.fromClass(MatrixIndexes.class), ClassManifestFactory.fromClass(MatrixBlock.class));
Usage example from the Apache incubator-systemml project.
Class MatrixReshapeSPInstruction, method processInstruction.
public void processInstruction(ExecutionContext ec) {
SparkExecutionContext sec = (SparkExecutionContext) ec;
// get parameters
// save cast
long rows = ec.getScalarInput(_opRows.getName(), _opRows.getValueType(), _opRows.isLiteral()).getLongValue();
// save cast
long cols = ec.getScalarInput(_opCols.getName(), _opCols.getValueType(), _opCols.isLiteral()).getLongValue();
boolean byRow = ec.getScalarInput(_opByRow.getName(), ValueType.BOOLEAN, _opByRow.isLiteral()).getBooleanValue();
// get inputs
JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(input1.getName());
MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
// update output characteristics and sanity check
mcOut.set(rows, cols, mcIn.getRowsPerBlock(), mcIn.getColsPerBlock(), mcIn.getNonZeros());
if (!mcIn.nnzKnown())
if (mcIn.getRows() * mcIn.getCols() != mcOut.getRows() * mcOut.getCols()) {
throw new DMLRuntimeException("Incompatible matrix characteristics for reshape: " + mcIn.getRows() + "x" + mcIn.getCols() + " vs " + mcOut.getRows() + "x" + mcOut.getCols());
// execute reshape instruction
JavaPairRDD<MatrixIndexes, MatrixBlock> out = in1.flatMapToPair(new RDDReshapeFunction(mcIn, mcOut, byRow));
out = RDDAggregateUtils.mergeByKey(out);
// put output RDD handle into symbol table
sec.setRDDHandleForVariable(output.getName(), out);
sec.addLineageRDD(output.getName(), input1.getName());