Use of org.apache.sysml.runtime.instructions.spark.functions.IsBlockInRange in project incubator-systemml by apache.
The class ReorgSPInstruction, method processInstruction:
@Override
public void processInstruction(ExecutionContext ec) throws DMLRuntimeException {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    String opcode = getOpcode();

    //get input rdd handle
    JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = null;
    MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(input1.getName());

    if (opcode.equalsIgnoreCase("r'")) { //TRANSPOSE
        //execute transpose reorg operation
        out = in1.mapToPair(new ReorgMapFunction(opcode));
    }
    else if (opcode.equalsIgnoreCase("rev")) { //REVERSE
        //execute reverse reorg operation
        out = in1.flatMapToPair(new RDDRevFunction(mcIn));
        if (mcIn.getRows() % mcIn.getRowsPerBlock() != 0)
            out = RDDAggregateUtils.mergeByKey(out, false);
    }
    else if (opcode.equalsIgnoreCase("rdiag")) { //DIAG
        if (mcIn.getCols() == 1) { //diagV2M
            out = in1.flatMapToPair(new RDDDiagV2MFunction(mcIn));
        }
        else { //diagM2V
            //execute diagM2V operation
            out = in1.filter(new FilterDiagBlocksFunction())
                     .mapToPair(new ReorgMapFunction(opcode));
        }
    }
    else if (opcode.equalsIgnoreCase("rsort")) { //ORDER
        //sort by column 'col' in ascending/descending order and return either index/value
        //get parameters
        long col = ec.getScalarInput(_col.getName(), _col.getValueType(), _col.isLiteral()).getLongValue();
        boolean desc = ec.getScalarInput(_desc.getName(), _desc.getValueType(), _desc.isLiteral()).getBooleanValue();
        boolean ixret = ec.getScalarInput(_ixret.getName(), _ixret.getValueType(), _ixret.isLiteral()).getBooleanValue();
        boolean singleCol = (mcIn.getCols() == 1);

        //extract column (if necessary) and sort
        out = in1;
        if (!singleCol) {
            out = out.filter(new IsBlockInRange(1, mcIn.getRows(), col, col, mcIn))
                     .mapValues(new ExtractColumn((int) UtilFunctions.computeCellInBlock(col, mcIn.getColsPerBlock())));
        }

        //actual index/data sort operation
        if (ixret) {
            //sort indexes
            out = RDDSortUtils.sortIndexesByVal(out, !desc, mcIn.getRows(), mcIn.getRowsPerBlock());
        }
        else if (singleCol && !desc) {
            //sort single-column matrix
            out = RDDSortUtils.sortByVal(out, mcIn.getRows(), mcIn.getRowsPerBlock());
        }
        else {
            //sort multi-column matrix
            if (!_bSortIndInMem)
                out = RDDSortUtils.sortDataByVal(out, in1, !desc, mcIn.getRows(), mcIn.getCols(), mcIn.getRowsPerBlock(), mcIn.getColsPerBlock());
            else
                out = RDDSortUtils.sortDataByValMemSort(out, in1, !desc, mcIn.getRows(), mcIn.getCols(), mcIn.getRowsPerBlock(), mcIn.getColsPerBlock(), sec, (ReorgOperator) _optr);
        }
    }
    else {
        throw new DMLRuntimeException("Error: Incorrect opcode in ReorgSPInstruction:" + opcode);
    }

    //store output rdd handle
    updateReorgMatrixCharacteristics(sec);
    sec.setRDDHandleForVariable(output.getName(), out);
    sec.addLineageRDD(output.getName(), input1.getName());
}
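In the rsort branch above, IsBlockInRange(1, mcIn.getRows(), col, col, mcIn) keeps only the blocks that intersect the sort column before ExtractColumn slices it out. As a rough standalone sketch (an illustrative re-derivation from 1-based cell indexing, not the actual SystemML class), the predicate amounts to a rectangle-overlap test between a block's cell range and the query range:

    //hypothetical helper: does block (bi, bj) with block sizes brlen x bclen
    //overlap the 1-based, inclusive cell rectangle [rl,ru] x [cl,cu]?
    static boolean blockInRange(long bi, long bj, int brlen, int bclen,
                                long rl, long ru, long cl, long cu) {
        //block (bi, bj) covers cells [(bi-1)*brlen+1, bi*brlen] x [(bj-1)*bclen+1, bj*bclen]
        long blkRl = (bi - 1) * (long) brlen + 1, blkRu = bi * (long) brlen;
        long blkCl = (bj - 1) * (long) bclen + 1, blkCu = bj * (long) bclen;
        //keep the block iff both intervals overlap the query rectangle
        return blkRu >= rl && blkRl <= ru && blkCu >= cl && blkCl <= cu;
    }

For the single-column query used here (cl == cu == col), this retains exactly one block column of the input.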
Use of org.apache.sysml.runtime.instructions.spark.functions.IsBlockInRange in project incubator-systemml by apache.
The class Tsmm2SPInstruction, method processInstruction:
@Override
public void processInstruction(ExecutionContext ec) throws DMLRuntimeException {
    SparkExecutionContext sec = (SparkExecutionContext) ec;

    //get input
    JavaPairRDD<MatrixIndexes, MatrixBlock> in = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    MatrixCharacteristics mc = sec.getMatrixCharacteristics(input1.getName());

    //execute tsmm2 instruction
    //step 1: first pass of X, filter-collect-broadcast excess blocks
    JavaPairRDD<MatrixIndexes, MatrixBlock> tmp1 = in
        .filter(new IsBlockInRange(
            _type.isLeft() ? 1 : mc.getRowsPerBlock() + 1, mc.getRows(),
            _type.isLeft() ? mc.getColsPerBlock() + 1 : 1, mc.getCols(), mc))
        .mapToPair(new ShiftTSMMIndexesFunction(_type));
    PartitionedBlock<MatrixBlock> pmb = SparkExecutionContext.toPartitionedMatrixBlock(tmp1,
        (int) (_type.isLeft() ? mc.getRows() : mc.getRows() - mc.getRowsPerBlock()),
        (int) (_type.isLeft() ? mc.getCols() - mc.getColsPerBlock() : mc.getCols()),
        mc.getRowsPerBlock(), mc.getColsPerBlock(), -1L);
    Broadcast<PartitionedBlock<MatrixBlock>> bpmb = sec.getSparkContext().broadcast(pmb);

    //step 2: second pass of X, compute tsmm/mapmm and aggregate result blocks
    int outputDim = (int) (_type.isLeft() ? mc.getCols() : mc.getRows());
    if (OptimizerUtils.estimateSize(outputDim, outputDim) <= 32 * 1024 * 1024) { //default: <=32MB
        //output large blocks and reduceAll to avoid skew on combineByKey
        JavaRDD<MatrixBlock> tmp2 = in.map(new RDDTSMM2ExtFunction(bpmb, _type, outputDim, (int) mc.getRowsPerBlock()));
        MatrixBlock out = RDDAggregateUtils.sumStable(tmp2);

        //put output block into symbol table (no lineage because single block)
        //this also includes implicit maintenance of matrix characteristics
        sec.setMatrixOutput(output.getName(), out);
    }
    else {
        //output individual output blocks and aggregate by key (no action)
        JavaPairRDD<MatrixIndexes, MatrixBlock> tmp2 = in.flatMapToPair(new RDDTSMM2Function(bpmb, _type));
        JavaPairRDD<MatrixIndexes, MatrixBlock> out = RDDAggregateUtils.sumByKeyStable(tmp2, false);

        //put output RDD handle into symbol table
        sec.getMatrixCharacteristics(output.getName()).set(outputDim, outputDim, mc.getRowsPerBlock(), mc.getColsPerBlock());
        sec.setRDDHandleForVariable(output.getName(), out);
        sec.addLineageRDD(output.getName(), input1.getName());
    }
}
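The branch on OptimizerUtils.estimateSize(outputDim, outputDim) decides between producing a single aggregated output block and keeping a distributed RDD of blocks. Assuming a dense double representation at roughly 8 bytes per cell (an illustrative approximation of the worst-case estimate, not SystemML's exact cost model), the threshold check reduces to:

    //hypothetical sketch of the dense-size estimate behind the 32MB threshold
    static boolean fitsSingleBlockOutput(long outputDim) {
        long estBytes = outputDim * outputDim * 8L; //dense doubles, 8 bytes per cell
        return estBytes <= 32L * 1024 * 1024;       //default threshold: 32MB
    }

Under this approximation, the single-block path covers outputs up to about 2048 x 2048 dense doubles (2048 * 2048 * 8 bytes = 32MB exactly).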
Use of org.apache.sysml.runtime.instructions.spark.functions.IsBlockInRange in project incubator-systemml by apache.
The class PMapmmSPInstruction, method processInstruction:
@Override
public void processInstruction(ExecutionContext ec) throws DMLRuntimeException {
    SparkExecutionContext sec = (SparkExecutionContext) ec;

    //get inputs
    JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> in2 = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
    MatrixCharacteristics mc1 = sec.getMatrixCharacteristics(input1.getName());

    //This avoids errors such as java.lang.UnsupportedOperationException:
    //Cannot change storage level of an RDD after it was already assigned a level.
    //Ideally, we should ensure that we do not redundantly call persist on the same RDD.
    StorageLevel pmapmmStorageLevel = StorageLevel.MEMORY_AND_DISK();

    //cache right-hand side because it is accessed many times
    in2 = in2.repartition(sec.getSparkContext().defaultParallelism()).persist(pmapmmStorageLevel);

    JavaPairRDD<MatrixIndexes, MatrixBlock> out = null;
    for (int i = 0; i < mc1.getRows(); i += NUM_ROWBLOCKS * mc1.getRowsPerBlock()) {
        //create broadcast for rdd partition
        JavaPairRDD<MatrixIndexes, MatrixBlock> rdd = in1
            .filter(new IsBlockInRange(i + 1, i + NUM_ROWBLOCKS * mc1.getRowsPerBlock(), 1, mc1.getCols(), mc1))
            .mapToPair(new PMapMMRebaseBlocksFunction(i / mc1.getRowsPerBlock()));
        int rlen = (int) Math.min(mc1.getRows() - i, NUM_ROWBLOCKS * mc1.getRowsPerBlock());
        PartitionedBlock<MatrixBlock> pmb = SparkExecutionContext.toPartitionedMatrixBlock(rdd, rlen, (int) mc1.getCols(), mc1.getRowsPerBlock(), mc1.getColsPerBlock(), -1L);
        Broadcast<PartitionedBlock<MatrixBlock>> bpmb = sec.getSparkContext().broadcast(pmb);

        //matrix multiplication
        JavaPairRDD<MatrixIndexes, MatrixBlock> rdd2 = in2.flatMapToPair(new PMapMMFunction(bpmb, i / mc1.getRowsPerBlock()));
        rdd2 = RDDAggregateUtils.sumByKeyStable(rdd2, false);
        rdd2.persist(pmapmmStorageLevel).count();
        bpmb.unpersist(false);

        out = (out == null) ? rdd2 : out.union(rdd2);
    }

    //cache final result
    out = out.persist(pmapmmStorageLevel);
    out.count();

    //put output RDD handle into symbol table
    sec.setRDDHandleForVariable(output.getName(), out);
    sec.addLineageRDD(output.getName(), input1.getName());
    sec.addLineageRDD(output.getName(), input2.getName());

    //update output statistics if not inferred
    updateBinaryMMOutputMatrixCharacteristics(sec, true);
}
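Each loop iteration broadcasts one horizontal stripe of in1, NUM_ROWBLOCKS block rows at a time, and the IsBlockInRange filter selects exactly that stripe. A minimal sketch of the 1-based, inclusive row range seen in the iteration starting at 0-based offset i (names are illustrative; in the code above the upper bound is left unclipped because no blocks exist beyond the matrix boundary):

    //hypothetical helper: cell-row range [rl, ru] of the stripe at offset i
    static long[] stripeRowRange(long i, int numRowBlocks, int brlen, long numRows) {
        long rl = i + 1;
        long ru = Math.min(i + (long) numRowBlocks * brlen, numRows);
        return new long[] { rl, ru };
    }

This mirrors the rlen computation, which clips the last stripe to mc1.getRows() - i rows.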
Use of org.apache.sysml.runtime.instructions.spark.functions.IsBlockInRange in project incubator-systemml by apache.
The class MatrixIndexingSPInstruction, method multiBlockIndexing:
private static MatrixBlock multiBlockIndexing(JavaPairRDD<MatrixIndexes, MatrixBlock> in1,
    MatrixCharacteristics mcIn, MatrixCharacteristics mcOut, IndexRange ixrange) throws DMLRuntimeException {
    //create list of all required matrix indexes
    List<MatrixIndexes> filter = new ArrayList<MatrixIndexes>();
    long rlix = UtilFunctions.computeBlockIndex(ixrange.rowStart, mcIn.getRowsPerBlock());
    long ruix = UtilFunctions.computeBlockIndex(ixrange.rowEnd, mcIn.getRowsPerBlock());
    long clix = UtilFunctions.computeBlockIndex(ixrange.colStart, mcIn.getColsPerBlock());
    long cuix = UtilFunctions.computeBlockIndex(ixrange.colEnd, mcIn.getColsPerBlock());
    for (long r = rlix; r <= ruix; r++)
        for (long c = clix; c <= cuix; c++)
            filter.add(new MatrixIndexes(r, c));

    //wrap PartitionPruningRDD around input to exploit pruning for out-of-core datasets
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = createPartitionPruningRDD(in1, filter);
    out = out.filter(new IsBlockInRange(ixrange.rowStart, ixrange.rowEnd, ixrange.colStart, ixrange.colEnd, mcOut)) //filter unnecessary blocks
             .mapToPair(new SliceBlock2(ixrange, mcOut)); //slice relevant blocks

    //collect output without shuffle to avoid side-effects with custom PartitionPruningRDD
    MatrixBlock mbout = SparkExecutionContext.toMatrixBlock(out, (int) mcOut.getRows(), (int) mcOut.getCols(),
        mcOut.getRowsPerBlock(), mcOut.getColsPerBlock(), -1);
    return mbout;
}
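The block-index bounds rlix..ruix and clix..cuix are derived from the 1-based cell range via UtilFunctions.computeBlockIndex, which for 1-based indexing is simply a ceiling division. A minimal sketch of the assumed mapping:

    //sketch of the 1-based cell-to-block index mapping: cell k in a dimension
    //with block size blockSize lives in block ceil(k / blockSize)
    static long computeBlockIndex(long cellIndex, int blockSize) {
        return (cellIndex - 1) / blockSize + 1;
    }

For example, with a block size of 1000, cells 1..1000 map to block 1 and cells 1001..2000 to block 2, so an index range [950, 1050] enumerates blocks 1 and 2 for the pruning filter.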