use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.
the class MatrixIndexingSPInstruction method singleBlockIndexing.
/**
 * Serves an indexing request from a single input block obtained via a key lookup.
 *
 * The lookup (rather than a filter over all blocks) is used because, on partitioned
 * inputs, it allows single-partition access and avoids a full scan of the input; this
 * matters especially for out-of-core datasets, where entire partitions are read and
 * not just keys as in the in-memory setting.
 *
 * @param in1 binary-block input RDD
 * @param mcIn characteristics of the input (block sizes are taken from here)
 * @param mcOut characteristics of the output (dimensions are taken from here)
 * @param ixrange requested index range
 * @return the looked-up block itself if its dimensions equal the output dimensions,
 *         otherwise a newly sliced sub-block
 * @throws DMLRuntimeException if the lookup does not return exactly one block
 */
private static MatrixBlock singleBlockIndexing(JavaPairRDD<MatrixIndexes, MatrixBlock> in1, MatrixCharacteristics mcIn, MatrixCharacteristics mcOut, IndexRange ixrange) throws DMLRuntimeException {
    final int brlen = mcIn.getRowsPerBlock();
    final int bclen = mcIn.getColsPerBlock();

    //key of the block that holds the upper-left cell of the requested range
    MatrixIndexes key = new MatrixIndexes(
        UtilFunctions.computeBlockIndex(ixrange.rowStart, brlen),
        UtilFunctions.computeBlockIndex(ixrange.colStart, bclen));

    //fetch the block and insist on exactly one match
    List<MatrixBlock> blocks = in1.lookup(key);
    if (blocks.size() != 1) {
        throw new DMLRuntimeException("Block lookup returned " + blocks.size() + " blocks (expected 1).");
    }
    MatrixBlock block = blocks.get(0);

    //full block requested: hand out the reference without copying
    boolean matchesOutputDims = block.getNumRows() == mcOut.getRows()
        && block.getNumColumns() == mcOut.getCols();
    if (matchesOutputDims) {
        return block;
    }

    //otherwise slice out the sub-block using block-local cell positions
    return block.sliceOperations(
        UtilFunctions.computeCellInBlock(ixrange.rowStart, brlen),
        UtilFunctions.computeCellInBlock(ixrange.rowEnd, brlen),
        UtilFunctions.computeCellInBlock(ixrange.colStart, bclen),
        UtilFunctions.computeCellInBlock(ixrange.colEnd, bclen),
        new MatrixBlock());
}
use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.
the class MapmmSPInstruction method processInstruction.
/**
 * Executes the map-side matrix multiplication: one input is consumed as a binary-block
 * RDD, the other as a partitioned broadcast, and the result is registered under the
 * output variable of this instruction.
 *
 * Which input plays which role is derived from {@code _type}; the roles may be flipped
 * if repartitioning is required and the flipped setup yields a larger repartitioning
 * count. Depending on {@code _aggtype}, the result is either a single aggregated block
 * or an RDD of blocks (optionally summed by key).
 *
 * @param ec execution context; cast to {@code SparkExecutionContext}
 * @throws DMLRuntimeException declared by the signature; presumably raised by the
 *         context or aggregation helpers - TODO confirm
 */
@Override
public void processInstruction(ExecutionContext ec) throws DMLRuntimeException {
SparkExecutionContext sec = (SparkExecutionContext) ec;
//local copy of the cache type because it may be flipped below
CacheType type = _type;
//isRight(): input2 is the broadcast and input1 the rdd; otherwise vice versa
String rddVar = type.isRight() ? input1.getName() : input2.getName();
String bcastVar = type.isRight() ? input2.getName() : input1.getName();
MatrixCharacteristics mcRdd = sec.getMatrixCharacteristics(rddVar);
MatrixCharacteristics mcBc = sec.getMatrixCharacteristics(bcastVar);
//get input rdd
JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(rddVar);
//investigate if a repartitioning - including a potential flip of broadcast and rdd
//inputs - is required to ensure moderately sized output partitions (2GB limitation)
if (requiresFlatMapFunction(type, mcBc) && requiresRepartitioning(type, mcRdd, mcBc, in1.getNumPartitions())) {
//compare the repartitioning counts of the current and the flipped setup
int numParts = getNumRepartitioning(type, mcRdd, mcBc);
int numParts2 = getNumRepartitioning(type.getFlipped(), mcBc, mcRdd);
if (numParts2 > numParts) {
//flip required
type = type.getFlipped();
//re-derive all role-dependent state from the flipped type
rddVar = type.isRight() ? input1.getName() : input2.getName();
bcastVar = type.isRight() ? input2.getName() : input1.getName();
mcRdd = sec.getMatrixCharacteristics(rddVar);
mcBc = sec.getMatrixCharacteristics(bcastVar);
in1 = sec.getBinaryBlockRDDHandleForVariable(rddVar);
//note: at this point bcastVar names the former rdd and rddVar the former broadcast
LOG.warn("Mapmm: Switching rdd ('" + bcastVar + "') and broadcast ('" + rddVar + "') inputs " + "for repartitioning because this allows better control of output partition " + "sizes (" + numParts + " < " + numParts2 + ").");
}
}
//get inputs
//(broadcast is obtained only after the potential flip so it matches the final bcastVar)
PartitionedBroadcast<MatrixBlock> in2 = sec.getBroadcastForVariable(bcastVar);
//empty input block filter
if (!_outputEmpty)
in1 = in1.filter(new FilterNonEmptyBlocksFunction());
//execute mapmm and aggregation if necessary and put output into symbol table
if (_aggtype == SparkAggType.SINGLE_BLOCK) {
//multiply per input block and sum all partial results into one block
JavaRDD<MatrixBlock> out = in1.map(new RDDMapMMFunction2(type, in2));
MatrixBlock out2 = RDDAggregateUtils.sumStable(out);
//put output block into symbol table (no lineage because single block)
//this also includes implicit maintenance of matrix characteristics
sec.setMatrixOutput(output.getName(), out2);
} else //MULTI_BLOCK or NONE
{
JavaPairRDD<MatrixIndexes, MatrixBlock> out = null;
//choose the map variant: flat map, partition-wise map, or plain block-wise map
if (requiresFlatMapFunction(type, mcBc)) {
//same repartitioning check as above, now evaluated on the (potentially flipped) inputs
if (requiresRepartitioning(type, mcRdd, mcBc, in1.getNumPartitions())) {
int numParts = getNumRepartitioning(type, mcRdd, mcBc);
LOG.warn("Mapmm: Repartition input rdd '" + rddVar + "' from " + in1.getNumPartitions() + " to " + numParts + " partitions to satisfy size restrictions of output partitions.");
in1 = in1.repartition(numParts);
}
out = in1.flatMapToPair(new RDDFlatMapMMFunction(type, in2));
} else if (preservesPartitioning(mcRdd, type))
//second argument 'true' is presumably Spark's preservesPartitioning flag - TODO confirm
out = in1.mapPartitionsToPair(new RDDMapMMPartitionFunction(type, in2), true);
else
out = in1.mapToPair(new RDDMapMMFunction(type, in2));
//empty output block filter
if (!_outputEmpty)
out = out.filter(new FilterNonEmptyBlocksFunction());
//sum partial result blocks per output index only for MULTI_BLOCK; NONE skips this step
if (_aggtype == SparkAggType.MULTI_BLOCK)
out = RDDAggregateUtils.sumByKeyStable(out, false);
//put output RDD handle into symbol table
sec.setRDDHandleForVariable(output.getName(), out);
//register both the rdd input and the broadcast input as lineage of the output
sec.addLineageRDD(output.getName(), rddVar);
sec.addLineageBroadcast(output.getName(), bcastVar);
//update output statistics if not inferred
updateBinaryMMOutputMatrixCharacteristics(sec, true);
}
}
use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.
the class CumulativeOffsetSPInstruction method processInstruction.
/**
 * Applies the cumulative operation with offsets: the aggregate input (input2) is split
 * via {@code RDDCumSplitFunction}, joined with the data input (input1) by block index,
 * and each joined pair is mapped through {@code RDDCumOffsetFunction}. The resulting
 * RDD is registered under the output variable with both inputs as lineage.
 *
 * @param ec execution context; cast to {@code SparkExecutionContext}
 * @throws DMLRuntimeException declared by the signature
 */
@Override
public void processInstruction(ExecutionContext ec) throws DMLRuntimeException {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    final String dataVar = input1.getName();
    final String aggVar = input2.getName();

    //row count and row block size are taken from the aggregate input (input2)
    MatrixCharacteristics aggMc = sec.getMatrixCharacteristics(aggVar);
    long numRows = aggMc.getRows();
    int rowsPerBlock = aggMc.getRowsPerBlock();

    //obtain both input handles
    JavaPairRDD<MatrixIndexes, MatrixBlock> data = sec.getBinaryBlockRDDHandleForVariable(dataVar);
    JavaPairRDD<MatrixIndexes, MatrixBlock> aggregates = sec.getBinaryBlockRDDHandleForVariable(aggVar);

    //cumsplit of offsets
    JavaPairRDD<MatrixIndexes, MatrixBlock> offsets =
        aggregates.flatMapToPair(new RDDCumSplitFunction(_initValue, numRows, rowsPerBlock));

    //join data with offsets and apply the cumulative op per block
    JavaPairRDD<MatrixIndexes, MatrixBlock> out =
        data.join(offsets).mapValues(new RDDCumOffsetFunction(_uop, _bop));
    updateUnaryOutputMatrixCharacteristics(sec);

    //register output handle and its lineage in the symbol table
    sec.setRDDHandleForVariable(output.getName(), out);
    sec.addLineageRDD(output.getName(), dataVar);
    sec.addLineageRDD(output.getName(), aggVar);
}
use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.
the class CumulativeAggregateSPInstruction method processInstruction.
/**
 * Computes the cumulative aggregate: every block of input1 is mapped through
 * {@code RDDCumAggFunction}, the mapped blocks are merged by key, and the resulting
 * RDD is registered under the output variable with input1 as lineage.
 *
 * @param ec execution context; cast to {@code SparkExecutionContext}
 * @throws DMLRuntimeException declared by the signature
 */
@Override
public void processInstruction(ExecutionContext ec) throws DMLRuntimeException {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    final String inVar = input1.getName();

    //dimensions and block sizes of the input
    MatrixCharacteristics inMc = sec.getMatrixCharacteristics(inVar);
    long numRows = inMc.getRows();
    int rowsPerBlock = inMc.getRowsPerBlock();
    int colsPerBlock = inMc.getColsPerBlock();

    //input handle
    JavaPairRDD<MatrixIndexes, MatrixBlock> blocks = sec.getBinaryBlockRDDHandleForVariable(inVar);

    //unary aggregate per block (w/ implicit drop correction), then merge by key
    AggregateUnaryOperator aggOp = (AggregateUnaryOperator) _optr;
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = RDDAggregateUtils.mergeByKey(
        blocks.mapToPair(new RDDCumAggFunction(aggOp, numRows, rowsPerBlock, colsPerBlock)),
        false);

    //register output handle and its lineage in the symbol table
    sec.setRDDHandleForVariable(output.getName(), out);
    sec.addLineageRDD(output.getName(), inVar);
}
use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.
the class IndexSortStitchupMapper method configure.
/**
 * Initializes the mapper state from the job configuration: the sort index offsets,
 * the row count and row block size of input 0, and reusable index/block buffers.
 *
 * @param job job configuration to read from
 */
@Override
public void configure(JobConf job) {
    super.configure(job);

    //offsets are passed as a configuration entry and parsed here
    _offsets = parseOffsets(job.get(SortMR.SORT_INDEXES_OFFSETS));

    //dimensions are read for the input with index 0
    final byte inputIndex = (byte) 0;
    _rlen = MRJobConfiguration.getNumRows(job, inputIndex);
    _brlen = MRJobConfiguration.getNumRowsPerBlock(job, inputIndex);

    //temporary index and a (_brlen x 1) block
    _tmpIx = new MatrixIndexes();
    _tmpBlk = new MatrixBlock((int) _brlen, 1, false);
}
Aggregations