use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.
the class PMapmmSPInstruction method processInstruction.
@Override
public void processInstruction(ExecutionContext ec) throws DMLRuntimeException {
SparkExecutionContext sec = (SparkExecutionContext) ec;
//get inputs
JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
JavaPairRDD<MatrixIndexes, MatrixBlock> in2 = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
MatrixCharacteristics mc1 = sec.getMatrixCharacteristics(input1.getName());
// This avoids errors such as java.lang.UnsupportedOperationException: Cannot change storage level of an RDD after it was already assigned a level
// Ideally, we should ensure that we donot redundantly call persist on the same RDD.
StorageLevel pmapmmStorageLevel = StorageLevel.MEMORY_AND_DISK();
//cache right hand side because accessed many times
in2 = in2.repartition(sec.getSparkContext().defaultParallelism()).persist(pmapmmStorageLevel);
JavaPairRDD<MatrixIndexes, MatrixBlock> out = null;
for (int i = 0; i < mc1.getRows(); i += NUM_ROWBLOCKS * mc1.getRowsPerBlock()) {
//create broadcast for rdd partition
JavaPairRDD<MatrixIndexes, MatrixBlock> rdd = in1.filter(new IsBlockInRange(i + 1, i + NUM_ROWBLOCKS * mc1.getRowsPerBlock(), 1, mc1.getCols(), mc1)).mapToPair(new PMapMMRebaseBlocksFunction(i / mc1.getRowsPerBlock()));
int rlen = (int) Math.min(mc1.getRows() - i, NUM_ROWBLOCKS * mc1.getRowsPerBlock());
PartitionedBlock<MatrixBlock> pmb = SparkExecutionContext.toPartitionedMatrixBlock(rdd, rlen, (int) mc1.getCols(), mc1.getRowsPerBlock(), mc1.getColsPerBlock(), -1L);
Broadcast<PartitionedBlock<MatrixBlock>> bpmb = sec.getSparkContext().broadcast(pmb);
//matrix multiplication
JavaPairRDD<MatrixIndexes, MatrixBlock> rdd2 = in2.flatMapToPair(new PMapMMFunction(bpmb, i / mc1.getRowsPerBlock()));
rdd2 = RDDAggregateUtils.sumByKeyStable(rdd2, false);
rdd2.persist(pmapmmStorageLevel).count();
bpmb.unpersist(false);
if (out == null)
out = rdd2;
else
out = out.union(rdd2);
}
//cache final result
out = out.persist(pmapmmStorageLevel);
out.count();
//put output RDD handle into symbol table
sec.setRDDHandleForVariable(output.getName(), out);
sec.addLineageRDD(output.getName(), input1.getName());
sec.addLineageRDD(output.getName(), input2.getName());
//update output statistics if not inferred
updateBinaryMMOutputMatrixCharacteristics(sec, true);
}
use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.
the class ParameterizedBuiltinSPInstruction method processInstruction.
@Override
@SuppressWarnings("unchecked")
public void processInstruction(ExecutionContext ec) throws DMLRuntimeException {
SparkExecutionContext sec = (SparkExecutionContext) ec;
String opcode = getOpcode();
//opcode guaranteed to be a valid opcode (see parsing)
if (opcode.equalsIgnoreCase("mapgroupedagg")) {
//get input rdd handle
String targetVar = params.get(Statement.GAGG_TARGET);
String groupsVar = params.get(Statement.GAGG_GROUPS);
JavaPairRDD<MatrixIndexes, MatrixBlock> target = sec.getBinaryBlockRDDHandleForVariable(targetVar);
PartitionedBroadcast<MatrixBlock> groups = sec.getBroadcastForVariable(groupsVar);
MatrixCharacteristics mc1 = sec.getMatrixCharacteristics(targetVar);
MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
CPOperand ngrpOp = new CPOperand(params.get(Statement.GAGG_NUM_GROUPS));
int ngroups = (int) sec.getScalarInput(ngrpOp.getName(), ngrpOp.getValueType(), ngrpOp.isLiteral()).getLongValue();
//single-block aggregation
if (ngroups <= mc1.getRowsPerBlock() && mc1.getCols() <= mc1.getColsPerBlock()) {
//execute map grouped aggregate
JavaRDD<MatrixBlock> out = target.map(new RDDMapGroupedAggFunction2(groups, _optr, ngroups));
MatrixBlock out2 = RDDAggregateUtils.sumStable(out);
//put output block into symbol table (no lineage because single block)
//this also includes implicit maintenance of matrix characteristics
sec.setMatrixOutput(output.getName(), out2);
} else //multi-block aggregation
{
//execute map grouped aggregate
JavaPairRDD<MatrixIndexes, MatrixBlock> out = target.flatMapToPair(new RDDMapGroupedAggFunction(groups, _optr, ngroups, mc1.getRowsPerBlock(), mc1.getColsPerBlock()));
out = RDDAggregateUtils.sumByKeyStable(out, false);
//updated characteristics and handle outputs
mcOut.set(ngroups, mc1.getCols(), mc1.getRowsPerBlock(), mc1.getColsPerBlock(), -1);
sec.setRDDHandleForVariable(output.getName(), out);
sec.addLineageRDD(output.getName(), targetVar);
sec.addLineageBroadcast(output.getName(), groupsVar);
}
} else if (opcode.equalsIgnoreCase("groupedagg")) {
boolean broadcastGroups = Boolean.parseBoolean(params.get("broadcast"));
//get input rdd handle
String groupsVar = params.get(Statement.GAGG_GROUPS);
JavaPairRDD<MatrixIndexes, MatrixBlock> target = sec.getBinaryBlockRDDHandleForVariable(params.get(Statement.GAGG_TARGET));
JavaPairRDD<MatrixIndexes, MatrixBlock> groups = broadcastGroups ? null : sec.getBinaryBlockRDDHandleForVariable(groupsVar);
JavaPairRDD<MatrixIndexes, MatrixBlock> weights = null;
MatrixCharacteristics mc1 = sec.getMatrixCharacteristics(params.get(Statement.GAGG_TARGET));
MatrixCharacteristics mc2 = sec.getMatrixCharacteristics(groupsVar);
if (mc1.dimsKnown() && mc2.dimsKnown() && (mc1.getRows() != mc2.getRows() || mc2.getCols() != 1)) {
throw new DMLRuntimeException("Grouped Aggregate dimension mismatch between target and groups.");
}
MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
JavaPairRDD<MatrixIndexes, WeightedCell> groupWeightedCells = null;
// Step 1: First extract groupWeightedCells from group, target and weights
if (params.get(Statement.GAGG_WEIGHTS) != null) {
weights = sec.getBinaryBlockRDDHandleForVariable(params.get(Statement.GAGG_WEIGHTS));
MatrixCharacteristics mc3 = sec.getMatrixCharacteristics(params.get(Statement.GAGG_WEIGHTS));
if (mc1.dimsKnown() && mc3.dimsKnown() && (mc1.getRows() != mc3.getRows() || mc1.getCols() != mc3.getCols())) {
throw new DMLRuntimeException("Grouped Aggregate dimension mismatch between target, groups, and weights.");
}
groupWeightedCells = groups.join(target).join(weights).flatMapToPair(new ExtractGroupNWeights());
} else //input vector or matrix
{
String ngroupsStr = params.get(Statement.GAGG_NUM_GROUPS);
long ngroups = (ngroupsStr != null) ? (long) Double.parseDouble(ngroupsStr) : -1;
//execute basic grouped aggregate (extract and preagg)
if (broadcastGroups) {
PartitionedBroadcast<MatrixBlock> pbm = sec.getBroadcastForVariable(groupsVar);
groupWeightedCells = target.flatMapToPair(new ExtractGroupBroadcast(pbm, mc1.getColsPerBlock(), ngroups, _optr));
} else {
//replicate groups if necessary
if (mc1.getNumColBlocks() > 1) {
groups = groups.flatMapToPair(new ReplicateVectorFunction(false, mc1.getNumColBlocks()));
}
groupWeightedCells = groups.join(target).flatMapToPair(new ExtractGroupJoin(mc1.getColsPerBlock(), ngroups, _optr));
}
}
// Step 2: Make sure we have brlen required while creating <MatrixIndexes, MatrixCell>
if (mc1.getRowsPerBlock() == -1) {
throw new DMLRuntimeException("The block sizes are not specified for grouped aggregate");
}
int brlen = mc1.getRowsPerBlock();
// Step 3: Now perform grouped aggregate operation (either on combiner side or reducer side)
JavaPairRDD<MatrixIndexes, MatrixCell> out = null;
if (_optr instanceof CMOperator && ((CMOperator) _optr).isPartialAggregateOperator() || _optr instanceof AggregateOperator) {
out = groupWeightedCells.reduceByKey(new PerformGroupByAggInCombiner(_optr)).mapValues(new CreateMatrixCell(brlen, _optr));
} else {
// Use groupby key because partial aggregation is not supported
out = groupWeightedCells.groupByKey().mapValues(new PerformGroupByAggInReducer(_optr)).mapValues(new CreateMatrixCell(brlen, _optr));
}
// Step 4: Set output characteristics and rdd handle
setOutputCharacteristicsForGroupedAgg(mc1, mcOut, out);
//store output rdd handle
sec.setRDDHandleForVariable(output.getName(), out);
sec.addLineageRDD(output.getName(), params.get(Statement.GAGG_TARGET));
sec.addLineage(output.getName(), groupsVar, broadcastGroups);
if (params.get(Statement.GAGG_WEIGHTS) != null) {
sec.addLineageRDD(output.getName(), params.get(Statement.GAGG_WEIGHTS));
}
} else if (opcode.equalsIgnoreCase("rmempty")) {
String rddInVar = params.get("target");
String rddOffVar = params.get("offset");
boolean rows = sec.getScalarInput(params.get("margin"), ValueType.STRING, true).getStringValue().equals("rows");
long maxDim = sec.getScalarInput(params.get("maxdim"), ValueType.DOUBLE, false).getLongValue();
MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(rddInVar);
if (//default case
maxDim > 0) {
//get input rdd handle
JavaPairRDD<MatrixIndexes, MatrixBlock> in = sec.getBinaryBlockRDDHandleForVariable(rddInVar);
JavaPairRDD<MatrixIndexes, MatrixBlock> off;
PartitionedBroadcast<MatrixBlock> broadcastOff;
long brlen = mcIn.getRowsPerBlock();
long bclen = mcIn.getColsPerBlock();
long numRep = (long) Math.ceil(rows ? (double) mcIn.getCols() / bclen : (double) mcIn.getRows() / brlen);
//execute remove empty rows/cols operation
JavaPairRDD<MatrixIndexes, MatrixBlock> out;
if (_bRmEmptyBC) {
broadcastOff = sec.getBroadcastForVariable(rddOffVar);
// Broadcast offset vector
out = in.flatMapToPair(new RDDRemoveEmptyFunctionInMem(rows, maxDim, brlen, bclen, broadcastOff));
} else {
off = sec.getBinaryBlockRDDHandleForVariable(rddOffVar);
out = in.join(off.flatMapToPair(new ReplicateVectorFunction(!rows, numRep))).flatMapToPair(new RDDRemoveEmptyFunction(rows, maxDim, brlen, bclen));
}
out = RDDAggregateUtils.mergeByKey(out, false);
//store output rdd handle
sec.setRDDHandleForVariable(output.getName(), out);
sec.addLineageRDD(output.getName(), rddInVar);
if (!_bRmEmptyBC)
sec.addLineageRDD(output.getName(), rddOffVar);
else
sec.addLineageBroadcast(output.getName(), rddOffVar);
//update output statistics (required for correctness)
MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
mcOut.set(rows ? maxDim : mcIn.getRows(), rows ? mcIn.getCols() : maxDim, (int) brlen, (int) bclen, mcIn.getNonZeros());
} else //special case: empty output (ensure valid dims)
{
MatrixBlock out = new MatrixBlock(rows ? 1 : (int) mcIn.getRows(), rows ? (int) mcIn.getCols() : 1, true);
sec.setMatrixOutput(output.getName(), out);
}
} else if (opcode.equalsIgnoreCase("replace")) {
//get input rdd handle
String rddVar = params.get("target");
JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(rddVar);
MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(rddVar);
//execute replace operation
double pattern = Double.parseDouble(params.get("pattern"));
double replacement = Double.parseDouble(params.get("replacement"));
JavaPairRDD<MatrixIndexes, MatrixBlock> out = in1.mapValues(new RDDReplaceFunction(pattern, replacement));
//store output rdd handle
sec.setRDDHandleForVariable(output.getName(), out);
sec.addLineageRDD(output.getName(), rddVar);
//update output statistics (required for correctness)
MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
mcOut.set(mcIn.getRows(), mcIn.getCols(), mcIn.getRowsPerBlock(), mcIn.getColsPerBlock(), (pattern != 0 && replacement != 0) ? mcIn.getNonZeros() : -1);
} else if (opcode.equalsIgnoreCase("rexpand")) {
String rddInVar = params.get("target");
//get input rdd handle
JavaPairRDD<MatrixIndexes, MatrixBlock> in = sec.getBinaryBlockRDDHandleForVariable(rddInVar);
MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(rddInVar);
double maxVal = Double.parseDouble(params.get("max"));
long lmaxVal = UtilFunctions.toLong(maxVal);
boolean dirRows = params.get("dir").equals("rows");
boolean cast = Boolean.parseBoolean(params.get("cast"));
boolean ignore = Boolean.parseBoolean(params.get("ignore"));
long brlen = mcIn.getRowsPerBlock();
long bclen = mcIn.getColsPerBlock();
//repartition input vector for higher degree of parallelism
//(avoid scenarios where few input partitions create huge outputs)
MatrixCharacteristics mcTmp = new MatrixCharacteristics(dirRows ? lmaxVal : mcIn.getRows(), dirRows ? mcIn.getRows() : lmaxVal, (int) brlen, (int) bclen, mcIn.getRows());
int numParts = (int) Math.min(SparkUtils.getNumPreferredPartitions(mcTmp, in), mcIn.getNumBlocks());
if (numParts > in.getNumPartitions() * 2)
in = in.repartition(numParts);
//execute rexpand rows/cols operation (no shuffle required because outputs are
//block-aligned with the input, i.e., one input block generates n output blocks)
JavaPairRDD<MatrixIndexes, MatrixBlock> out = in.flatMapToPair(new RDDRExpandFunction(maxVal, dirRows, cast, ignore, brlen, bclen));
//store output rdd handle
sec.setRDDHandleForVariable(output.getName(), out);
sec.addLineageRDD(output.getName(), rddInVar);
//update output statistics (required for correctness)
MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
mcOut.set(dirRows ? lmaxVal : mcIn.getRows(), dirRows ? mcIn.getRows() : lmaxVal, (int) brlen, (int) bclen, -1);
} else if (opcode.equalsIgnoreCase("transform")) {
// perform data transform on Spark
try {
DataTransform.spDataTransform(this, new FrameObject[] { sec.getFrameObject(params.get("target")) }, new MatrixObject[] { sec.getMatrixObject(output.getName()) }, ec);
} catch (Exception e) {
throw new DMLRuntimeException(e);
}
} else if (opcode.equalsIgnoreCase("transformapply")) {
//get input RDD and meta data
FrameObject fo = sec.getFrameObject(params.get("target"));
JavaPairRDD<Long, FrameBlock> in = (JavaPairRDD<Long, FrameBlock>) sec.getRDDHandleForFrameObject(fo, InputInfo.BinaryBlockInputInfo);
FrameBlock meta = sec.getFrameInput(params.get("meta"));
MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(params.get("target"));
MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
String[] colnames = !TfMetaUtils.isIDSpecification(params.get("spec")) ? in.lookup(1L).get(0).getColumnNames() : null;
//compute omit offset map for block shifts
TfOffsetMap omap = null;
if (TfMetaUtils.containsOmitSpec(params.get("spec"), colnames)) {
omap = new TfOffsetMap(SparkUtils.toIndexedLong(in.mapToPair(new RDDTransformApplyOffsetFunction(params.get("spec"), colnames)).collect()));
}
//create encoder broadcast (avoiding replication per task)
Encoder encoder = EncoderFactory.createEncoder(params.get("spec"), colnames, fo.getSchema(), (int) fo.getNumColumns(), meta);
mcOut.setDimension(mcIn.getRows() - ((omap != null) ? omap.getNumRmRows() : 0), encoder.getNumCols());
Broadcast<Encoder> bmeta = sec.getSparkContext().broadcast(encoder);
Broadcast<TfOffsetMap> bomap = (omap != null) ? sec.getSparkContext().broadcast(omap) : null;
//execute transform apply
JavaPairRDD<Long, FrameBlock> tmp = in.mapToPair(new RDDTransformApplyFunction(bmeta, bomap));
JavaPairRDD<MatrixIndexes, MatrixBlock> out = FrameRDDConverterUtils.binaryBlockToMatrixBlock(tmp, mcOut, mcOut);
//set output and maintain lineage/output characteristics
sec.setRDDHandleForVariable(output.getName(), out);
sec.addLineageRDD(output.getName(), params.get("target"));
ec.releaseFrameInput(params.get("meta"));
} else if (opcode.equalsIgnoreCase("transformdecode")) {
//get input RDD and meta data
JavaPairRDD<MatrixIndexes, MatrixBlock> in = sec.getBinaryBlockRDDHandleForVariable(params.get("target"));
MatrixCharacteristics mc = sec.getMatrixCharacteristics(params.get("target"));
FrameBlock meta = sec.getFrameInput(params.get("meta"));
String[] colnames = meta.getColumnNames();
//reblock if necessary (clen > bclen)
if (mc.getCols() > mc.getNumColBlocks()) {
in = in.mapToPair(new RDDTransformDecodeExpandFunction((int) mc.getCols(), mc.getColsPerBlock()));
in = RDDAggregateUtils.mergeByKey(in, false);
}
//construct decoder and decode individual matrix blocks
Decoder decoder = DecoderFactory.createDecoder(params.get("spec"), colnames, null, meta);
JavaPairRDD<Long, FrameBlock> out = in.mapToPair(new RDDTransformDecodeFunction(decoder, mc.getRowsPerBlock()));
//set output and maintain lineage/output characteristics
sec.setRDDHandleForVariable(output.getName(), out);
sec.addLineageRDD(output.getName(), params.get("target"));
ec.releaseFrameInput(params.get("meta"));
sec.getMatrixCharacteristics(output.getName()).set(mc.getRows(), meta.getNumColumns(), mc.getRowsPerBlock(), mc.getColsPerBlock(), -1);
sec.getFrameObject(output.getName()).setSchema(decoder.getSchema());
} else {
throw new DMLRuntimeException("Unknown parameterized builtin opcode: " + opcode);
}
}
use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.
the class MatrixAppendMSPInstruction method processInstruction.
@Override
public void processInstruction(ExecutionContext ec) throws DMLRuntimeException {
// map-only append (rhs must be vector and fit in mapper mem)
SparkExecutionContext sec = (SparkExecutionContext) ec;
checkBinaryAppendInputCharacteristics(sec, _cbind, false, false);
MatrixCharacteristics mc1 = sec.getMatrixCharacteristics(input1.getName());
MatrixCharacteristics mc2 = sec.getMatrixCharacteristics(input2.getName());
int brlen = mc1.getRowsPerBlock();
int bclen = mc1.getColsPerBlock();
JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
PartitionedBroadcast<MatrixBlock> in2 = sec.getBroadcastForVariable(input2.getName());
long off = sec.getScalarInput(_offset.getName(), _offset.getValueType(), _offset.isLiteral()).getLongValue();
//execute map-append operations (partitioning preserving if #in-blocks = #out-blocks)
JavaPairRDD<MatrixIndexes, MatrixBlock> out = null;
if (preservesPartitioning(mc1, mc2, _cbind)) {
out = in1.mapPartitionsToPair(new MapSideAppendPartitionFunction(in2, _cbind, off, brlen, bclen), true);
} else {
out = in1.flatMapToPair(new MapSideAppendFunction(in2, _cbind, off, brlen, bclen));
}
//put output RDD handle into symbol table
updateBinaryAppendOutputMatrixCharacteristics(sec, _cbind);
sec.setRDDHandleForVariable(output.getName(), out);
sec.addLineageRDD(output.getName(), input1.getName());
sec.addLineageBroadcast(output.getName(), input2.getName());
}
use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.
the class MatrixIndexingSPInstruction method processInstruction.
@Override
public void processInstruction(ExecutionContext ec) throws DMLRuntimeException {
SparkExecutionContext sec = (SparkExecutionContext) ec;
String opcode = getOpcode();
//get indexing range
long rl = ec.getScalarInput(rowLower.getName(), rowLower.getValueType(), rowLower.isLiteral()).getLongValue();
long ru = ec.getScalarInput(rowUpper.getName(), rowUpper.getValueType(), rowUpper.isLiteral()).getLongValue();
long cl = ec.getScalarInput(colLower.getName(), colLower.getValueType(), colLower.isLiteral()).getLongValue();
long cu = ec.getScalarInput(colUpper.getName(), colUpper.getValueType(), colUpper.isLiteral()).getLongValue();
IndexRange ixrange = new IndexRange(rl, ru, cl, cu);
//right indexing
if (opcode.equalsIgnoreCase("rangeReIndex")) {
//update and check output dimensions
MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(input1.getName());
MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
mcOut.set(ru - rl + 1, cu - cl + 1, mcIn.getRowsPerBlock(), mcIn.getColsPerBlock());
checkValidOutputDimensions(mcOut);
//execute right indexing operation (partitioning-preserving if possible)
JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
if (isSingleBlockLookup(mcIn, ixrange)) {
sec.setMatrixOutput(output.getName(), singleBlockIndexing(in1, mcIn, mcOut, ixrange));
} else if (isMultiBlockLookup(in1, mcIn, mcOut, ixrange)) {
sec.setMatrixOutput(output.getName(), multiBlockIndexing(in1, mcIn, mcOut, ixrange));
} else {
//rdd output for general case
JavaPairRDD<MatrixIndexes, MatrixBlock> out = generalCaseRightIndexing(in1, mcIn, mcOut, ixrange, _aggType);
//put output RDD handle into symbol table
sec.setRDDHandleForVariable(output.getName(), out);
sec.addLineageRDD(output.getName(), input1.getName());
}
} else //left indexing
if (opcode.equalsIgnoreCase("leftIndex") || opcode.equalsIgnoreCase("mapLeftIndex")) {
String rddVar = (_type == LixCacheType.LEFT) ? input2.getName() : input1.getName();
String bcVar = (_type == LixCacheType.LEFT) ? input1.getName() : input2.getName();
JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(rddVar);
PartitionedBroadcast<MatrixBlock> broadcastIn2 = null;
JavaPairRDD<MatrixIndexes, MatrixBlock> in2 = null;
JavaPairRDD<MatrixIndexes, MatrixBlock> out = null;
//update and check output dimensions
MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
MatrixCharacteristics mcLeft = ec.getMatrixCharacteristics(input1.getName());
mcOut.set(mcLeft.getRows(), mcLeft.getCols(), mcLeft.getRowsPerBlock(), mcLeft.getColsPerBlock());
checkValidOutputDimensions(mcOut);
//note: always matrix rhs, scalars are preprocessed via cast to 1x1 matrix
MatrixCharacteristics mcRight = ec.getMatrixCharacteristics(input2.getName());
//sanity check matching index range and rhs dimensions
if (!mcRight.dimsKnown()) {
throw new DMLRuntimeException("The right input matrix dimensions are not specified for MatrixIndexingSPInstruction");
}
if (!(ru - rl + 1 == mcRight.getRows() && cu - cl + 1 == mcRight.getCols())) {
throw new DMLRuntimeException("Invalid index range of leftindexing: [" + rl + ":" + ru + "," + cl + ":" + cu + "] vs [" + mcRight.getRows() + "x" + mcRight.getCols() + "].");
}
if (opcode.equalsIgnoreCase("mapLeftIndex")) {
broadcastIn2 = sec.getBroadcastForVariable(bcVar);
//partitioning-preserving mappartitions (key access required for broadcast loopkup)
out = in1.mapPartitionsToPair(new LeftIndexPartitionFunction(broadcastIn2, ixrange, _type, mcOut), true);
} else {
//general case
// zero-out lhs
in1 = in1.mapToPair(new ZeroOutLHS(false, ixrange, mcLeft));
// slice rhs, shift and merge with lhs
in2 = sec.getBinaryBlockRDDHandleForVariable(input2.getName()).flatMapToPair(new SliceRHSForLeftIndexing(ixrange, mcLeft));
out = RDDAggregateUtils.mergeByKey(in1.union(in2));
}
sec.setRDDHandleForVariable(output.getName(), out);
sec.addLineageRDD(output.getName(), rddVar);
if (broadcastIn2 != null)
sec.addLineageBroadcast(output.getName(), bcVar);
if (in2 != null)
sec.addLineageRDD(output.getName(), input2.getName());
} else
throw new DMLRuntimeException("Invalid opcode (" + opcode + ") encountered in MatrixIndexingSPInstruction.");
}
use of org.apache.sysml.runtime.matrix.data.MatrixBlock in project incubator-systemml by apache.
the class MatrixIndexingSPInstruction method singleBlockIndexing.
private static MatrixBlock singleBlockIndexing(JavaPairRDD<MatrixIndexes, MatrixBlock> in1, MatrixCharacteristics mcIn, MatrixCharacteristics mcOut, IndexRange ixrange) throws DMLRuntimeException {
//single block output via lookup (on partitioned inputs, this allows for single partition
//access to avoid a full scan of the input; note that this is especially important for
//out-of-core datasets as entire partitions are read, not just keys as in the in-memory setting.
long rix = UtilFunctions.computeBlockIndex(ixrange.rowStart, mcIn.getRowsPerBlock());
long cix = UtilFunctions.computeBlockIndex(ixrange.colStart, mcIn.getColsPerBlock());
List<MatrixBlock> list = in1.lookup(new MatrixIndexes(rix, cix));
if (list.size() != 1)
throw new DMLRuntimeException("Block lookup returned " + list.size() + " blocks (expected 1).");
MatrixBlock tmp = list.get(0);
MatrixBlock mbout = (tmp.getNumRows() == mcOut.getRows() && tmp.getNumColumns() == mcOut.getCols()) ? tmp : //reference full block or slice out sub-block
tmp.sliceOperations(UtilFunctions.computeCellInBlock(ixrange.rowStart, mcIn.getRowsPerBlock()), UtilFunctions.computeCellInBlock(ixrange.rowEnd, mcIn.getRowsPerBlock()), UtilFunctions.computeCellInBlock(ixrange.colStart, mcIn.getColsPerBlock()), UtilFunctions.computeCellInBlock(ixrange.colEnd, mcIn.getColsPerBlock()), new MatrixBlock());
return mbout;
}
Aggregations