use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.
the class TernarySPInstruction method processInstruction.
/**
 * Executes the Spark ctable instruction. The ctable variant is selected from the
 * input data types (or forced to the expand variant when {@code _isExpand} is set),
 * the inputs are turned into (cell index, value) pairs, the pairs are optionally
 * filtered to the requested output dimensions, and the result is registered as a
 * binary-cell RDD for the output variable together with its lineage.
 *
 * @param ec execution context; cast to {@link SparkExecutionContext}
 * @throws DMLRuntimeException on an unsupported ctable operation, on an inconsistent
 *         intermediate state, or if exactly one of the two output dimensions is -1
 */
@Override
public void processInstruction(ExecutionContext ec) throws DMLRuntimeException {
    SparkExecutionContext sec = (SparkExecutionContext) ec;

    // the first input is always consumed as a binary-block RDD;
    // the second and third inputs are bound per operation type below
    JavaPairRDD<MatrixIndexes, MatrixBlock> rddA = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> rddB = null;
    JavaPairRDD<MatrixIndexes, MatrixBlock> rddW = null;
    double scalar2 = -1;
    double scalar3 = -1;

    // operation type derived from the input data types; the expand flag overrides it
    Ternary.OperationTypes ctableOp = Ternary.findCtableOperationByInputDataTypes(
        input1.getDataType(), input2.getDataType(), input3.getDataType());
    if (_isExpand) {
        ctableOp = Ternary.OperationTypes.CTABLE_EXPAND_SCALAR_WEIGHT;
    }

    // block sizes are taken from the first input; the output is later marked
    // with block sizes of -1 because it is emitted as binary cells
    MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(input1.getName());
    MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
    int brlen = mcIn.getRowsPerBlock();
    int bclen = mcIn.getColsPerBlock();

    JavaPairRDD<MatrixIndexes, ArrayList<MatrixBlock>> grouped = null;
    JavaPairRDD<MatrixIndexes, CTableMap> ctables = null;
    JavaPairRDD<MatrixIndexes, Double> cells = null;
    boolean usesInput2 = false;
    boolean usesInput3 = false;

    switch (ctableOp) {
        case CTABLE_TRANSFORM: // (VECTOR)
            // F=ctable(A,B,W)
            rddB = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
            rddW = sec.getBinaryBlockRDDHandleForVariable(input3.getName());
            usesInput2 = true;
            usesInput3 = true;
            grouped = rddA.cogroup(rddB).cogroup(rddW).mapToPair(new MapThreeMBIterableIntoAL());
            ctables = grouped.mapToPair(new PerformCTableMapSideOperation(
                ctableOp, scalar2, scalar3, this.instString, (SimpleOperator) _optr, _ignoreZeros));
            break;

        case CTABLE_EXPAND_SCALAR_WEIGHT: // (VECTOR)
            // F = ctable(seq,A) or F = ctable(seq,B,1)
            scalar3 = sec.getScalarInput(input3.getName(), input3.getValueType(), input3.isLiteral()).getDoubleValue();
            if (scalar3 == 1) {
                // weight of 1: cells are produced directly from the second input
                rddB = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
                usesInput2 = true;
                cells = rddB.flatMapToPair(new ExpandScalarCtableOperation(brlen));
                break;
            }
            // no break here: for any other weight execution continues into the next case

        case CTABLE_TRANSFORM_SCALAR_WEIGHT: // (VECTOR/MATRIX)
            // F = ctable(A,B) or F = ctable(A,B,1)
            rddB = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
            usesInput2 = true;
            scalar3 = sec.getScalarInput(input3.getName(), input3.getValueType(), input3.isLiteral()).getDoubleValue();
            grouped = rddA.cogroup(rddB).mapToPair(new MapTwoMBIterableIntoAL());
            ctables = grouped.mapToPair(new PerformCTableMapSideOperation(
                ctableOp, scalar2, scalar3, this.instString, (SimpleOperator) _optr, _ignoreZeros));
            break;

        case CTABLE_TRANSFORM_HISTOGRAM: // (VECTOR)
            // F=ctable(A,1) or F = ctable(A,1,1)
            scalar2 = sec.getScalarInput(input2.getName(), input2.getValueType(), input2.isLiteral()).getDoubleValue();
            scalar3 = sec.getScalarInput(input3.getName(), input3.getValueType(), input3.isLiteral()).getDoubleValue();
            grouped = rddA.mapToPair(new MapMBIntoAL());
            ctables = grouped.mapToPair(new PerformCTableMapSideOperation(
                ctableOp, scalar2, scalar3, this.instString, (SimpleOperator) _optr, _ignoreZeros));
            break;

        case CTABLE_TRANSFORM_WEIGHTED_HISTOGRAM: // (VECTOR)
            // F=ctable(A,1,W)
            rddW = sec.getBinaryBlockRDDHandleForVariable(input3.getName());
            usesInput3 = true;
            scalar2 = sec.getScalarInput(input2.getName(), input2.getValueType(), input2.isLiteral()).getDoubleValue();
            grouped = rddA.cogroup(rddW).mapToPair(new MapTwoMBIterableIntoAL());
            ctables = grouped.mapToPair(new PerformCTableMapSideOperation(
                ctableOp, scalar2, scalar3, this.instString, (SimpleOperator) _optr, _ignoreZeros));
            break;

        default:
            throw new DMLRuntimeException("Encountered an invalid ctable operation (" + ctableOp + ") while executing instruction: " + this.toString());
    }

    // exactly one of {cells, ctables} must have been produced by the switch;
    // ctables still need to be unpacked into cells and summed per cell index
    if (cells == null) {
        if (ctables == null) {
            throw new DMLRuntimeException("Incorrect ctable operation");
        }
        JavaPairRDD<MatrixIndexes, Double> extracted = ctables.values().flatMapToPair(new ExtractBinaryCellsFromCTable());
        cells = RDDAggregateUtils.sumCellsByKeyStable(extracted);
    } else if (ctables != null) {
        throw new DMLRuntimeException("Incorrect ctable operation");
    }

    // output dimensions: either literals in the instruction or scalar variables
    long outputDim1 = _dim1Literal
        ? (long) Double.parseDouble(_outDim1)
        : sec.getScalarInput(_outDim1, ValueType.DOUBLE, false).getLongValue();
    long outputDim2 = _dim2Literal
        ? (long) Double.parseDouble(_outDim2)
        : sec.getScalarInput(_outDim2, ValueType.DOUBLE, false).getLongValue();

    // either both dimensions are given or both are -1; a mix is rejected
    boolean rowsUnknown = (outputDim1 == -1);
    boolean colsUnknown = (outputDim2 == -1);
    if (rowsUnknown != colsUnknown) {
        throw new DMLRuntimeException("Incorrect output dimensions passed to TernarySPInstruction:" + outputDim1 + " " + outputDim2);
    }
    boolean findDimensions = rowsUnknown && colsUnknown;

    MatrixCharacteristics mcBinaryCells = null;
    if (!findDimensions) {
        // dimensions are given: filter the cells according to them
        mcBinaryCells = new MatrixCharacteristics(outputDim1, outputDim2, brlen, bclen);
        cells = cells.filter(new FilterCells(mcBinaryCells.getRows(), mcBinaryCells.getCols()));
    }

    // convert double values to matrix cells
    JavaPairRDD<MatrixIndexes, MatrixCell> binaryCells = cells.mapToPair(new ConvertToBinaryCell());

    // dimensions not given: cache the cells and compute the characteristics from the data
    if (findDimensions) {
        binaryCells = SparkUtils.cacheBinaryCellRDD(binaryCells);
        mcBinaryCells = SparkUtils.computeMatrixCharacteristics(binaryCells);
    }

    // store the output RDD handle and its characteristics (block sizes -1 for binary cells)
    sec.setRDDHandleForVariable(output.getName(), binaryCells);
    mcOut.set(mcBinaryCells);
    mcOut.setRowsPerBlock(-1);
    mcOut.setColsPerBlock(-1);

    // lineage: the first input always, the others only if read as RDDs
    sec.addLineageRDD(output.getName(), input1.getName());
    if (usesInput2) {
        sec.addLineageRDD(output.getName(), input2.getName());
    }
    if (usesInput3) {
        sec.addLineageRDD(output.getName(), input3.getName());
    }
}
use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.
the class Tsmm2SPInstruction method processInstruction.
/**
 * Executes the Spark tsmm2 instruction in two passes over the input: the first
 * pass filters, shifts, collects and broadcasts the blocks selected by the range
 * filter; the second pass combines every input block with the broadcast and
 * aggregates the partial results.
 *
 * @param ec execution context; cast to {@link SparkExecutionContext}
 * @throws DMLRuntimeException if the instruction cannot be executed
 */
@Override
public void processInstruction(ExecutionContext ec) throws DMLRuntimeException {
    SparkExecutionContext sec = (SparkExecutionContext) ec;

    // input RDD and its characteristics
    JavaPairRDD<MatrixIndexes, MatrixBlock> in = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    MatrixCharacteristics mc = sec.getMatrixCharacteristics(input1.getName());
    final boolean left = _type.isLeft();

    // step 1: first pass of X, filter-collect-broadcast excess blocks
    // (left: all rows, columns after the first column block;
    //  otherwise: rows after the first row block, all columns)
    JavaPairRDD<MatrixIndexes, MatrixBlock> excess = in
        .filter(new IsBlockInRange(
            left ? 1 : mc.getRowsPerBlock() + 1, mc.getRows(),
            left ? mc.getColsPerBlock() + 1 : 1, mc.getCols(), mc))
        .mapToPair(new ShiftTSMMIndexesFunction(_type));
    int excessRows = (int) (left ? mc.getRows() : mc.getRows() - mc.getRowsPerBlock());
    int excessCols = (int) (left ? mc.getCols() - mc.getColsPerBlock() : mc.getCols());
    PartitionedBlock<MatrixBlock> pmb = SparkExecutionContext.toPartitionedMatrixBlock(
        excess, excessRows, excessCols, mc.getRowsPerBlock(), mc.getColsPerBlock(), -1L);
    Broadcast<PartitionedBlock<MatrixBlock>> bpmb = sec.getSparkContext().broadcast(pmb);

    // step 2: second pass of X, compute tsmm/mapmm and aggregate result blocks
    int outputDim = (int) (left ? mc.getCols() : mc.getRows());
    boolean singleBlockOutput = OptimizerUtils.estimateSize(outputDim, outputDim) <= 32 * 1024 * 1024;

    if (singleBlockOutput) {
        // default (<=32MB): output large blocks and reduce them all into one
        // block to avoid skew on combineByKey
        JavaRDD<MatrixBlock> partials = in.map(new RDDTSMM2ExtFunction(bpmb, _type, outputDim, (int) mc.getRowsPerBlock()));
        MatrixBlock result = RDDAggregateUtils.sumStable(partials);

        // the block goes straight into the symbol table; no RDD lineage is recorded here
        sec.setMatrixOutput(output.getName(), result);
        return;
    }

    // large output: emit individual output blocks and aggregate by key (no action)
    JavaPairRDD<MatrixIndexes, MatrixBlock> partials = in.flatMapToPair(new RDDTSMM2Function(bpmb, _type));
    JavaPairRDD<MatrixIndexes, MatrixBlock> result = RDDAggregateUtils.sumByKeyStable(partials, false);

    // update output characteristics, register the RDD handle and its lineage
    sec.getMatrixCharacteristics(output.getName()).set(outputDim, outputDim, mc.getRowsPerBlock(), mc.getColsPerBlock());
    sec.setRDDHandleForVariable(output.getName(), result);
    sec.addLineageRDD(output.getName(), input1.getName());
}
use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.
the class TsmmSPInstruction method processInstruction.
/**
 * Executes the Spark tsmm instruction: every input block is mapped through
 * {@code RDDTSMMFunction} and the resulting blocks are summed into one output block.
 *
 * @param ec execution context; cast to {@link SparkExecutionContext}
 * @throws DMLRuntimeException if the instruction cannot be executed
 */
@Override
public void processInstruction(ExecutionContext ec) throws DMLRuntimeException {
    SparkExecutionContext sec = (SparkExecutionContext) ec;

    // input as binary-block RDD
    JavaPairRDD<MatrixIndexes, MatrixBlock> blocks = sec.getBinaryBlockRDDHandleForVariable(input1.getName());

    // map each block and sum all partial results into exactly one output block
    // (the original note says this formulation requires --conf spark.driver.maxResultSize=0)
    MatrixBlock result = RDDAggregateUtils.sumStable(blocks.map(new RDDTSMMFunction(_type)));

    // the single block goes into the symbol table (no lineage is recorded);
    // this also includes implicit maintenance of matrix characteristics
    sec.setMatrixOutput(output.getName(), result);
}
use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.
the class RmmSPInstruction method processInstruction.
/**
 * Executes the Spark rmm instruction: both inputs are expanded to triple-indexed
 * blocks, joined on that index, multiplied per joined pair, and the products are
 * summed per result block index.
 *
 * @param ec execution context; cast to {@link SparkExecutionContext}
 * @throws DMLRuntimeException if the instruction cannot be executed
 */
@Override
public void processInstruction(ExecutionContext ec) throws DMLRuntimeException {
    SparkExecutionContext sec = (SparkExecutionContext) ec;

    // characteristics and RDD handles of both inputs
    MatrixCharacteristics mcLeft = sec.getMatrixCharacteristics(input1.getName());
    MatrixCharacteristics mcRight = sec.getMatrixCharacteristics(input2.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> lhs = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> rhs = sec.getBinaryBlockRDDHandleForVariable(input2.getName());

    // step 1: prepare join keys (w/ replication), i/j/k;
    // each side is replicated using the other side's extent and block size
    JavaPairRDD<TripleIndexes, MatrixBlock> lhsKeyed =
        lhs.flatMapToPair(new RmmReplicateFunction(mcRight.getCols(), mcRight.getColsPerBlock(), true));
    JavaPairRDD<TripleIndexes, MatrixBlock> rhsKeyed =
        rhs.flatMapToPair(new RmmReplicateFunction(mcLeft.getRows(), mcLeft.getRowsPerBlock(), false));

    // step 2: join the prepared datasets and multiply each joined pair ...
    JavaPairRDD<MatrixIndexes, MatrixBlock> products = lhsKeyed.join(rhsKeyed).mapToPair(new RmmMultiplyFunction());
    // ... then aggregate the partial products per result block
    JavaPairRDD<MatrixIndexes, MatrixBlock> result = RDDAggregateUtils.sumByKeyStable(products, false);

    // update output characteristics, register the output RDD and its lineage to both inputs
    updateBinaryMMOutputMatrixCharacteristics(sec, true);
    sec.setRDDHandleForVariable(output.getName(), result);
    sec.addLineageRDD(output.getName(), input1.getName());
    sec.addLineageRDD(output.getName(), input2.getName());
}
use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.
the class QuantileSortSPInstruction method processInstruction.
/**
 * Executes the Spark quantile-sort instruction: the input is sorted by value via
 * {@code RDDSortUtils.sortByVal}, together with a weights input when a second
 * operand is present. The output is registered with one column (no weights) or
 * two columns (with weights).
 *
 * @param ec execution context; cast to {@link SparkExecutionContext}
 * @throws DMLRuntimeException if the instruction cannot be executed
 */
@Override
public void processInstruction(ExecutionContext ec) throws DMLRuntimeException {
    SparkExecutionContext sec = (SparkExecutionContext) ec;

    // a second operand means the sort is weighted
    final boolean weighted = (input2 != null);

    // input RDDs (weights only if present) and input characteristics
    JavaPairRDD<MatrixIndexes, MatrixBlock> values = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> weights = null;
    if (weighted) {
        weights = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
    }
    MatrixCharacteristics mc = sec.getMatrixCharacteristics(input1.getName());

    // sort by value; w/o weights is the default
    JavaPairRDD<MatrixIndexes, MatrixBlock> sorted = weighted
        ? RDDSortUtils.sortByVal(values, weights, mc.getRows(), mc.getRowsPerBlock())
        : RDDSortUtils.sortByVal(values, mc.getRows(), mc.getRowsPerBlock());
    long clen = weighted ? 2 : 1;

    // put output RDD handle into symbol table, with lineage to every RDD input
    sec.setRDDHandleForVariable(output.getName(), sorted);
    sec.addLineageRDD(output.getName(), input1.getName());
    if (weighted) {
        sec.addLineageRDD(output.getName(), input2.getName());
    }

    // update output matrix characteristics: same rows and block sizes as the input
    MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
    mcOut.set(mc.getRows(), clen, mc.getRowsPerBlock(), mc.getColsPerBlock());
}
Aggregations