Use of org.apache.sysml.runtime.instructions.spark.functions.FilterDiagBlocksFunction in project incubator-systemml by Apache.
From the class ReorgSPInstruction, method processInstruction.
/**
 * Runs this reorg instruction on Spark. The binary-block RDD of the first
 * input is transformed according to the opcode ({@code r'}, {@code rev},
 * {@code rdiag} or {@code rsort}); the resulting RDD handle and its lineage
 * to the input are then registered under the output variable.
 *
 * @param ec execution context; cast to {@link SparkExecutionContext}
 * @throws DMLRuntimeException if the opcode is none of the four handled here
 */
@Override
public void processInstruction(ExecutionContext ec) throws DMLRuntimeException {
    final SparkExecutionContext sec = (SparkExecutionContext) ec;
    final String opcode = getOpcode();
    final String inName = input1.getName();

    // input RDD handle and the matrix characteristics that drive every branch below
    final JavaPairRDD<MatrixIndexes, MatrixBlock> source = sec.getBinaryBlockRDDHandleForVariable(inName);
    final MatrixCharacteristics inMc = sec.getMatrixCharacteristics(inName);

    final JavaPairRDD<MatrixIndexes, MatrixBlock> result;
    if (opcode.equalsIgnoreCase("r'")) {
        // TRANSPOSE: one output block per input block
        result = source.mapToPair(new ReorgMapFunction(opcode));
    } else if (opcode.equalsIgnoreCase("rev")) {
        // REVERSE: each input block may emit several output blocks
        JavaPairRDD<MatrixIndexes, MatrixBlock> reversed = source.flatMapToPair(new RDDRevFunction(inMc));
        boolean rowsFillBlocks = (inMc.getRows() % inMc.getRowsPerBlock() == 0);
        if (rowsFillBlocks) {
            result = reversed;
        } else {
            // row count is not a multiple of the block height: merge the emitted blocks by key
            result = RDDAggregateUtils.mergeByKey(reversed, false);
        }
    } else if (opcode.equalsIgnoreCase("rdiag")) {
        // DIAG: direction is decided by the input's column count
        if (inMc.getCols() == 1) {
            // diagV2M: single-column input
            result = source.flatMapToPair(new RDDDiagV2MFunction(inMc));
        } else {
            // diagM2V: filter blocks with FilterDiagBlocksFunction, then apply the reorg map function
            JavaPairRDD<MatrixIndexes, MatrixBlock> diagBlocks = source.filter(new FilterDiagBlocksFunction());
            result = diagBlocks.mapToPair(new ReorgMapFunction(opcode));
        }
    } else if (opcode.equalsIgnoreCase("rsort")) {
        // ORDER: sort by column 'col', ascending or descending, returning indexes or values
        final long col = ec.getScalarInput(_col.getName(), _col.getValueType(), _col.isLiteral()).getLongValue();
        final boolean desc = ec.getScalarInput(_desc.getName(), _desc.getValueType(), _desc.isLiteral()).getBooleanValue();
        final boolean ixret = ec.getScalarInput(_ixret.getName(), _ixret.getValueType(), _ixret.isLiteral()).getBooleanValue();
        final boolean singleCol = (inMc.getCols() == 1);
        final boolean asc = !desc;

        // a single-column input is sorted directly; otherwise extract the sort column first
        JavaPairRDD<MatrixIndexes, MatrixBlock> sortColumn = source;
        if (!singleCol) {
            JavaPairRDD<MatrixIndexes, MatrixBlock> colBlocks =
                source.filter(new IsBlockInRange(1, inMc.getRows(), col, col, inMc));
            int colInBlock = (int) UtilFunctions.computeCellInBlock(col, inMc.getColsPerBlock());
            sortColumn = colBlocks.mapValues(new ExtractColumn(colInBlock));
        }

        // actual index/data sort
        if (ixret) {
            // return the sorted indexes
            result = RDDSortUtils.sortIndexesByVal(sortColumn, asc, inMc.getRows(), inMc.getRowsPerBlock());
        } else if (singleCol && asc) {
            // single-column matrix, ascending
            result = RDDSortUtils.sortByVal(sortColumn, inMc.getRows(), inMc.getRowsPerBlock());
        } else if (_bSortIndInMem) {
            // data sort (multi-column, or single-column descending) via the mem-sort variant
            result = RDDSortUtils.sortDataByValMemSort(sortColumn, source, asc, inMc.getRows(), inMc.getCols(), inMc.getRowsPerBlock(), inMc.getColsPerBlock(), sec, (ReorgOperator) _optr);
        } else {
            // data sort (multi-column, or single-column descending)
            result = RDDSortUtils.sortDataByVal(sortColumn, source, asc, inMc.getRows(), inMc.getCols(), inMc.getRowsPerBlock(), inMc.getColsPerBlock());
        }
    } else {
        throw new DMLRuntimeException("Error: Incorrect opcode in ReorgSPInstruction:" + opcode);
    }

    // publish the output: characteristics first, then RDD handle and lineage
    updateReorgMatrixCharacteristics(sec);
    sec.setRDDHandleForVariable(output.getName(), result);
    sec.addLineageRDD(output.getName(), inName);
}
Use of org.apache.sysml.runtime.instructions.spark.functions.FilterDiagBlocksFunction in project incubator-systemml by Apache.
From the class AggregateUnarySPInstruction, method processInstruction.
/**
 * Runs this unary aggregate instruction on Spark. For the {@code uaktrace}
 * opcode the input blocks are first filtered with
 * {@link FilterDiagBlocksFunction}. Depending on the aggregation type the
 * result is either stored as a single matrix block, or registered as an RDD
 * handle with lineage to the input.
 *
 * @param ec execution context; cast to {@link SparkExecutionContext}
 * @throws DMLRuntimeException declared by the instruction contract
 */
@Override
public void processInstruction(ExecutionContext ec) throws DMLRuntimeException {
    final SparkExecutionContext sec = (SparkExecutionContext) ec;
    final MatrixCharacteristics inMc = sec.getMatrixCharacteristics(input1.getName());

    // input blocks; trace additionally applies the diagonal-block filter
    JavaPairRDD<MatrixIndexes, MatrixBlock> blocks = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    if (getOpcode().equalsIgnoreCase("uaktrace")) {
        blocks = blocks.filter(new FilterDiagBlocksFunction());
    }

    // operators used by the unary aggregate
    final AggregateUnaryOperator unaryAgg = (AggregateUnaryOperator) _optr;
    final AggregateOperator agg = _aop;

    if (_aggtype == SparkAggType.SINGLE_BLOCK) {
        // aggregate all per-block results into one block
        JavaRDD<MatrixBlock> partials = blocks.map(new RDDUAggFunction2(unaryAgg, inMc.getRowsPerBlock(), inMc.getColsPerBlock()));
        MatrixBlock total = RDDAggregateUtils.aggStable(partials, agg);

        // the correction is dropped only after the aggregation
        total.dropLastRowsOrColums(agg.correctionLocation);

        // single block: stored directly (no lineage); this also maintains
        // the output matrix characteristics implicitly
        sec.setMatrixOutput(output.getName(), total);
        return;
    }

    // MULTI_BLOCK or NONE
    if (_aggtype == SparkAggType.NONE) {
        // no block aggregation: the correction is always dropped, using a
        // partitioning-preserving mapValues
        blocks = blocks.mapValues(new RDDUAggValueFunction(unaryAgg, inMc.getRowsPerBlock(), inMc.getColsPerBlock()));
    } else if (_aggtype == SparkAggType.MULTI_BLOCK) {
        // multi-block aggregation: the correction is kept through the aggregation
        blocks = blocks.mapToPair(new RDDUAggFunction(unaryAgg, inMc.getRowsPerBlock(), inMc.getColsPerBlock()));
        blocks = RDDAggregateUtils.aggByKeyStable(blocks, agg, false);

        // afterwards drop the correction, if one exists, via a
        // partitioning-preserving mapValues
        if (unaryAgg.aggOp.correctionExists) {
            blocks = blocks.mapValues(new AggregateDropCorrectionFunction(agg));
        }
    }

    // register the output RDD handle and its lineage in the symbol table
    updateUnaryAggOutputMatrixCharacteristics(sec, unaryAgg.indexFn);
    sec.setRDDHandleForVariable(output.getName(), blocks);
    sec.addLineageRDD(output.getName(), input1.getName());
}
Aggregations