Example usage of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in the Apache SystemML project:
the processInstruction method of the PmmSPInstruction class.
@Override
public void processInstruction(ExecutionContext ec) {
	SparkExecutionContext sec = (SparkExecutionContext) ec;
	// Resolve which operand backs the RDD and which backs the broadcast,
	// depending on which side of the multiply is cached.
	String rddName;
	String broadcastName;
	if (_type == CacheType.LEFT) {
		rddName = input2.getName();
		broadcastName = input1.getName();
	} else {
		rddName = input1.getName();
		broadcastName = input2.getName();
	}
	MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
	// number of output rows, supplied as a scalar operand
	long numRows = sec.getScalarInput(_nrow.getName(), _nrow.getValueType(), _nrow.isLiteral()).getLongValue();
	// obtain the distributed input and the broadcast partition
	JavaPairRDD<MatrixIndexes, MatrixBlock> rddIn = sec.getBinaryBlockRDDHandleForVariable(rddName);
	PartitionedBroadcast<MatrixBlock> bcastIn = sec.getBroadcastForVariable(broadcastName);
	// perform the permutation matrix multiply, then aggregate partial blocks per key
	JavaPairRDD<MatrixIndexes, MatrixBlock> result = rddIn.flatMapToPair(new RDDPMMFunction(_type, bcastIn, numRows, mcOut.getRowsPerBlock()));
	result = RDDAggregateUtils.sumByKeyStable(result, false);
	// register the result RDD and its lineage in the symbol table
	sec.setRDDHandleForVariable(output.getName(), result);
	sec.addLineageRDD(output.getName(), rddName);
	sec.addLineageBroadcast(output.getName(), broadcastName);
	// update output statistics if not inferred
	updateBinaryMMOutputMatrixCharacteristics(sec, false);
}
Example usage of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in the Apache SystemML project:
the processInstruction method of the QuantilePickSPInstruction class.
@Override
public void processInstruction(ExecutionContext ec) {
	SparkExecutionContext sec = (SparkExecutionContext) ec;
	// get input rdds
	JavaPairRDD<MatrixIndexes, MatrixBlock> in = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
	MatrixCharacteristics mc = sec.getMatrixCharacteristics(input1.getName());
	// (in contrast to cp instructions, w/o weights does not materialize weights of 1)
	switch(_type) {
		case VALUEPICK: {
			// pick the value at the requested quantile position
			ScalarObject quantile = sec.getScalarInput(input2);
			double[] wt = getWeightedQuantileSummary(in, mc, quantile.getDoubleValue());
			sec.setScalarOutput(output.getName(), new DoubleObject(wt[3]));
			break;
		}
		case MEDIAN: {
			// median is the 0.5 quantile
			double[] wt = getWeightedQuantileSummary(in, mc, 0.5);
			sec.setScalarOutput(output.getName(), new DoubleObject(wt[3]));
			break;
		}
		case IQM: {
			// inter-quartile mean: sum values strictly inside (Q1, Q3], then
			// apply boundary correction for partially-included quartile entries
			double[] wt = getWeightedQuantileSummary(in, mc, 0.25, 0.75);
			long key25 = (long) Math.ceil(wt[1]);
			long key75 = (long) Math.ceil(wt[2]);
			JavaPairRDD<MatrixIndexes, MatrixBlock> out = in
				.filter(new FilterFunction(key25 + 1, key75, mc.getRowsPerBlock()))
				.mapToPair(new ExtractAndSumFunction(key25 + 1, key75, mc.getRowsPerBlock()));
			double sum = RDDAggregateUtils.sumStable(out).getValue(0, 0);
			double val = MatrixBlock.computeIQMCorrection(sum, wt[0], wt[3], wt[5], wt[4], wt[6]);
			sec.setScalarOutput(output.getName(), new DoubleObject(val));
			break;
		}
		default:
			throw new DMLRuntimeException("Unsupported qpick operation type: " + _type);
	}
}
Example usage of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in the Apache SystemML project:
the processInstruction method of the ReblockSPInstruction class.
@Override
public void processInstruction(ExecutionContext ec) {
	SparkExecutionContext sec = (SparkExecutionContext) ec;
	// set the output characteristics (same dims/nnz as input, target block sizes)
	CacheableData<?> obj = sec.getCacheableData(input1.getName());
	MatrixCharacteristics mc = sec.getMatrixCharacteristics(input1.getName());
	MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
	mcOut.set(mc.getRows(), mc.getCols(), brlen, bclen, mc.getNonZeros());
	// get the source format from the meta data
	MetaDataFormat iimd = (MetaDataFormat) obj.getMetaData();
	if (iimd == null)
		throw new DMLRuntimeException("Error: Metadata not found");
	InputInfo iinfo = iimd.getInputInfo();
	// check for in-memory reblock (w/ lazy spark context, potential for latency reduction)
	if (Recompiler.checkCPReblock(sec, input1.getName())) {
		if (input1.getDataType() == DataType.MATRIX)
			Recompiler.executeInMemoryMatrixReblock(sec, input1.getName(), output.getName());
		else if (input1.getDataType() == DataType.FRAME)
			Recompiler.executeInMemoryFrameReblock(sec, input1.getName(), output.getName());
		return;
	}
	// execute matrix/frame reblock
	if (input1.getDataType() == DataType.MATRIX)
		processMatrixReblockInstruction(sec, iinfo);
	else if (input1.getDataType() == DataType.FRAME)
		processFrameReblockInstruction(sec, iinfo);
	else
		// fail fast instead of silently producing no output for unsupported types
		throw new DMLRuntimeException("Unsupported reblock data type: " + input1.getDataType());
}
Example usage of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in the Apache SystemML project:
the processInstruction method of the RmmSPInstruction class.
@Override
public void processInstruction(ExecutionContext ec) {
	SparkExecutionContext sec = (SparkExecutionContext) ec;
	// fetch input metadata and binary-block RDDs
	MatrixCharacteristics mc1 = sec.getMatrixCharacteristics(input1.getName());
	MatrixCharacteristics mc2 = sec.getMatrixCharacteristics(input2.getName());
	JavaPairRDD<MatrixIndexes, MatrixBlock> left = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
	JavaPairRDD<MatrixIndexes, MatrixBlock> right = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
	MatrixCharacteristics mcOut = updateBinaryMMOutputMatrixCharacteristics(sec, true);
	// execute Spark RMM instruction
	// step 1: replicate both inputs under triple join keys i/j/k (shallow copies)
	JavaPairRDD<TripleIndexes, MatrixBlock> repLeft = left.flatMapToPair(new RmmReplicateFunction(mc2.getCols(), mc2.getColsPerBlock(), true));
	JavaPairRDD<TripleIndexes, MatrixBlock> repRight = right.flatMapToPair(new RmmReplicateFunction(mc1.getRows(), mc1.getRowsPerBlock(), false));
	// step 2: join by result block, multiply block pairs, aggregate per result block
	int numPartJoin = Math.max(getNumJoinPartitions(mc1, mc2), SparkExecutionContext.getDefaultParallelism(true));
	int numPartOut = SparkUtils.getNumPreferredPartitions(mcOut);
	JavaPairRDD<TripleIndexes, Tuple2<MatrixBlock, MatrixBlock>> joined = repLeft.join(repRight, numPartJoin);
	JavaPairRDD<MatrixIndexes, MatrixBlock> result = joined.mapToPair(new RmmMultiplyFunction());
	result = RDDAggregateUtils.sumByKeyStable(result, numPartOut, false);
	// put output block into symbol table (no lineage because single block)
	sec.setRDDHandleForVariable(output.getName(), result);
	sec.addLineageRDD(output.getName(), input1.getName());
	sec.addLineageRDD(output.getName(), input2.getName());
}
Example usage of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in the Apache SystemML project:
the processInstruction method of the TernarySPInstruction class.
@Override
public void processInstruction(ExecutionContext ec) {
	SparkExecutionContext sec = (SparkExecutionContext) ec;
	// for each operand: either a distributed RDD (matrix) or a 1x1 block from a scalar
	JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = !input1.isMatrix() ? null : sec.getBinaryBlockRDDHandleForVariable(input1.getName());
	JavaPairRDD<MatrixIndexes, MatrixBlock> in2 = !input2.isMatrix() ? null : sec.getBinaryBlockRDDHandleForVariable(input2.getName());
	JavaPairRDD<MatrixIndexes, MatrixBlock> in3 = !input3.isMatrix() ? null : sec.getBinaryBlockRDDHandleForVariable(input3.getName());
	MatrixBlock m1 = input1.isMatrix() ? null : new MatrixBlock(sec.getScalarInput(input1).getDoubleValue());
	MatrixBlock m2 = input2.isMatrix() ? null : new MatrixBlock(sec.getScalarInput(input2).getDoubleValue());
	MatrixBlock m3 = input3.isMatrix() ? null : new MatrixBlock(sec.getScalarInput(input3).getDoubleValue());
	TernaryOperator op = (TernaryOperator) _optr;
	// dispatch on the matrix/scalar pattern of the three operands:
	// single-matrix cases map over that RDD; multi-matrix cases join first
	JavaPairRDD<MatrixIndexes, MatrixBlock> out = null;
	if (input1.isMatrix() && !input2.isMatrix() && !input3.isMatrix())
		out = in1.mapValues(new TernaryFunctionMSS(op, m1, m2, m3));
	else if (!input1.isMatrix() && input2.isMatrix() && !input3.isMatrix())
		out = in2.mapValues(new TernaryFunctionSMS(op, m1, m2, m3));
	else if (!input1.isMatrix() && !input2.isMatrix() && input3.isMatrix())
		out = in3.mapValues(new TernaryFunctionSSM(op, m1, m2, m3));
	else if (input1.isMatrix() && input2.isMatrix() && !input3.isMatrix())
		out = in1.join(in2).mapValues(new TernaryFunctionMMS(op, m1, m2, m3));
	else if (input1.isMatrix() && !input2.isMatrix() && input3.isMatrix())
		out = in1.join(in3).mapValues(new TernaryFunctionMSM(op, m1, m2, m3));
	else if (!input1.isMatrix() && input2.isMatrix() && input3.isMatrix())
		out = in2.join(in3).mapValues(new TernaryFunctionSMM(op, m1, m2, m3));
	else
		// all matrices
		out = in1.join(in2).join(in3).mapValues(new TernaryFunctionMMM(op, m1, m2, m3));
	// set output RDD and lineage for each matrix input
	updateTernaryOutputMatrixCharacteristics(sec);
	sec.setRDDHandleForVariable(output.getName(), out);
	if (input1.isMatrix())
		sec.addLineageRDD(output.getName(), input1.getName());
	if (input2.isMatrix())
		sec.addLineageRDD(output.getName(), input2.getName());
	if (input3.isMatrix())
		sec.addLineageRDD(output.getName(), input3.getName());
}
Aggregations