Use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project systemml by apache.
The class BinUaggChainSPInstruction, method processInstruction.
@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    // get input
    JavaPairRDD<MatrixIndexes, MatrixBlock> in = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    // execute map-side binary uagg chain operation (partitioning-preserving)
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = in.mapValues(new RDDBinUaggChainFunction(_bOp, _uaggOp));
    // set output RDD and maintain lineage
    updateUnaryOutputMatrixCharacteristics(sec);
    sec.setRDDHandleForVariable(output.getName(), out);
    sec.addLineageRDD(output.getName(), input1.getName());
}
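The key point above is that mapValues is partitioning-preserving: it transforms each block locally without touching keys, so no shuffle occurs. Below is a minimal, self-contained sketch of that pattern, not SystemML code: the hypothetical class MapValuesSketch uses Long keys and double[] values as stand-ins for MatrixIndexes and MatrixBlock, and a lambda as a stand-in for RDDBinUaggChainFunction, assuming Spark 2.x in local mode.
import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

// hypothetical toy program, not part of SystemML
public class MapValuesSketch {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("MapValuesSketch").setMaster("local[*]");
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            // Long/double[] stand in for <MatrixIndexes, MatrixBlock> pairs
            JavaPairRDD<Long, double[]> blocks = sc.parallelizePairs(Arrays.asList(
                new Tuple2<>(1L, new double[]{1, 2, 3}),
                new Tuple2<>(2L, new double[]{4, 5, 6})));
            // mapValues leaves keys (and any partitioner) untouched, so the
            // operation runs block-locally on each partition, without a shuffle
            JavaPairRDD<Long, Double> blockSums = blocks.mapValues(
                v -> Arrays.stream(v).sum());
            blockSums.collect().forEach(t -> System.out.println(t._1() + " -> " + t._2()));
        }
    }
}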
Use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project systemml by apache.
The class BinarySPInstruction, method processMatrixMatrixBinaryInstruction.
/**
* Common binary matrix-matrix process instruction
*
* @param ec execution context
*/
protected void processMatrixMatrixBinaryInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    // sanity check dimensions and update output characteristics
    checkMatrixMatrixBinaryCharacteristics(sec);
    updateBinaryOutputMatrixCharacteristics(sec);
    // get input RDDs
    JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> in2 = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
    MatrixCharacteristics mc1 = sec.getMatrixCharacteristics(input1.getName());
    MatrixCharacteristics mc2 = sec.getMatrixCharacteristics(input2.getName());
    MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
    BinaryOperator bop = (BinaryOperator) _optr;
    // vector replication if required (mv or outer operations)
    boolean rowvector = (mc2.getRows() == 1 && mc1.getRows() > 1);
    long numRepLeft = getNumReplicas(mc1, mc2, true);
    long numRepRight = getNumReplicas(mc1, mc2, false);
    if (numRepLeft > 1)
        in1 = in1.flatMapToPair(new ReplicateVectorFunction(false, numRepLeft));
    if (numRepRight > 1)
        in2 = in2.flatMapToPair(new ReplicateVectorFunction(rowvector, numRepRight));
    // pick a preferred partition count for the join: reuse an existing hash
    // partitioning if available, otherwise bound the partition count
    int numPrefPart = SparkUtils.isHashPartitioned(in1) ? in1.getNumPartitions() :
        SparkUtils.isHashPartitioned(in2) ? in2.getNumPartitions() :
        Math.min(in1.getNumPartitions() + in2.getNumPartitions(),
            2 * SparkUtils.getNumPreferredPartitions(mcOut));
    // execute binary operation
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = in1.join(in2, numPrefPart)
        .mapValues(new MatrixMatrixBinaryOpFunction(bop));
    // set output RDD and maintain lineage
    sec.setRDDHandleForVariable(output.getName(), out);
    sec.addLineageRDD(output.getName(), input1.getName());
    sec.addLineageRDD(output.getName(), input2.getName());
}
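For illustration, here is a minimal, self-contained sketch of the join(...).mapValues(...) pattern used above, not SystemML code: the hypothetical class JoinBinaryOpSketch uses Long/double[] as stand-ins for MatrixIndexes/MatrixBlock and element-wise addition as a stand-in for MatrixMatrixBinaryOpFunction, assuming Spark 2.x in local mode.
import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

// hypothetical toy program, not part of SystemML
public class JoinBinaryOpSketch {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("JoinBinaryOpSketch").setMaster("local[*]");
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            JavaPairRDD<Long, double[]> a = sc.parallelizePairs(Arrays.asList(
                new Tuple2<>(1L, new double[]{1, 2}), new Tuple2<>(2L, new double[]{3, 4})));
            JavaPairRDD<Long, double[]> b = sc.parallelizePairs(Arrays.asList(
                new Tuple2<>(1L, new double[]{10, 20}), new Tuple2<>(2L, new double[]{30, 40})));
            // join co-locates blocks with equal indexes (a shuffle, hence the care
            // taken above to pick numPrefPart); the binary op then runs block-locally
            JavaPairRDD<Long, double[]> sum = a.join(b, 2).mapValues(p -> {
                double[] x = p._1(), y = p._2(), r = new double[x.length];
                for (int i = 0; i < x.length; i++) r[i] = x[i] + y[i];
                return r;
            });
            sum.collect().forEach(t -> System.out.println(t._1() + " -> " + Arrays.toString(t._2())));
        }
    }
}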
Use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project systemml by apache.
The class BinarySPInstruction, method processMatrixScalarBinaryInstruction.
protected void processMatrixScalarBinaryInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    // get input RDD (the matrix operand may be either input)
    String rddVar = (input1.getDataType() == DataType.MATRIX) ? input1.getName() : input2.getName();
    JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(rddVar);
    // get operator and scalar
    CPOperand scalar = (input1.getDataType() == DataType.MATRIX) ? input2 : input1;
    ScalarObject constant = ec.getScalarInput(scalar.getName(), scalar.getValueType(), scalar.isLiteral());
    ScalarOperator sc_op = (ScalarOperator) _optr;
    sc_op = sc_op.setConstant(constant.getDoubleValue());
    // execute scalar matrix arithmetic instruction
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = in1.mapValues(new MatrixScalarUnaryFunction(sc_op));
    // put output RDD handle into symbol table
    updateUnaryOutputMatrixCharacteristics(sec, rddVar, output.getName());
    sec.setRDDHandleForVariable(output.getName(), out);
    sec.addLineageRDD(output.getName(), rddVar);
}
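The scalar is baked into the operator on the driver and shipped to the executors inside the function's closure, so the whole operation stays map-side. A minimal sketch of that closure-capture pattern, with a hypothetical class ScalarOpSketch and a plain double standing in for constant.getDoubleValue(), assuming Spark 2.x in local mode:
import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

// hypothetical toy program, not part of SystemML
public class ScalarOpSketch {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("ScalarOpSketch").setMaster("local[*]");
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            final double constant = 7.0; // stands in for constant.getDoubleValue()
            JavaPairRDD<Long, double[]> in = sc.parallelizePairs(Arrays.asList(
                new Tuple2<>(1L, new double[]{1, 2}), new Tuple2<>(2L, new double[]{3, 4})));
            // the captured scalar travels with the serialized closure; keys are
            // unchanged, so the operation is purely map-side with no shuffle
            JavaPairRDD<Long, double[]> out = in.mapValues(v -> {
                double[] r = new double[v.length];
                for (int i = 0; i < v.length; i++) r[i] = v[i] * constant;
                return r;
            });
            out.collect().forEach(t -> System.out.println(t._1() + " -> " + Arrays.toString(t._2())));
        }
    }
}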
Use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project systemml by apache.
The class BinarySPInstruction, method processMatrixBVectorBinaryInstruction.
protected void processMatrixBVectorBinaryInstruction(ExecutionContext ec, VectorType vtype) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    // sanity check dimensions
    checkMatrixMatrixBinaryCharacteristics(sec);
    // get input RDD and broadcast
    String rddVar = input1.getName();
    String bcastVar = input2.getName();
    JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(rddVar);
    PartitionedBroadcast<MatrixBlock> in2 = sec.getBroadcastForVariable(bcastVar);
    MatrixCharacteristics mc1 = sec.getMatrixCharacteristics(rddVar);
    MatrixCharacteristics mc2 = sec.getMatrixCharacteristics(bcastVar);
    BinaryOperator bop = (BinaryOperator) _optr;
    // outer case: column vector op row vector
    boolean isOuter = (mc1.getRows() > 1 && mc1.getCols() == 1 && mc2.getRows() == 1 && mc2.getCols() > 1);
    // execute map binary operation
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = null;
    if (isOuter) {
        out = in1.flatMapToPair(new OuterVectorBinaryOpFunction(bop, in2));
    } else {
        // default case: we use mapPartitions in order to preserve partitioning
        // information for binary mv operations, where the keys are guaranteed
        // not to change; the reason we cannot use mapValues is the need for
        // broadcast key lookups.
        // alternative: out = in1.mapToPair(new MatrixVectorBinaryOpFunction(bop, in2, vtype));
        out = in1.mapPartitionsToPair(new MatrixVectorBinaryOpPartitionFunction(bop, in2, vtype), true);
    }
    // set output RDD and maintain lineage
    updateBinaryOutputMatrixCharacteristics(sec);
    sec.setRDDHandleForVariable(output.getName(), out);
    sec.addLineageRDD(output.getName(), rddVar);
    sec.addLineageBroadcast(output.getName(), bcastVar);
}
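The default branch relies on mapPartitionsToPair with preservesPartitioning=true: the flag is a promise to Spark that the keys do not change, so any existing partitioner carries over to the output. A minimal sketch of this map-side broadcast pattern, not SystemML code: the hypothetical class BroadcastBinaryOpSketch uses a plain Spark Broadcast as a stand-in for the SystemML-specific PartitionedBroadcast, assuming Spark 2.x in local mode and equal block/vector sizes.
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.broadcast.Broadcast;
import scala.Tuple2;

// hypothetical toy program, not part of SystemML
public class BroadcastBinaryOpSketch {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("BroadcastBinaryOpSketch").setMaster("local[*]");
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            Broadcast<double[]> vec = sc.broadcast(new double[]{100, 200});
            JavaPairRDD<Long, double[]> in = sc.parallelizePairs(Arrays.asList(
                new Tuple2<>(1L, new double[]{1, 2}), new Tuple2<>(2L, new double[]{3, 4})));
            // the 'true' flag declares that keys are unchanged, so any existing
            // partitioner is preserved (the point of the comment in the code above)
            JavaPairRDD<Long, double[]> out = in.mapPartitionsToPair(it -> {
                List<Tuple2<Long, double[]>> buf = new ArrayList<>();
                double[] v = vec.value();
                while (it.hasNext()) {
                    Tuple2<Long, double[]> t = it.next();
                    double[] r = new double[t._2().length];
                    for (int i = 0; i < r.length; i++) r[i] = t._2()[i] + v[i];
                    buf.add(new Tuple2<>(t._1(), r));
                }
                return buf.iterator();
            }, true);
            out.collect().forEach(t -> System.out.println(t._1() + " -> " + Arrays.toString(t._2())));
        }
    }
}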
Use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project systemml by apache.
The class RemoteDPParForSpark, method runJob.
public static RemoteParForJobReturn runJob(long pfid, String itervar, String matrixvar,
    String program, HashMap<String, byte[]> clsMap, String resultFile, MatrixObject input,
    ExecutionContext ec, PartitionFormat dpf, OutputInfo oi, boolean tSparseCol,
    boolean enableCPCaching, int numReducers) {
    String jobname = "ParFor-DPESP";
    long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    JavaSparkContext sc = sec.getSparkContext();
    // prepare input parameters
    MatrixObject mo = sec.getMatrixObject(matrixvar);
    MatrixCharacteristics mc = mo.getMatrixCharacteristics();
    // initialize accumulators for tasks/iterations, and inputs
    JavaPairRDD<MatrixIndexes, MatrixBlock> in = sec.getBinaryBlockRDDHandleForVariable(matrixvar);
    LongAccumulator aTasks = sc.sc().longAccumulator("tasks");
    LongAccumulator aIters = sc.sc().longAccumulator("iterations");
    // compute number of reducers (to avoid OOMs and reduce memory pressure)
    int numParts = SparkUtils.getNumPreferredPartitions(mc, in);
    int numReducers2 = Math.max(numReducers, Math.min(numParts, (int) dpf.getNumParts(mc)));
    // core parfor datapartition-execute (w/ or w/o shuffle, depending on data characteristics)
    RemoteDPParForSparkWorker efun = new RemoteDPParForSparkWorker(program, clsMap, matrixvar,
        itervar, enableCPCaching, mc, tSparseCol, dpf, oi, aTasks, aIters);
    JavaPairRDD<Long, Writable> tmp = getPartitionedInput(sec, matrixvar, oi, dpf);
    List<Tuple2<Long, String>> out = (requiresGrouping(dpf, mo) ?
        tmp.groupByKey(numReducers2) : tmp.map(new PseudoGrouping()))
        .mapPartitionsToPair(efun) // execute parfor tasks, incl cleanup
        .collect();
    // de-serialize results
    LocalVariableMap[] results = RemoteParForUtils.getResults(out, LOG);
    int numTasks = aTasks.value().intValue(); // get accumulator value
    int numIters = aIters.value().intValue(); // get accumulator value
    // create output symbol table entries
    RemoteParForJobReturn ret = new RemoteParForJobReturn(true, numTasks, numIters, results);
    // maintain statistics
    Statistics.incrementNoOfCompiledSPInst();
    Statistics.incrementNoOfExecutedSPInst();
    if (DMLScript.STATISTICS) {
        Statistics.maintainCPHeavyHitters(jobname, System.nanoTime() - t0);
    }
    return ret;
}
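The task and iteration counts come back through Spark accumulators: each worker increments them inside the tasks, and the driver reads the totals only after the collect() action has completed. A minimal sketch of that mechanism, with a hypothetical class AccumulatorSketch, assuming Spark 2.x in local mode:
import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.util.LongAccumulator;

// hypothetical toy program, not part of SystemML
public class AccumulatorSketch {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("AccumulatorSketch").setMaster("local[*]");
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            LongAccumulator tasks = sc.sc().longAccumulator("tasks");
            JavaRDD<Integer> work = sc.parallelize(Arrays.asList(1, 2, 3, 4), 2);
            // executors increment the accumulator inside an action; the value
            // is only reliable on the driver once the action has finished
            work.foreach(x -> tasks.add(1));
            System.out.println("tasks executed: " + tasks.value());
        }
    }
}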