Use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project incubator-systemml by apache.
The class BinarySPInstruction, method processMatrixBVectorBinaryInstruction.
protected void processMatrixBVectorBinaryInstruction(ExecutionContext ec, VectorType vtype) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    // sanity check dimensions
    checkMatrixMatrixBinaryCharacteristics(sec);
    // get input RDDs
    String rddVar = input1.getName();
    String bcastVar = input2.getName();
    JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(rddVar);
    PartitionedBroadcast<MatrixBlock> in2 = sec.getBroadcastForVariable(bcastVar);
    MatrixCharacteristics mc1 = sec.getMatrixCharacteristics(rddVar);
    MatrixCharacteristics mc2 = sec.getMatrixCharacteristics(bcastVar);
    BinaryOperator bop = (BinaryOperator) _optr;
    boolean isOuter = (mc1.getRows() > 1 && mc1.getCols() == 1 && mc2.getRows() == 1 && mc2.getCols() > 1);
    // execute map binary operation
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = null;
    if (isOuter) {
        out = in1.flatMapToPair(new OuterVectorBinaryOpFunction(bop, in2));
    } else {
        // default case
        // note: we use mapPartitions in order to preserve the partitioning information for
        // binary matrix-vector operations where the keys are guaranteed not to change; the
        // reason we cannot use mapValues is the need for broadcast key lookups.
        // alternative: out = in1.mapToPair(new MatrixVectorBinaryOpFunction(bop, in2, vtype));
        out = in1.mapPartitionsToPair(new MatrixVectorBinaryOpPartitionFunction(bop, in2, vtype), true);
    }
    // set output RDD
    updateBinaryOutputMatrixCharacteristics(sec);
    sec.setRDDHandleForVariable(output.getName(), out);
    sec.addLineageRDD(output.getName(), rddVar);
    sec.addLineageBroadcast(output.getName(), bcastVar);
}
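For context, the following is a minimal standalone sketch of the same map-side pattern in the plain Spark 2.x Java API, outside SystemML: a small vector is broadcast and applied per partition via mapPartitionsToPair with preservesPartitioning=true, so any existing partitioner of the matrix RDD is retained while values are looked up in the broadcast by key. Class and variable names here are illustrative only and not part of SystemML.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.PairFlatMapFunction;
import org.apache.spark.broadcast.Broadcast;

import scala.Tuple2;

public class MapSideVectorAddExample {
    public static void main(String[] args) {
        JavaSparkContext sc = new JavaSparkContext("local[*]", "map-side-binary");
        // matrix rows keyed by row index; the "vector" holds one value per row index
        JavaPairRDD<Long, double[]> rows = sc.parallelizePairs(Arrays.asList(
            new Tuple2<>(0L, new double[]{1, 2}), new Tuple2<>(1L, new double[]{3, 4})));
        Broadcast<double[]> vec = sc.broadcast(new double[]{10, 20});

        // map-side binary op: per partition, look up the broadcast value by key;
        // preservesPartitioning=true keeps any existing partitioner (keys are unchanged)
        JavaPairRDD<Long, double[]> out = rows.mapPartitionsToPair(
            (PairFlatMapFunction<Iterator<Tuple2<Long, double[]>>, Long, double[]>) it -> {
                List<Tuple2<Long, double[]>> res = new ArrayList<>();
                while (it.hasNext()) {
                    Tuple2<Long, double[]> kv = it.next();
                    double add = vec.value()[kv._1().intValue()]; // broadcast key lookup
                    double[] r = kv._2().clone();
                    for (int i = 0; i < r.length; i++) r[i] += add;
                    res.add(new Tuple2<>(kv._1(), r));
                }
                return res.iterator();
            }, true);

        out.collect().forEach(kv -> System.out.println(kv._1() + ": " + Arrays.toString(kv._2())));
        sc.close();
    }
}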
Use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project incubator-systemml by apache.
The class BuiltinNarySPInstruction, method processInstruction.
@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    boolean cbind = getOpcode().equals("cbind");
    // compute output characteristics
    MatrixCharacteristics mcOut = computeOutputMatrixCharacteristics(sec, inputs, cbind);
    // get consolidated input via union over shifted and padded inputs
    MatrixCharacteristics off = new MatrixCharacteristics(0, 0, mcOut.getRowsPerBlock(), mcOut.getColsPerBlock(), 0);
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = null;
    for (CPOperand input : inputs) {
        MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(input.getName());
        JavaPairRDD<MatrixIndexes, MatrixBlock> in = sec.getBinaryBlockRDDHandleForVariable(input.getName())
            .flatMapToPair(new ShiftMatrix(off, mcIn, cbind))
            .mapToPair(new PadBlocksFunction(mcOut)); // just padding
        out = (out != null) ? out.union(in) : in;
        updateMatrixCharacteristics(mcIn, off, cbind);
    }
    // aggregate partially overlapping blocks w/ single shuffle
    int numPartOut = SparkUtils.getNumPreferredPartitions(mcOut);
    out = RDDAggregateUtils.mergeByKey(out, numPartOut, false);
    // set output RDD and add lineage
    sec.getMatrixCharacteristics(output.getName()).set(mcOut);
    sec.setRDDHandleForVariable(output.getName(), out);
    for (CPOperand input : inputs)
        sec.addLineageRDD(output.getName(), input.getName());
}
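As a standalone illustration of the same consolidation pattern in the plain Spark 2.x Java API (names are illustrative, not SystemML code): several keyed RDDs are shifted into a common index space, unioned lazily without a shuffle, and then merged per key in a single shuffle via reduceByKey, which is roughly the role RDDAggregateUtils.mergeByKey plays above.

import java.util.Arrays;

import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;

import scala.Tuple2;

public class UnionMergeByKeyExample {
    public static void main(String[] args) {
        JavaSparkContext sc = new JavaSparkContext("local[*]", "union-merge");
        // two inputs that partially overlap on key 1 after "shifting" into a common index space
        JavaPairRDD<Integer, Double> a = sc.parallelizePairs(Arrays.asList(
            new Tuple2<>(0, 1.0), new Tuple2<>(1, 2.0)));
        JavaPairRDD<Integer, Double> b = sc.parallelizePairs(Arrays.asList(
            new Tuple2<>(1, 3.0), new Tuple2<>(2, 4.0)));

        // union is a narrow (no-shuffle) operation; the single shuffle happens in reduceByKey,
        // which merges the partially overlapping entries per key into one value
        int numPartitions = 2;
        JavaPairRDD<Integer, Double> merged = a.union(b)
            .reduceByKey((x, y) -> x + y, numPartitions);

        merged.collect().forEach(kv -> System.out.println(kv._1() + " -> " + kv._2()));
        sc.close();
    }
}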
Use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project incubator-systemml by apache.
The class CheckpointSPInstruction, method processInstruction.
@Override
@SuppressWarnings("unchecked")
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    // Step 1: early abort on non-existing or in-memory (cached) inputs
    // -------
    // (this is valid if the relevant branches are never entered)
    if (sec.getVariable(input1.getName()) == null || sec.getVariable(input1.getName()) instanceof BooleanObject) {
        // add a dummy entry to the input, which will be immediately overwritten by the null output
        sec.setVariable(input1.getName(), new BooleanObject(false));
        sec.setVariable(output.getName(), new BooleanObject(false));
        return;
    }
    // (for csv input files with unknown dimensions, we might have generated a checkpoint after
    // csvreblock although not necessary because the csvreblock was subject to in-memory reblock)
    CacheableData<?> obj = sec.getCacheableData(input1.getName());
    if (obj.isCached(true)) {
        // available in memory
        sec.setVariable(output.getName(), obj);
        return;
    }
    // get input rdd handle (for matrix or frame)
    JavaPairRDD<?, ?> in = sec.getRDDHandleForVariable(input1.getName(), InputInfo.BinaryBlockInputInfo);
    MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(input1.getName());
    // Step 2: Checkpoint given rdd (only if currently in a different storage level, to prevent redundancy)
    // -------
    // Note that persist is a transformation which will be triggered on demand by the next rdd operation.
    // This prevents unnecessary overhead if the dataset is only consumed by cp operations.
    JavaPairRDD<?, ?> out = null;
    if (!in.getStorageLevel().equals(_level)) {
        // (trigger coalesce if the intended number of partitions is exceeded by 20%
        // and the rdd is not hash partitioned, to avoid losing the existing partitioner)
        int numPartitions = SparkUtils.getNumPreferredPartitions(mcIn, in);
        boolean coalesce = (1.2 * numPartitions < in.getNumPartitions()
            && !SparkUtils.isHashPartitioned(in)
            && in.getNumPartitions() > SparkExecutionContext.getDefaultParallelism(true));
        // checkpoint pre-processing rdd operations
        if (coalesce) {
            // merge partitions without shuffle if there are too many partitions
            out = in.coalesce(numPartitions);
        } else {
            // apply a narrow shallow copy to allow for short-circuit collects
            if (input1.getDataType() == DataType.MATRIX)
                out = SparkUtils.copyBinaryBlockMatrix((JavaPairRDD<MatrixIndexes, MatrixBlock>) in, false);
            else if (input1.getDataType() == DataType.FRAME)
                out = ((JavaPairRDD<Long, FrameBlock>) in).mapValues(new CopyFrameBlockFunction(false));
        }
        // convert mcsr into memory-efficient csr if potentially sparse
        if (input1.getDataType() == DataType.MATRIX && OptimizerUtils.checkSparseBlockCSRConversion(mcIn)
            && !_level.equals(Checkpoint.SER_STORAGE_LEVEL)) {
            out = ((JavaPairRDD<MatrixIndexes, MatrixBlock>) out).mapValues(new CreateSparseBlockFunction(SparseBlock.Type.CSR));
        }
        // actual checkpoint into given storage level
        out = out.persist(_level);
        // explicitly compute unknown nnz; otherwise the nnz would never be evaluated due to lazy evaluation in spark
        if (input1.isMatrix() && mcIn.dimsKnown() && !mcIn.dimsKnown(true) && !OptimizerUtils.isValidCPDimensions(mcIn)) {
            mcIn.setNonZeros(SparkUtils.getNonZeros((JavaPairRDD<MatrixIndexes, MatrixBlock>) out));
        }
    } else {
        // pass-through
        out = in;
    }
    // Step 3: In-place update of input matrix/frame rdd handle and set as output
    // -------
    // We use this in-place approach for two reasons. First, it is correct because our checkpoint
    // injection rewrites guarantee that after checkpoint instructions there are no consumers on the
    // given input. Second, it is beneficial because otherwise we need to pass in-memory objects and
    // filenames to the new matrix object in order to prevent repeated reads from hdfs and unnecessary
    // caching and subsequent collects. Note that in-place update requires us to explicitly handle
    // lineage information in order to prevent cycles on cleanup.
    CacheableData<?> cd = sec.getCacheableData(input1.getName());
    if (out != in) {
        // prevent unnecessary lineage info
        // input rdd handle guaranteed to exist (see above)
        RDDObject inro = cd.getRDDHandle();
        // create new rdd object
        RDDObject outro = new RDDObject(out);
        // mark as checkpointed
        outro.setCheckpointRDD(true);
        // keep lineage to prevent cycles on cleanup
        outro.addLineageChild(inro);
        cd.setRDDHandle(outro);
    }
    sec.setVariable(output.getName(), cd);
}
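For reference, a minimal standalone sketch of the guarded-checkpoint idea above, written against the plain Spark 2.x Java API (not SystemML code; names are illustrative): persist an RDD at a target storage level only if it is not already stored at that level, relying on the fact that persist is lazy and only materialized by the next action.

import java.util.Arrays;

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.storage.StorageLevel;

public class GuardedPersistExample {
    // persist only if the rdd is not already stored at the requested level
    static <T> JavaRDD<T> checkpointIfNeeded(JavaRDD<T> in, StorageLevel level) {
        if (in.getStorageLevel().equals(level))
            return in; // pass-through, already checkpointed at this level
        // persist is lazy: the data is materialized by the next action on the rdd
        return in.persist(level);
    }

    public static void main(String[] args) {
        JavaSparkContext sc = new JavaSparkContext("local[*]", "guarded-persist");
        JavaRDD<Integer> data = sc.parallelize(Arrays.asList(1, 2, 3, 4));
        JavaRDD<Integer> cached = checkpointIfNeeded(data, StorageLevel.MEMORY_AND_DISK());
        System.out.println("sum: " + cached.reduce(Integer::sum)); // triggers materialization
        sc.close();
    }
}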
Use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project incubator-systemml by apache.
The class CompressionSPInstruction, method processInstruction.
@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    // get input rdd handle
    JavaPairRDD<MatrixIndexes, MatrixBlock> in = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    // execute compression
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = in.mapValues(new CompressionFunction());
    // set outputs and lineage
    sec.setRDDHandleForVariable(output.getName(), out);
    sec.addLineageRDD(output.getName(), input1.getName());
}
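The compression itself is a pure value transformation. A small standalone sketch of that pattern in the plain Spark 2.x Java API (illustrative names, not SystemML code): mapValues with a serializable Function transforms only the values, so keys and partitioning are preserved, exactly as the block-wise CompressionFunction is applied above.

import java.util.Arrays;

import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;

import scala.Tuple2;

public class MapValuesExample {
    // stand-in for a block-wise "compression" of the value; keys are untouched
    public static class ToUpperFunction implements Function<String, String> {
        @Override
        public String call(String v) {
            return v.toUpperCase();
        }
    }

    public static void main(String[] args) {
        JavaSparkContext sc = new JavaSparkContext("local[*]", "map-values");
        JavaPairRDD<Integer, String> in = sc.parallelizePairs(Arrays.asList(
            new Tuple2<>(1, "abc"), new Tuple2<>(2, "def")));
        // mapValues transforms only the values, so keys and partitioning are preserved
        JavaPairRDD<Integer, String> out = in.mapValues(new ToUpperFunction());
        out.collect().forEach(kv -> System.out.println(kv._1() + " -> " + kv._2()));
        sc.close();
    }
}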
Use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project incubator-systemml by apache.
The class ConvolutionSPInstruction, method processInstruction.
@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    if (instOpcode.equalsIgnoreCase("conv2d") || instOpcode.equalsIgnoreCase("conv2d_bias_add")
        || instOpcode.equalsIgnoreCase("maxpooling") || instOpcode.equalsIgnoreCase("relu_maxpooling")) {
        String rddVar = input1.getName();
        int numRowsPerBlock = 1;
        JavaPairRDD<MatrixIndexes, MatrixBlock> inputRDD = reblockAsRectangularMatrices(sec, rddVar, numRowsPerBlock);
        MatrixCharacteristics mcRdd = sec.getMatrixCharacteristics(rddVar);
        // ------------------------------------
        // TODO: Handle large filters > 2G
        Broadcast<MatrixBlock> filterBroadcast = null;
        Broadcast<MatrixBlock> biasBroadcast = null;
        if (instOpcode.equalsIgnoreCase("conv2d")) {
            filterBroadcast = getBroadcast(sec, _in2.getName());
        } else if (instOpcode.equalsIgnoreCase("conv2d_bias_add")) {
            filterBroadcast = getBroadcast(sec, _in3.getName());
            biasBroadcast = getBroadcast(sec, _in2.getName());
        }
        // ------------------------------------
        int pad_h = getScalarInput(ec, _padding, 0);
        int pad_w = getScalarInput(ec, _padding, 1);
        int stride_h = getScalarInput(ec, _stride, 0);
        int stride_w = getScalarInput(ec, _stride, 1);
        // int N = getScalarInput(ec, _input_shape, 0);
        int C = getScalarInput(ec, _input_shape, 1);
        int H = getScalarInput(ec, _input_shape, 2);
        int W = getScalarInput(ec, _input_shape, 3);
        int K = getScalarInput(ec, _filter_shape, 0);
        int R = getScalarInput(ec, _filter_shape, 2);
        int S = getScalarInput(ec, _filter_shape, 3);
        int P = (int) ConvolutionUtils.getP(H, R, stride_h, pad_h);
        int Q = (int) ConvolutionUtils.getQ(W, S, stride_w, pad_w);
        ConvolutionParameters params = new ConvolutionParameters(numRowsPerBlock, C, H, W, K, R, S, stride_h, stride_w, pad_h, pad_w, 1);
        boolean enableNativeBLAS = NativeHelper.isNativeLibraryLoaded();
        JavaPairRDD<MatrixIndexes, MatrixBlock> out = inputRDD.mapPartitionsToPair(
            new RDDConv2dMapMMFunction(filterBroadcast, params, instOpcode, biasBroadcast, mcRdd.getRows(), enableNativeBLAS), true);
        // put output RDD handle into symbol table
        sec.setRDDHandleForVariable(output.getName(), out);
        sec.addLineageRDD(output.getName(), rddVar);
        // TODO: Handle nnz
        long nnz = -1;
        long numCols = ((long) K) * ((long) P) * ((long) Q);
        if (instOpcode.equalsIgnoreCase("maxpooling") || instOpcode.equalsIgnoreCase("relu_maxpooling")) {
            numCols = ((long) C) * ((long) P) * ((long) Q);
        }
        if (numCols > Integer.MAX_VALUE) {
            throw new DMLRuntimeException("The current operator does not support large outputs.");
        }
        sec.setMetaData(output.getName(), new MetaDataFormat(
            new MatrixCharacteristics(mcRdd.getRows(), numCols, numRowsPerBlock, (int) numCols, nnz),
            OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo));
    } else {
        throw new DMLRuntimeException("Not implemented: " + instOpcode);
    }
}
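The numCols check above guards against the flattened conv2d/pooling output exceeding the int range used for block sizes. A small worked sketch of how P, Q and the flattened K*P*Q column count are derived, using the standard convolution output-size formula rather than the SystemML ConvolutionUtils helpers (all values here are illustrative):

public class ConvOutputSizeExample {
    // standard convolution/pooling output size: floor((in + 2*pad - filter) / stride) + 1
    static long outputSize(long in, long filter, long stride, long pad) {
        return (in + 2 * pad - filter) / stride + 1;
    }

    public static void main(String[] args) {
        long H = 224, W = 224;        // input height/width
        long R = 3, S = 3;            // filter height/width
        long stride = 1, pad = 1;     // stride and padding (same for h and w here)
        long K = 64;                  // number of filters

        long P = outputSize(H, R, stride, pad); // 224
        long Q = outputSize(W, S, stride, pad); // 224
        long numCols = K * P * Q;               // flattened conv2d output columns per row: 3,211,264

        System.out.println("P=" + P + ", Q=" + Q + ", numCols=" + numCols);
        // very large K, P, Q could push numCols past Integer.MAX_VALUE, which the
        // instruction above rejects because block sizes are int-typed
        System.out.println("fits in int: " + (numCols <= Integer.MAX_VALUE));
    }
}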