Use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project systemml by apache.
The class CumulativeOffsetSPInstruction, method processInstruction:
@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    MatrixCharacteristics mc = sec.getMatrixCharacteristics(input2.getName());
    long rlen = mc.getRows();
    int brlen = mc.getRowsPerBlock();
    // get inputs
    JavaPairRDD<MatrixIndexes, MatrixBlock> inData = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> inAgg = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
    // prepare aggregates (cumsplit of offsets)
    inAgg = inAgg.flatMapToPair(new RDDCumSplitFunction(_initValue, rlen, brlen));
    // execute cumulative offset (apply cumulative op w/ offsets)
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = inData.join(inAgg)
        .mapValues(new RDDCumOffsetFunction(_uop, _bop));
    updateUnaryOutputMatrixCharacteristics(sec);
    // put output handle in symbol table
    sec.setRDDHandleForVariable(output.getName(), out);
    sec.addLineageRDD(output.getName(), input1.getName());
    sec.addLineageRDD(output.getName(), input2.getName());
}
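The core pattern above, joining each data block with its per-block offset and applying the cumulative operation seeded by that offset, can be illustrated with a minimal standalone sketch in plain Spark. The names and the double[]-per-block representation below are hypothetical stand-ins for SystemML's MatrixBlock; only the join/mapValues structure mirrors the instruction.
import org.apache.spark.api.java.JavaPairRDD;

// Hypothetical sketch: blocks keyed by block-row index, values are dense
// column vectors; offsets carry the aggregate of all preceding blocks
// (cf. RDDCumSplitFunction above).
static JavaPairRDD<Long, double[]> applyCumOffset(
        JavaPairRDD<Long, double[]> data, JavaPairRDD<Long, Double> offsets) {
    return data.join(offsets).mapValues(pair -> {
        double[] block = pair._1().clone(); // copy: never mutate cached RDD data
        double carry = pair._2(); // offset carried in from preceding blocks
        for (int i = 0; i < block.length; i++) {
            carry += block[i]; // running sum seeded by the carried-in offset
            block[i] = carry;
        }
        return block;
    });
}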
Use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project systemml by apache.
The class AppendGSPInstruction, method processInstruction:
@Override
public void processInstruction(ExecutionContext ec) {
    // general case append (map-extend, aggregate)
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    checkBinaryAppendInputCharacteristics(sec, _cbind, false, false);
    MatrixCharacteristics mc1 = sec.getMatrixCharacteristics(input1.getName());
    MatrixCharacteristics mc2 = sec.getMatrixCharacteristics(input2.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> in2 = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
    // general case: requires shifting and merging, and hence incurs a substantial performance hit
    JavaPairRDD<MatrixIndexes, MatrixBlock> shifted_in2 = in2.flatMapToPair(new ShiftMatrix(mc1, mc2, _cbind));
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = in1.cogroup(shifted_in2)
        .mapToPair(new MergeWithShiftedBlocks(mc1, mc2, _cbind));
    // put output RDD handle into symbol table
    updateBinaryAppendOutputMatrixCharacteristics(sec, _cbind);
    sec.setRDDHandleForVariable(output.getName(), out);
    sec.addLineageRDD(output.getName(), input1.getName());
    sec.addLineageRDD(output.getName(), input2.getName());
}
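As a hedged illustration of the shift step, the sketch below simply moves every block index of the second input past the end of the first input (rbind case). It deliberately ignores the harder part of the real ShiftMatrix, splitting boundary blocks when the first input's row count is not a multiple of the block size, which is exactly why the cogroup-based merge is needed afterwards. All names and types here are hypothetical simplifications.
import org.apache.spark.api.java.JavaPairRDD;
import scala.Tuple2;

// Hypothetical simplification: blocks keyed by block-row index only.
static JavaPairRDD<Long, double[]> shiftForRbind(
        JavaPairRDD<Long, double[]> in2, long numRowBlocksIn1) {
    // every block of in2 lands numRowBlocksIn1 block rows further down
    return in2.mapToPair(kv -> new Tuple2<>(kv._1() + numRowBlocksIn1, kv._2()));
}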
Use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project systemml by apache.
The class BuiltinNarySPInstruction, method processInstruction:
@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    boolean cbind = getOpcode().equals("cbind");
    // compute output characteristics
    MatrixCharacteristics mcOut = computeOutputMatrixCharacteristics(sec, inputs, cbind);
    // get consolidated input via union over shifted and padded inputs
    MatrixCharacteristics off = new MatrixCharacteristics(0, 0, mcOut.getRowsPerBlock(), mcOut.getColsPerBlock(), 0);
    JavaPairRDD<MatrixIndexes, MatrixBlock> out = null;
    for (CPOperand input : inputs) {
        MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(input.getName());
        JavaPairRDD<MatrixIndexes, MatrixBlock> in = sec.getBinaryBlockRDDHandleForVariable(input.getName())
            .flatMapToPair(new ShiftMatrix(off, mcIn, cbind))
            .mapToPair(new PadBlocksFunction(mcOut)); // just padding
        out = (out != null) ? out.union(in) : in;
        updateMatrixCharacteristics(mcIn, off, cbind);
    }
    // aggregate partially overlapping blocks w/ single shuffle
    int numPartOut = SparkUtils.getNumPreferredPartitions(mcOut);
    out = RDDAggregateUtils.mergeByKey(out, numPartOut, false);
    // set output RDD and add lineage
    sec.getMatrixCharacteristics(output.getName()).set(mcOut);
    sec.setRDDHandleForVariable(output.getName(), out);
    for (CPOperand input : inputs)
        sec.addLineageRDD(output.getName(), input.getName());
}
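The final mergeByKey is what reconciles partially overlapping blocks: after the union, a boundary block index can hold padded fragments from two different inputs. A plain-Spark sketch of the same idea, assuming zero padding and hypothetical double[] blocks (unioned and numPartOut stand in for the out RDD and partition count above):
// Padded cells are zero, so element-wise addition merges disjoint content
// in a single shuffle (cf. RDDAggregateUtils.mergeByKey).
JavaPairRDD<Long, double[]> merged = unioned.reduceByKey((a, b) -> {
    double[] r = a.clone();
    for (int i = 0; i < r.length; i++)
        r[i] += b[i];
    return r;
}, numPartOut);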
Use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project systemml by apache.
The class CSVReblockSPInstruction, method processInstruction:
@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    // sanity check input info
    CacheableData<?> obj = sec.getCacheableData(input1.getName());
    MetaDataFormat iimd = (MetaDataFormat) obj.getMetaData();
    if (iimd.getInputInfo() != InputInfo.CSVInputInfo) {
        throw new DMLRuntimeException("The given InputInfo is not implemented for CSVReblockSPInstruction:" + iimd.getInputInfo());
    }
    // set output characteristics
    MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(input1.getName());
    MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
    mcOut.set(mcIn.getRows(), mcIn.getCols(), _brlen, _bclen);
    // check for in-memory reblock (w/ lazy spark context, potential for latency reduction)
    if (Recompiler.checkCPReblock(sec, input1.getName())) {
        if (input1.getDataType() == DataType.MATRIX)
            Recompiler.executeInMemoryMatrixReblock(sec, input1.getName(), output.getName());
        else if (input1.getDataType() == DataType.FRAME)
            Recompiler.executeInMemoryFrameReblock(sec, input1.getName(), output.getName());
        return;
    }
    // execute matrix/frame csvreblock
    JavaPairRDD<?, ?> out = null;
    if (input1.getDataType() == DataType.MATRIX)
        out = processMatrixCSVReblockInstruction(sec, mcOut);
    else if (input1.getDataType() == DataType.FRAME)
        out = processFrameCSVReblockInstruction(sec, mcOut, ((FrameObject) obj).getSchema());
    // put output RDD handle into symbol table
    sec.setRDDHandleForVariable(output.getName(), out);
    sec.addLineageRDD(output.getName(), input1.getName());
}
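Conceptually, a CSV reblock turns text lines into binary blocks keyed by block index. The sketch below shows that mapping for a dense, single-column-block case; it is an assumption-laden simplification (no header, comma delimiter, hypothetical double[] rows), and a subsequent groupByKey/aggregateByKey would still be needed to assemble the rows of each block. The real instruction additionally handles headers, custom delimiters, sparse blocks, and column-wise blocking.
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

static JavaPairRDD<Long, double[]> csvRowsByBlock(JavaSparkContext jsc, String path, int brlen) {
    return jsc.textFile(path) // hypothetical path; the real input comes from the RDD handle
        .zipWithIndex() // (line, 0-based global row id)
        .mapToPair(t -> {
            String[] parts = t._1().split(",");
            double[] vals = new double[parts.length];
            for (int i = 0; i < parts.length; i++)
                vals[i] = Double.parseDouble(parts[i]);
            return new Tuple2<>(t._2() / brlen, vals); // key = block-row index
        });
}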
Use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project systemml by apache.
The class CastSPInstruction, method processInstruction:
@Override
@SuppressWarnings("unchecked")
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    String opcode = getOpcode();
    // get input RDD and prepare output
    JavaPairRDD<?, ?> in = sec.getRDDHandleForVariable(input1.getName(), InputInfo.BinaryBlockInputInfo);
    MatrixCharacteristics mcIn = sec.getMatrixCharacteristics(input1.getName());
    JavaPairRDD<?, ?> out = null;
    // convert frame-matrix / matrix-frame and set output
    if (opcode.equals(UnaryCP.CAST_AS_MATRIX_OPCODE)) {
        MatrixCharacteristics mcOut = new MatrixCharacteristics(mcIn);
        mcOut.setBlockSize(ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize());
        out = FrameRDDConverterUtils.binaryBlockToMatrixBlock((JavaPairRDD<Long, FrameBlock>) in, mcIn, mcOut);
    } else if (opcode.equals(UnaryCP.CAST_AS_FRAME_OPCODE)) {
        out = FrameRDDConverterUtils.matrixBlockToBinaryBlockLongIndex(sec.getSparkContext(), (JavaPairRDD<MatrixIndexes, MatrixBlock>) in, mcIn);
    } else {
        throw new DMLRuntimeException("Unsupported spark cast operation: " + opcode);
    }
    // update output statistics and add lineage
    sec.setRDDHandleForVariable(output.getName(), out);
    updateUnaryOutputMatrixCharacteristics(sec, input1.getName(), output.getName());
    sec.addLineageRDD(output.getName(), input1.getName());
    // update schema information for output frame
    if (opcode.equals(UnaryCP.CAST_AS_FRAME_OPCODE)) {
        sec.getFrameObject(output.getName()).setSchema(UtilFunctions.nCopies((int) mcIn.getCols(), ValueType.DOUBLE));
    }
}
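The two cast directions differ mainly in their keys: matrix blocks are keyed by (rowBlockIx, colBlockIx) MatrixIndexes, while frame binary blocks are keyed by the Long row index of the block's first row. A hedged sketch of that key conversion for the cast-as-frame direction, simplified to a single column block and leaving the value type unchanged (the real matrixBlockToBinaryBlockLongIndex also converts MatrixBlock to FrameBlock and merges column blocks):
import org.apache.spark.api.java.JavaPairRDD;
import scala.Tuple2;

// Hypothetical simplification: map the 1-based block-row index to the
// 1-based row index of the block's first row.
static JavaPairRDD<Long, MatrixBlock> toFrameKeys(
        JavaPairRDD<MatrixIndexes, MatrixBlock> in, int brlen) {
    return in.mapToPair(kv -> new Tuple2<>(
        (kv._1().getRowIndex() - 1) * brlen + 1, kv._2()));
}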