use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project incubator-systemml by apache.
the class CovarianceSPInstruction method processInstruction.
/**
 * Runs the covariance aggregate over two binary-block RDD inputs (optionally
 * combined with a third weights input) and binds the resulting double value
 * to the output variable as a scalar.
 *
 * @param ec execution context; cast to {@link SparkExecutionContext}
 */
@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    COVOperator covOp = (COVOperator) _optr;

    // the first two operands are always fetched as binary-block RDDs
    JavaPairRDD<MatrixIndexes, MatrixBlock> lhs = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> rhs = sec.getBinaryBlockRDDHandleForVariable(input2.getName());

    // join the operands by block index, map each joined tuple to a partial
    // aggregate and fold all partials into a single CM_COV_Object
    CM_COV_Object aggregate;
    if (input3 != null) {
        // weighted variant: the weights RDD is joined in as a third operand
        JavaPairRDD<MatrixIndexes, MatrixBlock> weights = sec.getBinaryBlockRDDHandleForVariable(input3.getName());
        aggregate = lhs.join(rhs).join(weights).values()
            .map(new RDDCOVWeightsFunction(covOp))
            .fold(new CM_COV_Object(), new RDDCOVReduceFunction(covOp));
    } else {
        // unweighted variant
        aggregate = lhs.join(rhs).values()
            .map(new RDDCOVFunction(covOp))
            .fold(new CM_COV_Object(), new RDDCOVReduceFunction(covOp));
    }

    // scalar output, hence no lineage bookkeeping
    double result = aggregate.getRequiredResult(_optr);
    ec.setScalarOutput(output.getName(), new DoubleObject(result));
}
use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project incubator-systemml by apache.
the class CpmmSPInstruction method processInstruction.
/**
 * Executes the cpmm matrix multiplication on Spark: both inputs are re-keyed
 * via {@code CpmmIndexFunction}, joined on that key, multiplied per joined
 * pair and finally aggregated according to {@code _aggtype}.
 *
 * <p>With {@code SparkAggType.SINGLE_BLOCK} the products are summed into one
 * in-memory block that is bound directly to the output variable; otherwise
 * the products are summed by key and the resulting RDD handle (plus lineage
 * to both inputs) is registered for the output variable.
 *
 * @param ec execution context; cast to {@link SparkExecutionContext}
 */
@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    final boolean singleBlock = (_aggtype == SparkAggType.SINGLE_BLOCK);

    // inputs and their matrix characteristics
    JavaPairRDD<MatrixIndexes, MatrixBlock> left = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> right = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
    MatrixCharacteristics mcLeft = sec.getMatrixCharacteristics(input1.getName());
    MatrixCharacteristics mcRight = sec.getMatrixCharacteristics(input2.getName());

    if (singleBlock) {
        // drop empty blocks up front (ultra-sparse inputs)
        left = left.filter(new FilterNonEmptyBlocksFunction());
        right = right.filter(new FilterNonEmptyBlocksFunction());
    }

    // join parallelism: preferred degree, capped by the maximum degree
    int preferredPar = getPreferredParJoin(mcLeft, mcRight, left.getNumPartitions(), right.getNumPartitions());
    int joinPar = Math.min(getMaxParJoin(mcLeft, mcRight), preferredPar);

    // core cpmm: re-key both sides, join over the common dimension and
    // multiply every joined block pair
    JavaPairRDD<Long, IndexedMatrixValue> leftKeyed = left.mapToPair(new CpmmIndexFunction(true));
    JavaPairRDD<Long, IndexedMatrixValue> rightKeyed = right.mapToPair(new CpmmIndexFunction(false));
    JavaPairRDD<MatrixIndexes, MatrixBlock> products = leftKeyed
        .join(rightKeyed, joinPar)
        .mapToPair(new CpmmMultiplyFunction());

    if (!singleBlock) {
        // DEFAULT: MULTI_BLOCK - aggregate per output block index
        JavaPairRDD<MatrixIndexes, MatrixBlock> summed = RDDAggregateUtils.sumByKeyStable(products, false);

        // register the output RDD together with its lineage
        sec.setRDDHandleForVariable(output.getName(), summed);
        sec.addLineageRDD(output.getName(), input1.getName());
        sec.addLineageRDD(output.getName(), input2.getName());

        // update output statistics if not inferred
        updateBinaryMMOutputMatrixCharacteristics(sec, true);
        return;
    }

    // SINGLE_BLOCK: prune empty products and sum everything into one block
    JavaPairRDD<MatrixIndexes, MatrixBlock> nonEmpty = products.filter(new FilterNonEmptyBlocksFunction());
    MatrixBlock total = RDDAggregateUtils.sumStable(nonEmpty);

    // single in-memory block, hence no lineage; binding the block also
    // maintains the output matrix characteristics implicitly
    sec.setMatrixOutput(output.getName(), total, getExtendedOpcode());
}
use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project incubator-systemml by apache.
the class CtableSPInstruction method processInstruction.
/**
 * Executes the ctable instruction on Spark.
 *
 * <p>Depending on the data types of the three inputs (and the
 * {@code _isExpand} flag) one of the ctable operation types is selected. All
 * paths end in an RDD of (index, double) cells, which is optionally filtered
 * to the requested output dimensions, converted to binary cells, and
 * registered as the output RDD. Lineage is recorded for every input that was
 * consumed as an RDD.
 *
 * @param ec execution context; cast to {@link SparkExecutionContext}
 * @throws DMLRuntimeException if the selected ctable operation is not handled
 *         here, if the intermediate state is inconsistent, or if exactly one
 *         of the two output dimensions is given as -1
 */
@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    // get input rdd handle
    JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> in2 = null;
    JavaPairRDD<MatrixIndexes, MatrixBlock> in3 = null;
    // -1 marks a scalar operand that is not used by the selected operation
    double scalar_input2 = -1, scalar_input3 = -1;
    Ctable.OperationTypes ctableOp = Ctable.findCtableOperationByInputDataTypes(input1.getDataType(), input2.getDataType(), input3.getDataType());
    // the expand flag overrides the operation derived from the data types
    ctableOp = _isExpand ? Ctable.OperationTypes.CTABLE_EXPAND_SCALAR_WEIGHT : ctableOp;
    MatrixCharacteristics mc1 = sec.getMatrixCharacteristics(input1.getName());
    MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
    // remember the input block sizes; the output block sizes are reset to -1
    // at the end of this method because the output consists of binary cells
    int brlen = mc1.getRowsPerBlock();
    int bclen = mc1.getColsPerBlock();
    JavaPairRDD<MatrixIndexes, ArrayList<MatrixBlock>> inputMBs = null;
    JavaPairRDD<MatrixIndexes, CTableMap> ctables = null;
    JavaPairRDD<MatrixIndexes, Double> bincellsNoFilter = null;
    // track which of inputs 2 and 3 were read as RDDs (for lineage below)
    boolean setLineage2 = false;
    boolean setLineage3 = false;
    switch (ctableOp) {
        case CTABLE_TRANSFORM: // (VECTOR)
            // F=ctable(A,B,W)
            in2 = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
            in3 = sec.getBinaryBlockRDDHandleForVariable(input3.getName());
            setLineage2 = true;
            setLineage3 = true;
            inputMBs = in1.cogroup(in2).cogroup(in3).mapToPair(new MapThreeMBIterableIntoAL());
            ctables = inputMBs.mapToPair(new PerformCTableMapSideOperation(ctableOp, scalar_input2, scalar_input3, this.instString, (SimpleOperator) _optr, _ignoreZeros));
            break;
        case CTABLE_EXPAND_SCALAR_WEIGHT: // (VECTOR)
            // F = ctable(seq,A) or F = ctable(seq,B,1)
            scalar_input3 = sec.getScalarInput(input3.getName(), input3.getValueType(), input3.isLiteral()).getDoubleValue();
            if (scalar_input3 == 1) {
                // weight 1: cells are produced directly from in2, without
                // building intermediate ctable maps
                in2 = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
                setLineage2 = true;
                bincellsNoFilter = in2.flatMapToPair(new ExpandScalarCtableOperation(brlen));
                break;
            }
            // no break here: for any other weight, control falls through to
            // the scalar-weight path below (ctableOp itself stays unchanged)
        case CTABLE_TRANSFORM_SCALAR_WEIGHT: // (VECTOR/MATRIX)
            // F = ctable(A,B) or F = ctable(A,B,1)
            in2 = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
            setLineage2 = true;
            scalar_input3 = sec.getScalarInput(input3.getName(), input3.getValueType(), input3.isLiteral()).getDoubleValue();
            inputMBs = in1.cogroup(in2).mapToPair(new MapTwoMBIterableIntoAL());
            ctables = inputMBs.mapToPair(new PerformCTableMapSideOperation(ctableOp, scalar_input2, scalar_input3, this.instString, (SimpleOperator) _optr, _ignoreZeros));
            break;
        case CTABLE_TRANSFORM_HISTOGRAM: // (VECTOR)
            // F=ctable(A,1) or F = ctable(A,1,1)
            scalar_input2 = sec.getScalarInput(input2.getName(), input2.getValueType(), input2.isLiteral()).getDoubleValue();
            scalar_input3 = sec.getScalarInput(input3.getName(), input3.getValueType(), input3.isLiteral()).getDoubleValue();
            inputMBs = in1.mapToPair(new MapMBIntoAL());
            ctables = inputMBs.mapToPair(new PerformCTableMapSideOperation(ctableOp, scalar_input2, scalar_input3, this.instString, (SimpleOperator) _optr, _ignoreZeros));
            break;
        case CTABLE_TRANSFORM_WEIGHTED_HISTOGRAM: // (VECTOR)
            // F=ctable(A,1,W)
            in3 = sec.getBinaryBlockRDDHandleForVariable(input3.getName());
            setLineage3 = true;
            scalar_input2 = sec.getScalarInput(input2.getName(), input2.getValueType(), input2.isLiteral()).getDoubleValue();
            inputMBs = in1.cogroup(in3).mapToPair(new MapTwoMBIterableIntoAL());
            ctables = inputMBs.mapToPair(new PerformCTableMapSideOperation(ctableOp, scalar_input2, scalar_input3, this.instString, (SimpleOperator) _optr, _ignoreZeros));
            break;
        default:
            throw new DMLRuntimeException("Encountered an invalid ctable operation (" + ctableOp + ") while executing instruction: " + this.toString());
    }
    // Now perform aggregation on ctables to get binaryCells
    if (bincellsNoFilter == null && ctables != null) {
        bincellsNoFilter = ctables.values().flatMapToPair(new ExtractBinaryCellsFromCTable());
        bincellsNoFilter = RDDAggregateUtils.sumCellsByKeyStable(bincellsNoFilter);
    } else if (bincellsNoFilter == null || ctables != null) {
        // exactly one of the two intermediates must have been produced by the
        // switch; both-set or both-unset indicates an inconsistent state
        throw new DMLRuntimeException("Incorrect ctable operation");
    }
    // handle known/unknown dimensions
    long outputDim1 = (_dim1Literal ? (long) Double.parseDouble(_outDim1) : (sec.getScalarInput(_outDim1, ValueType.DOUBLE, false)).getLongValue());
    long outputDim2 = (_dim2Literal ? (long) Double.parseDouble(_outDim2) : (sec.getScalarInput(_outDim2, ValueType.DOUBLE, false)).getLongValue());
    MatrixCharacteristics mcBinaryCells = null;
    // both dimensions given as -1 means they are derived from the data below
    boolean findDimensions = (outputDim1 == -1 && outputDim2 == -1);
    if (!findDimensions) {
        // not both are -1 here, so either one being -1 means exactly one
        // dimension was specified, which is rejected
        if (outputDim1 == -1 || outputDim2 == -1) {
            throw new DMLRuntimeException("Incorrect output dimensions passed to CtableSPInstruction:" + outputDim1 + " " + outputDim2);
        } else {
            mcBinaryCells = new MatrixCharacteristics(outputDim1, outputDim2, brlen, bclen);
        }
        // filtering according to given dimensions
        bincellsNoFilter = bincellsNoFilter.filter(new FilterCells(mcBinaryCells.getRows(), mcBinaryCells.getCols()));
    }
    // convert double values to matrix cell
    JavaPairRDD<MatrixIndexes, MatrixCell> binaryCells = bincellsNoFilter.mapToPair(new ConvertToBinaryCell());
    // find dimensions if necessary (w/ cache for reblock)
    if (findDimensions) {
        binaryCells = SparkUtils.cacheBinaryCellRDD(binaryCells);
        mcBinaryCells = SparkUtils.computeMatrixCharacteristics(binaryCells);
    }
    // store output rdd handle
    sec.setRDDHandleForVariable(output.getName(), binaryCells);
    mcOut.set(mcBinaryCells);
    // Since we are outputting binary cells, we set block sizes = -1
    mcOut.setRowsPerBlock(-1);
    mcOut.setColsPerBlock(-1);
    // lineage: input1 always, inputs 2 and 3 only if they were read as RDDs
    sec.addLineageRDD(output.getName(), input1.getName());
    if (setLineage2) {
        sec.addLineageRDD(output.getName(), input2.getName());
    }
    if (setLineage3) {
        sec.addLineageRDD(output.getName(), input3.getName());
    }
}
use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project incubator-systemml by apache.
the class FrameAppendRSPInstruction method processInstruction.
/**
 * Appends two frame RDDs on the reduce side.
 *
 * <p>For {@code _cbind}, both inputs are passed through
 * {@code ReduceSideAppendAlignFunction}, merged by key, joined, and combined
 * per key via {@code ReduceSideColumnsFunction}. Otherwise (rbind) the second
 * input is passed through {@code ReduceSideAppendRowsFunction} and unioned
 * with the first input. The output RDD, its lineage to both inputs and the
 * merged schema are then registered for the output variable.
 *
 * @param ec execution context; cast to {@link SparkExecutionContext}
 */
@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;
    JavaPairRDD<Long, FrameBlock> lhs = sec.getFrameBinaryBlockRDDHandleForVariable(input1.getName());
    JavaPairRDD<Long, FrameBlock> rhs = sec.getFrameBinaryBlockRDDHandleForVariable(input2.getName());

    // number of rows of the first input, handed to the align/shift functions
    long lhsRows = sec.getMatrixCharacteristics(input1.getName()).getRows();

    JavaPairRDD<Long, FrameBlock> result;
    if (!_cbind) {
        // rbind: transform the second input and concatenate it with the first
        JavaPairRDD<Long, FrameBlock> rhsShifted = rhs.mapToPair(new ReduceSideAppendRowsFunction(lhsRows));
        result = lhs.union(rhsShifted);
    } else {
        // cbind: align and merge each side, then join and combine per key
        JavaPairRDD<Long, FrameBlock> lhsAligned = FrameRDDAggregateUtils.mergeByKey(
            lhs.mapToPair(new ReduceSideAppendAlignFunction(lhsRows)));
        JavaPairRDD<Long, FrameBlock> rhsAligned = FrameRDDAggregateUtils.mergeByKey(
            rhs.mapToPair(new ReduceSideAppendAlignFunction(lhsRows)));
        result = lhsAligned.join(rhsAligned).mapValues(new ReduceSideColumnsFunction(_cbind));
    }

    // register the output RDD handle and its lineage in the symbol table
    updateBinaryAppendOutputMatrixCharacteristics(sec, _cbind);
    sec.setRDDHandleForVariable(output.getName(), result);
    sec.addLineageRDD(output.getName(), input1.getName());
    sec.addLineageRDD(output.getName(), input2.getName());

    // the output schema is the merge of both input schemas
    sec.getFrameObject(output.getName()).setSchema(
        sec.getFrameObject(input1.getName()).mergeSchemas(
            sec.getFrameObject(input2.getName())));
}
use of org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext in project incubator-systemml by apache.
the class MapmmChainSPInstruction method processInstruction.
/**
 * Executes the mapmmchain operation on Spark: the first input is read as an
 * RDD and mapped, together with the broadcast second input (and, for chain
 * types other than {@code ChainType.XtXv}, a broadcast third input), to
 * partial result blocks. The partials are summed into a single block that is
 * bound to the output variable.
 *
 * @param ec execution context; cast to {@link SparkExecutionContext}
 */
@Override
public void processInstruction(ExecutionContext ec) {
    SparkExecutionContext sec = (SparkExecutionContext) ec;

    // X is consumed as an RDD, v as a broadcast
    JavaPairRDD<MatrixIndexes, MatrixBlock> x = sec.getBinaryBlockRDDHandleForVariable(_input1.getName());
    PartitionedBroadcast<MatrixBlock> v = sec.getBroadcastForVariable(_input2.getName());

    // map every block of X to its partial result
    JavaRDD<MatrixBlock> partials;
    if (_chainType != ChainType.XtXv) {
        // ChainType.XtwXv / ChainType.XtXvy: needs the additional broadcast
        // and maps over the full (index, block) pairs
        PartitionedBroadcast<MatrixBlock> w = sec.getBroadcastForVariable(_input3.getName());
        partials = x.map(new RDDMapMMChainFunction2(v, w, _chainType));
    } else {
        // ChainType.XtXv: only the block values are needed
        partials = x.values().map(new RDDMapMMChainFunction(v));
    }

    // mapmmchain is guaranteed to have a single output block
    MatrixBlock result = RDDAggregateUtils.sumStable(partials);

    // single in-memory block, hence no lineage; binding the block also
    // maintains the output matrix characteristics implicitly
    sec.setMatrixOutput(_output.getName(), result, getExtendedOpcode());
}
Aggregations