Use of org.apache.sysml.runtime.matrix.mapred.DistributedCacheInput in project incubator-systemml by apache.
The class BinaryMInstruction, method processInstruction:
@Override
public void processInstruction(Class<? extends MatrixValue> valueClass, CachedValueMap cachedValues, IndexedMatrixValue tempValue, IndexedMatrixValue zeroInput, int blockRowFactor, int blockColFactor) throws DMLRuntimeException {
    ArrayList<IndexedMatrixValue> blkList = cachedValues.get(input1);
    if (blkList == null)
        return;
    for (IndexedMatrixValue in1 : blkList) {
        //allocate space for the output value
        //try to avoid copying as much as possible
        IndexedMatrixValue out;
        if ((output != input1 && output != input2))
            out = cachedValues.holdPlace(output, valueClass);
        else
            out = tempValue;
        //get second input (vector block) from the distributed cache
        DistributedCacheInput dcInput = MRBaseForCommonInstructions.dcValues.get(input2);
        IndexedMatrixValue in2 = null;
        if (_vectorType == VectorType.COL_VECTOR)
            in2 = dcInput.getDataBlock((int) in1.getIndexes().getRowIndex(), 1);
        else //_vectorType == VectorType.ROW_VECTOR
            in2 = dcInput.getDataBlock(1, (int) in1.getIndexes().getColumnIndex());
        //process instruction
        out.getIndexes().setIndexes(in1.getIndexes());
        OperationsOnMatrixValues.performBinaryIgnoreIndexes(in1.getValue(), in2.getValue(), out.getValue(), ((BinaryOperator) optr));
        //put the output value in the cache
        if (out == tempValue)
            cachedValues.add(output, out);
    }
}
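The lookup above keys the cached vector block by the current data block's row or column index, depending on the vector orientation. The following standalone sketch (hypothetical class and method names, not SystemML code) illustrates that 1-based block-index selection:

// Illustrative sketch (not SystemML code): which distributed-cache block a
// map-side binary vector operation needs for a given left-hand-side block.
// Block indexes are 1-based, as in the snippet above.
public class VectorBlockLookup {

    enum VectorType { ROW_VECTOR, COL_VECTOR }

    /** Returns {rowBlockIndex, colBlockIndex} of the vector block to fetch. */
    static long[] vectorBlockIndexes(long lhsRowBlock, long lhsColBlock, VectorType type) {
        if (type == VectorType.COL_VECTOR)
            return new long[] { lhsRowBlock, 1 };  // column vector: aligned by row block
        else
            return new long[] { 1, lhsColBlock };  // row vector: aligned by column block
    }

    public static void main(String[] args) {
        // LHS block (3,7) with a row vector: fetch vector block (1,7).
        long[] ix = vectorBlockIndexes(3, 7, VectorType.ROW_VECTOR);
        System.out.println(ix[0] + "," + ix[1]);   // prints 1,7
    }
}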
Use of org.apache.sysml.runtime.matrix.mapred.DistributedCacheInput in project incubator-systemml by apache.
The class AggregateBinaryInstruction, method processMapMultInstruction:
/**
 * Helper function to perform map-side matrix-matrix multiplication.
 *
 * @param valueClass matrix value class
 * @param cachedValues cached value map
 * @param in1 indexed matrix value 1
 * @param in2 indexed matrix value 2
 * @param blockRowFactor number of rows per block
 * @param blockColFactor number of columns per block
 * @throws DMLRuntimeException if DMLRuntimeException occurs
 */
private void processMapMultInstruction(Class<? extends MatrixValue> valueClass, CachedValueMap cachedValues, IndexedMatrixValue in1, IndexedMatrixValue in2, int blockRowFactor, int blockColFactor) throws DMLRuntimeException {
    boolean removeOutput = true;
    if (_cacheType.isRight()) {
        DistributedCacheInput dcInput = MRBaseForCommonInstructions.dcValues.get(input2);
        long in2_cols = dcInput.getNumCols();
        long in2_colBlocks = (long) Math.ceil(((double) in2_cols) / dcInput.getNumColsPerBlock());
        for (int bidx = 1; bidx <= in2_colBlocks; bidx++) {
            // Matrix multiply A[i,k] %*% B[k,bidx]
            // Setup input2 block
            IndexedMatrixValue in2Block = dcInput.getDataBlock((int) in1.getIndexes().getColumnIndex(), bidx);
            MatrixValue in2BlockValue = in2Block.getValue();
            MatrixIndexes in2BlockIndex = in2Block.getIndexes();
            //allocate space for the output value
            IndexedMatrixValue out = cachedValues.holdPlace(output, valueClass);
            //process instruction
            OperationsOnMatrixValues.performAggregateBinary(in1.getIndexes(), in1.getValue(), in2BlockIndex, in2BlockValue, out.getIndexes(), out.getValue(), ((AggregateBinaryOperator) optr));
            removeOutput &= (!_outputEmptyBlocks && out.getValue().isEmpty());
        }
    } else {
        DistributedCacheInput dcInput = MRBaseForCommonInstructions.dcValues.get(input1);
        long in1_rows = dcInput.getNumRows();
        long in1_rowsBlocks = (long) Math.ceil(((double) in1_rows) / dcInput.getNumRowsPerBlock());
        for (int bidx = 1; bidx <= in1_rowsBlocks; bidx++) {
            // Matrix multiply A[bidx,k] %*% B[k,j]
            // Setup input1 block
            IndexedMatrixValue in1Block = dcInput.getDataBlock(bidx, (int) in2.getIndexes().getRowIndex());
            MatrixValue in1BlockValue = in1Block.getValue();
            MatrixIndexes in1BlockIndex = in1Block.getIndexes();
            //allocate space for the output value
            IndexedMatrixValue out = cachedValues.holdPlace(output, valueClass);
            //process instruction
            OperationsOnMatrixValues.performAggregateBinary(in1BlockIndex, in1BlockValue, in2.getIndexes(), in2.getValue(), out.getIndexes(), out.getValue(), ((AggregateBinaryOperator) optr));
            removeOutput &= (!_outputEmptyBlocks && out.getValue().isEmpty());
        }
    }
    //empty block output filter (enabled by the compiler if the consuming operation is in CP)
    if (removeOutput)
        cachedValues.remove(output);
}
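The loop bound above is a ceiling division of the cached input's dimension by its block size. A minimal standalone sketch of that enumeration, with hypothetical sizes and 1-based block indexes as in the snippet:

// Illustrative sketch (not SystemML code): enumerating the 1-based block
// indexes of a blocked matrix dimension, as done for in2_colBlocks above.
public class BlockEnumeration {
    public static void main(String[] args) {
        long cols = 2500;          // hypothetical number of columns
        int colsPerBlock = 1000;   // hypothetical block size
        long colBlocks = (long) Math.ceil((double) cols / colsPerBlock);  // = 3
        for (int bidx = 1; bidx <= colBlocks; bidx++)
            System.out.println("column block " + bidx + " of " + colBlocks);
    }
}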
Use of org.apache.sysml.runtime.matrix.mapred.DistributedCacheInput in project incubator-systemml by apache.
The class GroupedAggregateMInstruction, method processInstruction:
@Override
public void processInstruction(Class<? extends MatrixValue> valueClass, CachedValueMap cachedValues, IndexedMatrixValue tempValue, IndexedMatrixValue zeroInput, int blockRowFactor, int blockColFactor) throws DMLRuntimeException {
    ArrayList<IndexedMatrixValue> blkList = cachedValues.get(input1);
    if (blkList == null)
        return;
    for (IndexedMatrixValue in1 : blkList) {
        if (in1 == null)
            continue;
        DistributedCacheInput dcInput = MRBaseForCommonInstructions.dcValues.get(input2);
        //get all inputs
        MatrixIndexes ix = in1.getIndexes();
        MatrixBlock groups = (MatrixBlock) dcInput.getDataBlock((int) ix.getRowIndex(), 1).getValue();
        //block sizes for the blocked result
        int brlen = dcInput.getNumRowsPerBlock();
        int bclen = dcInput.getNumColsPerBlock();
        //execute map grouped aggregate operations
        ArrayList<IndexedMatrixValue> outlist = new ArrayList<IndexedMatrixValue>();
        OperationsOnMatrixValues.performMapGroupedAggregate(getOperator(), in1, groups, _ngroups, brlen, bclen, outlist);
        //output all result blocks
        for (IndexedMatrixValue out : outlist) {
            cachedValues.add(output, out);
        }
    }
}
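performMapGroupedAggregate aggregates each cell of the data block into the group given by the co-aligned groups column vector fetched from the distributed cache. As a conceptual illustration only (not SystemML's implementation), a grouped SUM over plain arrays with 1-based group ids looks like this:

// Conceptual sketch only (not SystemML's implementation): a grouped SUM over a
// column of values, where groups[i] holds the 1-based group id of row i.
public class GroupedSum {
    static double[] groupedSum(double[] values, int[] groups, int numGroups) {
        double[] agg = new double[numGroups];           // one accumulator per group
        for (int i = 0; i < values.length; i++)
            agg[groups[i] - 1] += values[i];            // group ids are 1-based
        return agg;
    }

    public static void main(String[] args) {
        double[] v = { 1, 2, 3, 4 };
        int[]    g = { 1, 2, 1, 2 };
        double[] out = groupedSum(v, g, 2);
        System.out.println(out[0] + " " + out[1]);      // 4.0 6.0
    }
}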
Use of org.apache.sysml.runtime.matrix.mapred.DistributedCacheInput in project incubator-systemml by apache.
The class UaggOuterChainInstruction, method processInstruction:
@Override
public void processInstruction(Class<? extends MatrixValue> valueClass, CachedValueMap cachedValues, IndexedMatrixValue tempValue, IndexedMatrixValue zeroInput, int blockRowFactor, int blockColFactor) throws DMLRuntimeException {
    ArrayList<IndexedMatrixValue> blkList = null;
    boolean rightCached = (_uaggOp.indexFn instanceof ReduceCol || _uaggOp.indexFn instanceof ReduceAll || !LibMatrixOuterAgg.isSupportedUaggOp(_uaggOp, _bOp));
    //get the main data input
    if (rightCached)
        blkList = cachedValues.get(input1);
    else // ReduceRow
        blkList = cachedValues.get(input2);
    if (blkList == null)
        return;
    for (IndexedMatrixValue imv : blkList) {
        if (imv == null)
            continue;
        MatrixIndexes in1Ix = imv.getIndexes();
        MatrixValue in1Val = imv.getValue();
        //allocate space for the intermediate and output value
        IndexedMatrixValue iout = cachedValues.holdPlace(output, valueClass);
        MatrixIndexes outIx = iout.getIndexes();
        MatrixValue outVal = iout.getValue();
        MatrixBlock corr = null;
        //get the distributed cache input
        byte dcInputIx = rightCached ? input2 : input1;
        DistributedCacheInput dcInput = MRBaseForCommonInstructions.dcValues.get(dcInputIx);
        //process instruction
        if (LibMatrixOuterAgg.isSupportedUaggOp(_uaggOp, _bOp)) {
            if ((LibMatrixOuterAgg.isRowIndexMax(_uaggOp)) || (LibMatrixOuterAgg.isRowIndexMin(_uaggOp))) {
                if (_bv == null) {
                    if (rightCached)
                        _bv = dcInput.getRowVectorArray();
                    else
                        _bv = dcInput.getColumnVectorArray();
                    _bvi = LibMatrixOuterAgg.prepareRowIndices(_bv.length, _bv, _bOp, _uaggOp);
                }
            } else {
                //step 1: prepare sorted rhs input (once per task)
                if (_bv == null) {
                    if (rightCached)
                        _bv = dcInput.getRowVectorArray();
                    else
                        _bv = dcInput.getColumnVectorArray();
                    Arrays.sort(_bv);
                }
            }
            LibMatrixOuterAgg.resetOutputMatix(in1Ix, (MatrixBlock) in1Val, outIx, (MatrixBlock) outVal, _uaggOp);
            LibMatrixOuterAgg.aggregateMatrix((MatrixBlock) in1Val, (MatrixBlock) outVal, _bv, _bvi, _bOp, _uaggOp);
        } else { //default case
            long in2_cols = dcInput.getNumCols();
            long in2_colBlocks = (long) Math.ceil(((double) in2_cols) / dcInput.getNumColsPerBlock());
            for (int bidx = 1; bidx <= in2_colBlocks; bidx++) {
                IndexedMatrixValue imv2 = dcInput.getDataBlock(1, bidx);
                MatrixValue in2Val = imv2.getValue();
                //outer block operation
                OperationsOnMatrixValues.performBinaryIgnoreIndexes(in1Val, in2Val, _tmpVal1, _bOp);
                //unary aggregate operation
                OperationsOnMatrixValues.performAggregateUnary(in1Ix, _tmpVal1, outIx, _tmpVal2, _uaggOp, blockRowFactor, blockColFactor);
                //aggregate over all rhs blocks
                if (corr == null) {
                    outVal.reset(_tmpVal2.getNumRows(), _tmpVal2.getNumColumns(), false);
                    corr = new MatrixBlock(_tmpVal2.getNumRows(), _tmpVal2.getNumColumns(), false);
                }
                if (_aggOp.correctionExists)
                    OperationsOnMatrixValues.incrementalAggregation(outVal, corr, _tmpVal2, _aggOp, true);
                else
                    OperationsOnMatrixValues.incrementalAggregation(outVal, null, _tmpVal2, _aggOp, true);
            }
        }
    }
}
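In the supported-operation path, the cached right-hand-side vector is materialized once per task and, for the comparison aggregates, sorted (Arrays.sort(_bv)) so the full outer block never needs to be materialized. The following is a conceptual sketch only (hypothetical helper, not LibMatrixOuterAgg): with a sorted vector, a row sum of the outer comparison x <= v reduces to a binary search.

import java.util.Arrays;

// Conceptual sketch only (not LibMatrixOuterAgg): with the right-hand-side
// vector sorted, a row aggregate of the outer comparison "x <= v" reduces to
// a binary search instead of materializing the full outer block.
public class OuterCompareCount {
    /** Number of entries of sorted v that are >= x, i.e. rowSum(x <= v). */
    static int countGreaterOrEqual(double[] sortedV, double x) {
        int pos = Arrays.binarySearch(sortedV, x);
        if (pos < 0)
            pos = -pos - 1;                    // insertion point of x
        else
            while (pos > 0 && sortedV[pos - 1] == x)
                pos--;                         // step back to the first occurrence
        return sortedV.length - pos;
    }

    public static void main(String[] args) {
        double[] v = { 5, 1, 3, 3, 9 };
        Arrays.sort(v);                        // sorted once, as in the snippet
        System.out.println(countGreaterOrEqual(v, 3));  // 4  (3,3,5,9)
    }
}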
Use of org.apache.sysml.runtime.matrix.mapred.DistributedCacheInput in project incubator-systemml by apache.
The class PMMJMRInstruction, method processInstruction:
@Override
public void processInstruction(Class<? extends MatrixValue> valueClass, CachedValueMap cachedValues, IndexedMatrixValue tempValue, IndexedMatrixValue zeroInput, int blockRowFactor, int blockColFactor) throws DMLRuntimeException {
    //get both matrix inputs (left side is always the permutation matrix)
    DistributedCacheInput dcInput = MRBaseForCommonInstructions.dcValues.get(input1);
    IndexedMatrixValue in2 = cachedValues.getFirst(input2);
    IndexedMatrixValue in1 = dcInput.getDataBlock((int) in2.getIndexes().getRowIndex(), 1);
    MatrixBlock mb1 = (MatrixBlock) in1.getValue();
    MatrixBlock mb2 = (MatrixBlock) in2.getValue();
    //compute target block indexes
    long minPos = UtilFunctions.toLong(mb1.minNonZero());
    long maxPos = UtilFunctions.toLong(mb1.max());
    long rowIX1 = (minPos - 1) / blockRowFactor + 1;
    long rowIX2 = (maxPos - 1) / blockRowFactor + 1;
    boolean multipleOuts = (rowIX1 != rowIX2);
    if (minPos >= 1) { //at least one row selected
        //output sparsity estimate
        double spmb1 = OptimizerUtils.getSparsity(mb1.getNumRows(), 1, mb1.getNonZeros());
        long estnnz = (long) (spmb1 * mb2.getNonZeros());
        boolean sparse = MatrixBlock.evalSparseFormatInMemory(blockRowFactor, mb2.getNumColumns(), estnnz);
        //compute and allocate output blocks
        IndexedMatrixValue out1 = cachedValues.holdPlace(output, valueClass);
        IndexedMatrixValue out2 = multipleOuts ? cachedValues.holdPlace(output, valueClass) : null;
        out1.getValue().reset(blockRowFactor, mb2.getNumColumns(), sparse);
        if (out2 != null)
            out2.getValue().reset(UtilFunctions.computeBlockSize(_rlen, rowIX2, blockRowFactor), mb2.getNumColumns(), sparse);
        //compute core matrix permutation (assumes that out1 has default blocksize,
        //hence we do a meta data correction afterwards)
        mb1.permutationMatrixMultOperations(mb2, out1.getValue(), (out2 != null) ? out2.getValue() : null);
        ((MatrixBlock) out1.getValue()).setNumRows(UtilFunctions.computeBlockSize(_rlen, rowIX1, blockRowFactor));
        out1.getIndexes().setIndexes(rowIX1, in2.getIndexes().getColumnIndex());
        if (out2 != null)
            out2.getIndexes().setIndexes(rowIX2, in2.getIndexes().getColumnIndex());
        //empty block output filter (enabled by the compiler if the consuming operation is in CP)
        if (!_outputEmptyBlocks && out1.getValue().isEmpty() && (out2 == null || out2.getValue().isEmpty())) {
            cachedValues.remove(output);
        }
    }
}
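The target row-block indexes and the corrected block sizes above follow from 1-based block arithmetic: (pos - 1) / blockRowFactor + 1 locates the block, and the last block may hold fewer than blockRowFactor rows. A standalone sketch with hypothetical names and sizes (blockSize mirrors what UtilFunctions.computeBlockSize is used for above):

// Illustrative sketch (not SystemML code): 1-based block arithmetic used above
// to locate the target row block and to correct the size of the last block.
public class BlockArithmetic {
    /** 1-based block index containing the 1-based global row position. */
    static long rowBlockIndex(long pos, int blockRowFactor) {
        return (pos - 1) / blockRowFactor + 1;
    }

    /** Rows in block bix of a matrix with rlen rows (last block may be smaller). */
    static long blockSize(long rlen, long bix, int blockRowFactor) {
        return Math.min(blockRowFactor, rlen - (bix - 1) * blockRowFactor);
    }

    public static void main(String[] args) {
        // 2500 rows, block size 1000: row 2300 lies in block 3, which has 500 rows.
        System.out.println(rowBlockIndex(2300, 1000));   // 3
        System.out.println(blockSize(2500, 3, 1000));    // 500
    }
}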