use of org.apache.sysml.runtime.matrix.data.CTableMap in project systemml by apache.
the class CtableSPInstruction method processInstruction.
@Override
public void processInstruction(ExecutionContext ec) {
SparkExecutionContext sec = (SparkExecutionContext) ec;
// get input rdd handle
JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
JavaPairRDD<MatrixIndexes, MatrixBlock> in2 = null;
JavaPairRDD<MatrixIndexes, MatrixBlock> in3 = null;
double scalar_input2 = -1, scalar_input3 = -1;
Ctable.OperationTypes ctableOp = Ctable.findCtableOperationByInputDataTypes(input1.getDataType(), input2.getDataType(), input3.getDataType());
ctableOp = _isExpand ? Ctable.OperationTypes.CTABLE_EXPAND_SCALAR_WEIGHT : ctableOp;
MatrixCharacteristics mc1 = sec.getMatrixCharacteristics(input1.getName());
MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
// First get the block sizes and then set them as -1 to allow for binary cell reblock
int brlen = mc1.getRowsPerBlock();
int bclen = mc1.getColsPerBlock();
JavaPairRDD<MatrixIndexes, ArrayList<MatrixBlock>> inputMBs = null;
JavaPairRDD<MatrixIndexes, CTableMap> ctables = null;
JavaPairRDD<MatrixIndexes, Double> bincellsNoFilter = null;
boolean setLineage2 = false;
boolean setLineage3 = false;
switch(ctableOp) {
case CTABLE_TRANSFORM: // (VECTOR)
// F=ctable(A,B,W)
in2 = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
in3 = sec.getBinaryBlockRDDHandleForVariable(input3.getName());
setLineage2 = true;
setLineage3 = true;
inputMBs = in1.cogroup(in2).cogroup(in3).mapToPair(new MapThreeMBIterableIntoAL());
ctables = inputMBs.mapToPair(new PerformCTableMapSideOperation(ctableOp, scalar_input2, scalar_input3, this.instString, (SimpleOperator) _optr, _ignoreZeros));
break;
case CTABLE_EXPAND_SCALAR_WEIGHT: // (VECTOR)
// F = ctable(seq,A) or F = ctable(seq,B,1)
scalar_input3 = sec.getScalarInput(input3.getName(), input3.getValueType(), input3.isLiteral()).getDoubleValue();
if (scalar_input3 == 1) {
in2 = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
setLineage2 = true;
bincellsNoFilter = in2.flatMapToPair(new ExpandScalarCtableOperation(brlen));
break;
}
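// intentional fall-through: a scalar weight other than 1 is handled by the CTABLE_TRANSFORM_SCALAR_WEIGHT case below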
case CTABLE_TRANSFORM_SCALAR_WEIGHT: // (VECTOR/MATRIX)
// F = ctable(A,B) or F = ctable(A,B,1)
in2 = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
setLineage2 = true;
scalar_input3 = sec.getScalarInput(input3.getName(), input3.getValueType(), input3.isLiteral()).getDoubleValue();
inputMBs = in1.cogroup(in2).mapToPair(new MapTwoMBIterableIntoAL());
ctables = inputMBs.mapToPair(new PerformCTableMapSideOperation(ctableOp, scalar_input2, scalar_input3, this.instString, (SimpleOperator) _optr, _ignoreZeros));
break;
case CTABLE_TRANSFORM_HISTOGRAM: // (VECTOR)
// F=ctable(A,1) or F = ctable(A,1,1)
scalar_input2 = sec.getScalarInput(input2.getName(), input2.getValueType(), input2.isLiteral()).getDoubleValue();
scalar_input3 = sec.getScalarInput(input3.getName(), input3.getValueType(), input3.isLiteral()).getDoubleValue();
inputMBs = in1.mapToPair(new MapMBIntoAL());
ctables = inputMBs.mapToPair(new PerformCTableMapSideOperation(ctableOp, scalar_input2, scalar_input3, this.instString, (SimpleOperator) _optr, _ignoreZeros));
break;
case CTABLE_TRANSFORM_WEIGHTED_HISTOGRAM: // (VECTOR)
// F=ctable(A,1,W)
in3 = sec.getBinaryBlockRDDHandleForVariable(input3.getName());
setLineage3 = true;
scalar_input2 = sec.getScalarInput(input2.getName(), input2.getValueType(), input2.isLiteral()).getDoubleValue();
inputMBs = in1.cogroup(in3).mapToPair(new MapTwoMBIterableIntoAL());
ctables = inputMBs.mapToPair(new PerformCTableMapSideOperation(ctableOp, scalar_input2, scalar_input3, this.instString, (SimpleOperator) _optr, _ignoreZeros));
break;
default:
throw new DMLRuntimeException("Encountered an invalid ctable operation (" + ctableOp + ") while executing instruction: " + this.toString());
}
// Now perform aggregation on ctables to get binaryCells
if (bincellsNoFilter == null && ctables != null) {
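// extract all cells from the partial ctable maps and sum duplicates by (row, col) key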
bincellsNoFilter = ctables.values().flatMapToPair(new ExtractBinaryCellsFromCTable());
bincellsNoFilter = RDDAggregateUtils.sumCellsByKeyStable(bincellsNoFilter);
} else if (!(bincellsNoFilter != null && ctables == null)) {
throw new DMLRuntimeException("Incorrect ctable operation");
}
// handle known/unknown dimensions
long outputDim1 = (_dim1Literal ? (long) Double.parseDouble(_outDim1) : (sec.getScalarInput(_outDim1, ValueType.DOUBLE, false)).getLongValue());
long outputDim2 = (_dim2Literal ? (long) Double.parseDouble(_outDim2) : (sec.getScalarInput(_outDim2, ValueType.DOUBLE, false)).getLongValue());
MatrixCharacteristics mcBinaryCells = null;
boolean findDimensions = (outputDim1 == -1 && outputDim2 == -1);
if (!findDimensions) {
if ((outputDim1 == -1 && outputDim2 != -1) || (outputDim1 != -1 && outputDim2 == -1))
throw new DMLRuntimeException("Incorrect output dimensions passed to TernarySPInstruction:" + outputDim1 + " " + outputDim2);
else
mcBinaryCells = new MatrixCharacteristics(outputDim1, outputDim2, brlen, bclen);
// filtering according to given dimensions
bincellsNoFilter = bincellsNoFilter.filter(new FilterCells(mcBinaryCells.getRows(), mcBinaryCells.getCols()));
}
// convert double values to matrix cell
JavaPairRDD<MatrixIndexes, MatrixCell> binaryCells = bincellsNoFilter.mapToPair(new ConvertToBinaryCell());
// find dimensions if necessary (w/ cache for reblock)
if (findDimensions) {
binaryCells = SparkUtils.cacheBinaryCellRDD(binaryCells);
mcBinaryCells = SparkUtils.computeMatrixCharacteristics(binaryCells);
}
// store output rdd handle
sec.setRDDHandleForVariable(output.getName(), binaryCells);
mcOut.set(mcBinaryCells);
// Since we are outputting binary cells, we set block sizes to -1
mcOut.setRowsPerBlock(-1);
mcOut.setColsPerBlock(-1);
sec.addLineageRDD(output.getName(), input1.getName());
if (setLineage2)
sec.addLineageRDD(output.getName(), input2.getName());
if (setLineage3)
sec.addLineageRDD(output.getName(), input3.getName());
}
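The Spark path above builds one CTableMap per input block and then sums duplicate cells across blocks. As a rough standalone sketch of that aggregation idea, the following uses a plain HashMap keyed by an encoded (row, col) pair in place of the real CTableMap/RDD machinery; all names are illustrative, not SystemML API, and the key encoding assumes indexes fit in 32 bits.

import java.util.HashMap;
import java.util.Map;

public class CtableAggregationSketch {
    // encode a 1-based (row, col) cell coordinate into a single long key
    private static long key(long row, long col) {
        return (row << 32) | (col & 0xFFFFFFFFL);
    }

    // F = ctable(A, B, W): cell (A[i], B[i]) accumulates weight W[i]
    public static Map<Long, Double> ctable(double[] a, double[] b, double[] w) {
        Map<Long, Double> cells = new HashMap<>();
        for (int i = 0; i < a.length; i++)
            cells.merge(key((long) a[i], (long) b[i]), w[i], Double::sum);
        return cells;
    }

    public static void main(String[] args) {
        double[] a = {1, 2, 1, 3};
        double[] b = {1, 1, 1, 2};
        double[] w = {1, 1, 1, 1};
        // cell (1,1) receives weight 2; cells (2,1) and (3,2) receive weight 1
        System.out.println(ctable(a, b, w));
    }
}

In the instruction itself this two-phase structure shows up as PerformCTableMapSideOperation (per-block maps) followed by RDDAggregateUtils.sumCellsByKeyStable (global merge).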
use of org.apache.sysml.runtime.matrix.data.CTableMap in project systemml by apache.
the class GMRCtableBuffer method flushBuffer.
public void flushBuffer(Reporter reporter) throws RuntimeException {
try {
if (_mapBuffer != null) {
MatrixIndexes key = null; // new MatrixIndexes();
MatrixCell value = new MatrixCell();
for (Entry<Byte, CTableMap> ctable : _mapBuffer.entrySet()) {
ArrayList<Integer> resultIDs = ReduceBase.getOutputIndexes(ctable.getKey(), _resultIndexes);
CTableMap resultMap = ctable.getValue();
// maintain result dims and nonzeros
for (Integer i : resultIDs) {
_resultNonZeros[i] += resultMap.size();
if (_resultDimsUnknown[i] == (byte) 1) {
_resultMaxRowDims[i] = Math.max(resultMap.getMaxRow(), _resultMaxRowDims[i]);
_resultMaxColDims[i] = Math.max(resultMap.getMaxColumn(), _resultMaxColDims[i]);
}
}
// output result data
Iterator<ADoubleEntry> iter = resultMap.getIterator();
while (iter.hasNext()) {
ADoubleEntry e = iter.next();
key = new MatrixIndexes(e.getKey1(), e.getKey2());
value.setValue(e.value);
for (Integer i : resultIDs) _collector.collectOutput(key, value, i, reporter);
}
}
} else if (_blockBuffer != null) {
MatrixIndexes key = new MatrixIndexes(1, 1);
// DataConverter.writeBinaryBlockMatrixToHDFS(path, job, mat, mc.get_rows(), mc.get_cols(), mc.get_rows_per_block(), mc.get_cols_per_block(), replication);
for (Entry<Byte, MatrixBlock> ctable : _blockBuffer.entrySet()) {
ArrayList<Integer> resultIDs = ReduceBase.getOutputIndexes(ctable.getKey(), _resultIndexes);
MatrixBlock outBlock = ctable.getValue();
outBlock.recomputeNonZeros();
// TODO: change hard coding of 1000
int brlen = 1000, bclen = 1000;
int rlen = outBlock.getNumRows();
int clen = outBlock.getNumColumns();
// final output matrix is smaller than a single block
if (rlen <= brlen && clen <= bclen) {
key = new MatrixIndexes(1, 1);
for (Integer i : resultIDs) {
_collector.collectOutput(key, outBlock, i, reporter);
_resultNonZeros[i] += outBlock.getNonZeros();
}
} else {
// Following code is similar to that in DataConverter.writeBinaryBlockMatrixToHDFS
// initialize blocks for reuse (at most 4 different blocks required)
MatrixBlock[] blocks = MatrixWriter.createMatrixBlocksForReuse(rlen, clen, brlen, bclen, true, outBlock.getNonZeros());
// create and write subblocks of matrix
for (int blockRow = 0; blockRow < (int) Math.ceil(rlen / (double) brlen); blockRow++) {
for (int blockCol = 0; blockCol < (int) Math.ceil(clen / (double) bclen); blockCol++) {
int maxRow = (blockRow * brlen + brlen < rlen) ? brlen : rlen - blockRow * brlen;
int maxCol = (blockCol * bclen + bclen < clen) ? bclen : clen - blockCol * bclen;
int row_offset = blockRow * brlen;
int col_offset = blockCol * bclen;
// get reuse matrix block
MatrixBlock block = MatrixWriter.getMatrixBlockForReuse(blocks, maxRow, maxCol, brlen, bclen);
// copy submatrix to block
outBlock.slice(row_offset, row_offset + maxRow - 1, col_offset, col_offset + maxCol - 1, block);
// TODO: skip empty "block"
// append block to sequence file
key.setIndexes(blockRow + 1, blockCol + 1);
for (Integer i : resultIDs) {
_collector.collectOutput(key, block, i, reporter);
_resultNonZeros[i] += block.getNonZeros();
}
// reset block for later reuse
block.reset();
}
}
}
}
} else {
throw new DMLRuntimeException("Unexpected.. both ctable buffers are empty.");
}
} catch (Exception ex) {
throw new RuntimeException("Failed to flush ctable buffer.", ex);
}
// remove existing partial ctables
if (_mapBuffer != null)
_mapBuffer.clear();
else
_blockBuffer.clear();
}
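The subblock loop above derives each tile's size and offset purely from the block dimensions. A self-contained sketch of that index arithmetic, with hypothetical names mirroring the maxRow/maxCol/offset computation in the loop:

public class BlockTilingSketch {
    // enumerate the (offset, size) of every brlen x bclen tile covering an rlen x clen matrix
    public static void enumerateTiles(int rlen, int clen, int brlen, int bclen) {
        for (int blockRow = 0; blockRow < (int) Math.ceil(rlen / (double) brlen); blockRow++) {
            for (int blockCol = 0; blockCol < (int) Math.ceil(clen / (double) bclen); blockCol++) {
                // full-size tile unless we are at the ragged right/bottom boundary
                int maxRow = (blockRow * brlen + brlen < rlen) ? brlen : rlen - blockRow * brlen;
                int maxCol = (blockCol * bclen + bclen < clen) ? bclen : clen - blockCol * bclen;
                int rowOffset = blockRow * brlen;
                int colOffset = blockCol * bclen;
                // block indexes are 1-based, as for MatrixIndexes in the code above
                System.out.printf("block (%d,%d): offset (%d,%d), size %dx%d%n",
                        blockRow + 1, blockCol + 1, rowOffset, colOffset, maxRow, maxCol);
            }
        }
    }

    public static void main(String[] args) {
        // a 2500 x 1200 matrix with 1000 x 1000 blocks yields 3 x 2 tiles,
        // with ragged 500-row and 200-column tiles at the boundaries
        enumerateTiles(2500, 1200, 1000, 1000);
    }
}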
use of org.apache.sysml.runtime.matrix.data.CTableMap in project systemml by apache.
the class CtableCPInstruction method processInstruction.
@Override
public void processInstruction(ExecutionContext ec) {
MatrixBlock matBlock1 = ec.getMatrixInput(input1.getName(), getExtendedOpcode());
MatrixBlock matBlock2 = null, wtBlock = null;
double cst1, cst2;
CTableMap resultMap = new CTableMap(EntryType.INT);
MatrixBlock resultBlock = null;
Ctable.OperationTypes ctableOp = findCtableOperation();
ctableOp = _isExpand ? Ctable.OperationTypes.CTABLE_EXPAND_SCALAR_WEIGHT : ctableOp;
long outputDim1 = (_dim1Literal ? (long) Double.parseDouble(_outDim1) : (ec.getScalarInput(_outDim1, ValueType.DOUBLE, false)).getLongValue());
long outputDim2 = (_dim2Literal ? (long) Double.parseDouble(_outDim2) : (ec.getScalarInput(_outDim2, ValueType.DOUBLE, false)).getLongValue());
boolean outputDimsKnown = (outputDim1 != -1 && outputDim2 != -1);
if (outputDimsKnown) {
int inputRows = matBlock1.getNumRows();
int inputCols = matBlock1.getNumColumns();
boolean sparse = MatrixBlock.evalSparseFormatInMemory(outputDim1, outputDim2, inputRows * inputCols);
// only create a dense result block; it is important not to aggregate on sparse
// blocks because it would implicitly turn the O(N) algorithm into O(N log N).
if (!sparse)
resultBlock = new MatrixBlock((int) outputDim1, (int) outputDim2, false);
}
if (_isExpand) {
resultBlock = new MatrixBlock(matBlock1.getNumRows(), Integer.MAX_VALUE, true);
}
switch(ctableOp) {
case CTABLE_TRANSFORM: // (VECTOR)
// F=ctable(A,B,W)
matBlock2 = ec.getMatrixInput(input2.getName(), getExtendedOpcode());
wtBlock = ec.getMatrixInput(input3.getName(), getExtendedOpcode());
matBlock1.ctableOperations((SimpleOperator) _optr, matBlock2, wtBlock, resultMap, resultBlock);
break;
case CTABLE_TRANSFORM_SCALAR_WEIGHT: // (VECTOR/MATRIX)
// F = ctable(A,B) or F = ctable(A,B,1)
matBlock2 = ec.getMatrixInput(input2.getName(), getExtendedOpcode());
cst1 = ec.getScalarInput(input3.getName(), input3.getValueType(), input3.isLiteral()).getDoubleValue();
matBlock1.ctableOperations((SimpleOperator) _optr, matBlock2, cst1, _ignoreZeros, resultMap, resultBlock);
break;
case CTABLE_EXPAND_SCALAR_WEIGHT: // (VECTOR)
// F = ctable(seq,A) or F = ctable(seq,B,1)
matBlock2 = ec.getMatrixInput(input2.getName(), getExtendedOpcode());
cst1 = ec.getScalarInput(input3.getName(), input3.getValueType(), input3.isLiteral()).getDoubleValue();
// only resultBlock.rlen known, resultBlock.clen set in operation
matBlock1.ctableOperations((SimpleOperator) _optr, matBlock2, cst1, resultBlock);
break;
case CTABLE_TRANSFORM_HISTOGRAM: // (VECTOR)
// F=ctable(A,1) or F = ctable(A,1,1)
cst1 = ec.getScalarInput(input2.getName(), input2.getValueType(), input2.isLiteral()).getDoubleValue();
cst2 = ec.getScalarInput(input3.getName(), input3.getValueType(), input3.isLiteral()).getDoubleValue();
matBlock1.ctableOperations((SimpleOperator) _optr, cst1, cst2, resultMap, resultBlock);
break;
case CTABLE_TRANSFORM_WEIGHTED_HISTOGRAM: // (VECTOR)
// F=ctable(A,1,W)
wtBlock = ec.getMatrixInput(input3.getName(), getExtendedOpcode());
cst1 = ec.getScalarInput(input2.getName(), input2.getValueType(), input2.isLiteral()).getDoubleValue();
matBlock1.ctableOperations((SimpleOperator) _optr, cst1, wtBlock, resultMap, resultBlock);
break;
default:
throw new DMLRuntimeException("Encountered an invalid ctable operation (" + ctableOp + ") while executing instruction: " + this.toString());
}
if (input1.getDataType() == DataType.MATRIX)
ec.releaseMatrixInput(input1.getName(), getExtendedOpcode());
if (input2.getDataType() == DataType.MATRIX)
ec.releaseMatrixInput(input2.getName(), getExtendedOpcode());
if (input3.getDataType() == DataType.MATRIX)
ec.releaseMatrixInput(input3.getName(), getExtendedOpcode());
if (resultBlock == null) {
// hash-aggregation is used here just to prevent inefficiency in case of sparse outputs.
if (outputDimsKnown)
resultBlock = DataConverter.convertToMatrixBlock(resultMap, (int) outputDim1, (int) outputDim2);
else
resultBlock = DataConverter.convertToMatrixBlock(resultMap);
} else
resultBlock.examSparsity();
// check the output representation for special cases such as ctable expand (guarded by released input memory)
if (checkGuardedRepresentationChange(matBlock1, matBlock2, resultBlock)) {
resultBlock.examSparsity();
}
ec.setMatrixOutput(output.getName(), resultBlock, getExtendedOpcode());
}
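When the output dimensions are unknown, the CP path aggregates into a CTableMap and only afterwards converts the map into a MatrixBlock. Below is a simplified two-phase sketch of that pattern using plain Java collections; the class and methods are hypothetical stand-ins (the real conversion is DataConverter.convertToMatrixBlock), and the dimension tracking mirrors what CTableMap.getMaxRow/getMaxColumn provide.

import java.util.HashMap;
import java.util.Map;

public class HashToDenseSketch {
    private final Map<Long, Double> cells = new HashMap<>();
    private int maxRow = 0, maxCol = 0;

    // phase 1: aggregate one 1-based cell, tracking the output dimensions on the fly
    public void aggregate(int row, int col, double w) {
        cells.merge(((long) row << 32) | col, w, Double::sum);
        maxRow = Math.max(maxRow, row);
        maxCol = Math.max(maxCol, col);
    }

    // phase 2: once all cells are seen, materialize a dense result
    public double[][] toDense() {
        double[][] out = new double[maxRow][maxCol];
        for (Map.Entry<Long, Double> e : cells.entrySet()) {
            int row = (int) (e.getKey() >>> 32);
            int col = e.getKey().intValue();
            out[row - 1][col - 1] = e.getValue();
        }
        return out;
    }
}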
use of org.apache.sysml.runtime.matrix.data.CTableMap in project incubator-systemml by apache.
the class TernaryInstruction method processInstruction.
public void processInstruction(Class<? extends MatrixValue> valueClass, CachedValueMap cachedValues, IndexedMatrixValue zeroInput, HashMap<Byte, CTableMap> resultMaps, HashMap<Byte, MatrixBlock> resultBlocks, int blockRowFactor, int blockColFactor) throws DMLRuntimeException {
IndexedMatrixValue in1, in2, in3 = null;
in1 = cachedValues.getFirst(input1);
CTableMap ctableResult = null;
MatrixBlock ctableResultBlock = null;
if (knownOutputDims()) {
if (resultBlocks != null) {
ctableResultBlock = resultBlocks.get(output);
if (ctableResultBlock == null) {
// in the MR context, the ctable output block is allocated sparse since it is built from a single input block.
ctableResultBlock = new MatrixBlock((int) _outputDim1, (int) _outputDim2, true);
resultBlocks.put(output, ctableResultBlock);
}
} else {
throw new DMLRuntimeException("Unexpected error in processing table instruction.");
}
} else {
//prepare aggregation maps
ctableResult = resultMaps.get(output);
if (ctableResult == null) {
ctableResult = new CTableMap();
resultMaps.put(output, ctableResult);
}
}
//get inputs and process instruction
switch(_op) {
case CTABLE_TRANSFORM:
{
in2 = cachedValues.getFirst(input2);
in3 = cachedValues.getFirst(input3);
if (in1 == null || in2 == null || in3 == null)
return;
OperationsOnMatrixValues.performTernary(in1.getIndexes(), in1.getValue(), in2.getIndexes(), in2.getValue(), in3.getIndexes(), in3.getValue(), ctableResult, ctableResultBlock, optr);
break;
}
case CTABLE_TRANSFORM_SCALAR_WEIGHT:
{
// 3rd input is a scalar
in2 = cachedValues.getFirst(input2);
if (in1 == null || in2 == null)
return;
OperationsOnMatrixValues.performTernary(in1.getIndexes(), in1.getValue(), in2.getIndexes(), in2.getValue(), scalar_input3, ctableResult, ctableResultBlock, optr);
break;
}
case CTABLE_EXPAND_SCALAR_WEIGHT:
{
// 2nd and 3rd inputs are scalars
if (in1 == null)
return;
OperationsOnMatrixValues.performTernary(in1.getIndexes(), in1.getValue(), scalar_input2, (scalar_input3 == 1), blockRowFactor, ctableResult, ctableResultBlock, optr);
break;
}
case CTABLE_TRANSFORM_HISTOGRAM:
{
// 2nd and 3rd inputs are scalars
if (in1 == null)
return;
OperationsOnMatrixValues.performTernary(in1.getIndexes(), in1.getValue(), scalar_input2, scalar_input3, ctableResult, ctableResultBlock, optr);
break;
}
case CTABLE_TRANSFORM_WEIGHTED_HISTOGRAM:
{
// 2nd input is a scalar, 3rd input is the weight matrix
in3 = cachedValues.getFirst(input3);
if (in1 == null || in3 == null)
return;
OperationsOnMatrixValues.performTernary(in1.getIndexes(), in1.getValue(), scalar_input2, in3.getIndexes(), in3.getValue(), ctableResult, ctableResultBlock, optr);
break;
}
default:
throw new DMLRuntimeException("Unrecognized opcode in Tertiary Instruction: " + instString);
}
}
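The histogram cases are worth spelling out: CTABLE_TRANSFORM_HISTOGRAM corresponds to F = ctable(A, 1, w), where every entry of A adds the constant weight w to row A[i] of a single-column output, i.e., a histogram over the category ids in A. A standalone sketch of that semantics (hypothetical helper, not SystemML code):

import java.util.HashMap;
import java.util.Map;

public class CtableHistogramSketch {
    // F = ctable(A, 1, w): cell (A[i], 1) accumulates the constant weight w
    public static Map<Long, Double> histogram(double[] a, double w) {
        Map<Long, Double> hist = new HashMap<>();
        for (double v : a)
            hist.merge((long) v, w, Double::sum);
        return hist;
    }

    public static void main(String[] args) {
        // categories 1 and 3 occur twice each, category 2 once
        System.out.println(histogram(new double[]{1, 3, 2, 1, 3}, 1.0));
    }
}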
use of org.apache.sysml.runtime.matrix.data.CTableMap in project incubator-systemml by apache.
the class CtableInstruction method processInstruction.
public void processInstruction(Class<? extends MatrixValue> valueClass, CachedValueMap cachedValues, IndexedMatrixValue zeroInput, HashMap<Byte, CTableMap> resultMaps, HashMap<Byte, MatrixBlock> resultBlocks, int blockRowFactor, int blockColFactor) {
IndexedMatrixValue in1, in2, in3 = null;
in1 = cachedValues.getFirst(input1);
CTableMap ctableResult = null;
MatrixBlock ctableResultBlock = null;
if (knownOutputDims()) {
if (resultBlocks != null) {
ctableResultBlock = resultBlocks.get(output);
if (ctableResultBlock == null) {
// in the MR context, the ctable output block is allocated sparse since it is built from a single input block.
ctableResultBlock = new MatrixBlock((int) _outputDim1, (int) _outputDim2, true);
resultBlocks.put(output, ctableResultBlock);
}
} else {
throw new DMLRuntimeException("Unexpected error in processing table instruction.");
}
} else {
// prepare aggregation maps
ctableResult = resultMaps.get(output);
if (ctableResult == null) {
ctableResult = new CTableMap();
resultMaps.put(output, ctableResult);
}
}
// get inputs and process instruction
switch(_op) {
case CTABLE_TRANSFORM:
{
in2 = cachedValues.getFirst(input2);
in3 = cachedValues.getFirst(input3);
if (in1 == null || in2 == null || in3 == null)
return;
OperationsOnMatrixValues.performCtable(in1.getIndexes(), in1.getValue(), in2.getIndexes(), in2.getValue(), in3.getIndexes(), in3.getValue(), ctableResult, ctableResultBlock, optr);
break;
}
case CTABLE_TRANSFORM_SCALAR_WEIGHT:
{
// 3rd input is a scalar
in2 = cachedValues.getFirst(input2);
if (in1 == null || in2 == null)
return;
OperationsOnMatrixValues.performCtable(in1.getIndexes(), in1.getValue(), in2.getIndexes(), in2.getValue(), scalar_input3, ctableResult, ctableResultBlock, optr);
break;
}
case CTABLE_EXPAND_SCALAR_WEIGHT:
{
// 2nd and 3rd inputs are scalars
if (in1 == null)
return;
OperationsOnMatrixValues.performCtable(in1.getIndexes(), in1.getValue(), scalar_input2, (scalar_input3 == 1), blockRowFactor, ctableResult, ctableResultBlock, optr);
break;
}
case CTABLE_TRANSFORM_HISTOGRAM:
{
// 2nd and 3rd inputs are scalars
if (in1 == null)
return;
OperationsOnMatrixValues.performCtable(in1.getIndexes(), in1.getValue(), scalar_input2, scalar_input3, ctableResult, ctableResultBlock, optr);
break;
}
case CTABLE_TRANSFORM_WEIGHTED_HISTOGRAM:
{
// 2nd input is a scalar, 3rd input is the weight matrix
in3 = cachedValues.getFirst(input3);
if (in1 == null || in3 == null)
return;
OperationsOnMatrixValues.performCtable(in1.getIndexes(), in1.getValue(), scalar_input2, in3.getIndexes(), in3.getValue(), ctableResult, ctableResultBlock, optr);
break;
}
default:
throw new DMLRuntimeException("Unrecognized opcode in Tertiary Instruction: " + instString);
}
}
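CTABLE_EXPAND_SCALAR_WEIGHT covers F = ctable(seq(1,n), A): row i of the output holds a single 1 in column A[i], expanding a category vector into a one-hot indicator matrix. A minimal sketch of that expansion; skipping non-positive ids is this sketch's assumption about invalid categories.

import java.util.Arrays;

public class CtableExpandSketch {
    // F = ctable(seq(1,n), A): output(i, A[i]) = 1 for each valid category id
    public static double[][] expand(double[] a) {
        int maxCol = 0;
        for (double v : a)
            maxCol = Math.max(maxCol, (int) v);
        double[][] out = new double[a.length][maxCol];
        for (int i = 0; i < a.length; i++) {
            int col = (int) a[i];
            if (col > 0) // assumption: non-positive ids are treated as invalid and skipped
                out[i][col - 1] = 1;
        }
        return out;
    }

    public static void main(String[] args) {
        // A = (2, 1, 3) expands to a 3 x 3 one-hot matrix with rows e2, e1, e3
        for (double[] row : expand(new double[]{2, 1, 3}))
            System.out.println(Arrays.toString(row));
    }
}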