Use of org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue in project systemml by Apache.
Class LibMatrixReorg, method rmempty.
/**
 * MR rmempty interface - for rmempty we cannot view blocks independently, and hence,
 * there are different CP and MR interfaces.
 *
 * <p>Every row (or column) of the input block whose offset entry is positive is copied to the
 * position given by that entry; entries that are zero or negative mark rows/columns that are
 * dropped. Offsets are 1-based global target positions, from which the target block index and
 * the position inside that block are derived via {@code brlen} / {@code bclen}.
 *
 * @param data input block together with its block indexes
 * @param offset block holding the target positions: read from column 0 (one entry per row) if
 *        {@code rmRows} is true, otherwise from row 0 (one entry per column)
 * @param rmRows true to remove empty rows (margin = "rows"), false to remove empty columns
 * @param len total number of rows (if {@code rmRows}) or columns (otherwise) of the overall
 *        output; used to size the last, potentially smaller, target block
 * @param brlen number of rows in a block
 * @param bclen number of columns in a block
 * @param outList list of indexed matrix values to which the produced output blocks are appended
 * @throws DMLRuntimeException if one of the inputs is not a {@code MatrixBlock} or if the
 *         dimensions of data and offsets do not match along the selected margin
 */
public static void rmempty(IndexedMatrixValue data, IndexedMatrixValue offset, boolean rmRows, long len, long brlen, long bclen, ArrayList<IndexedMatrixValue> outList) {
    // sanity check inputs
    if (!(data.getValue() instanceof MatrixBlock && offset.getValue() instanceof MatrixBlock))
        throw new DMLRuntimeException("Unsupported input data: expected " + MatrixBlock.class.getName() + " but got " + data.getValue().getClass().getName() + " and " + offset.getValue().getClass().getName());
    if (rmRows && data.getValue().getNumRows() != offset.getValue().getNumRows() || !rmRows && data.getValue().getNumColumns() != offset.getValue().getNumColumns()) {
        // message now closes the bracket it opens
        throw new DMLRuntimeException("Dimension mismatch between input data and offsets: [" + data.getValue().getNumRows() + "x" + data.getValue().getNumColumns() + " vs " + offset.getValue().getNumRows() + "x" + offset.getValue().getNumColumns() + "]");
    }
    // compute outputs (at most two output blocks)
    HashMap<MatrixIndexes, IndexedMatrixValue> out = new HashMap<>();
    MatrixBlock linData = (MatrixBlock) data.getValue();
    MatrixBlock linOffset = (MatrixBlock) offset.getValue();
    // reusable lookup key; each newly created target block stores its own MatrixIndexes instance
    MatrixIndexes tmpIx = new MatrixIndexes(-1, -1);
    if (rmRows) { // margin = "rows"
        long rlen = len;
        int clen = linData.getNumColumns();
        for (int i = 0; i < linOffset.getNumRows(); i++) {
            long rix = (long) linOffset.quickGetValue(i, 0);
            if (rix <= 0)
                continue; // empty row, not part of the output
            // get single row from source block
            MatrixBlock src = (MatrixBlock) linData.slice(i, i, 0, clen - 1, new MatrixBlock());
            long brix = (rix - 1) / brlen + 1; // 1-based target row-block index
            long lbrix = (rix - 1) % brlen; // 0-based row position inside the target block
            tmpIx.setIndexes(brix, data.getIndexes().getColumnIndex());
            // create target block if necessary
            IndexedMatrixValue target = out.get(tmpIx);
            if (target == null) {
                target = new IndexedMatrixValue(new MatrixIndexes(), new MatrixBlock());
                target.getIndexes().setIndexes(tmpIx);
                ((MatrixBlock) target.getValue()).reset((int) Math.min(brlen, rlen - ((brix - 1) * brlen)), clen);
                out.put(target.getIndexes(), target);
            }
            // put single row into target block
            ((MatrixBlock) target.getValue()).copy((int) lbrix, (int) lbrix, 0, clen - 1, src, false);
        }
    } else { // margin = "cols"
        int rlen = linData.getNumRows();
        long clen = len;
        for (int i = 0; i < linOffset.getNumColumns(); i++) {
            long cix = (long) linOffset.quickGetValue(0, i);
            if (cix <= 0)
                continue; // empty column, not part of the output
            // get single column from source block
            MatrixBlock src = (MatrixBlock) linData.slice(0, rlen - 1, i, i, new MatrixBlock());
            long bcix = (cix - 1) / bclen + 1; // 1-based target column-block index
            long lbcix = (cix - 1) % bclen; // 0-based column position inside the target block
            tmpIx.setIndexes(data.getIndexes().getRowIndex(), bcix);
            // create target block if necessary
            IndexedMatrixValue target = out.get(tmpIx);
            if (target == null) {
                target = new IndexedMatrixValue(new MatrixIndexes(), new MatrixBlock());
                target.getIndexes().setIndexes(tmpIx);
                ((MatrixBlock) target.getValue()).reset(rlen, (int) Math.min(bclen, clen - ((bcix - 1) * bclen)));
                out.put(target.getIndexes(), target);
            }
            // put single column into target block
            ((MatrixBlock) target.getValue()).copy(0, rlen - 1, (int) lbcix, (int) lbcix, src, false);
        }
    }
    // prepare and return outputs (already in cached values)
    for (IndexedMatrixValue imv : out.values()) {
        ((MatrixBlock) imv.getValue()).recomputeNonZeros();
        outList.add(imv);
    }
}
Use of org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue in project systemml by Apache.
Class LibMatrixReorg, method rexpand.
/**
 * MR/Spark rexpand operation (single input, multiple outputs incl empty blocks).
 *
 * <p>The input block is first expanded into one temporary block by the block-level
 * {@code rexpand} (called with a parallelism argument of 1), and that temporary block is then
 * cut into output blocks of at most {@code brlen} rows (vertical expansion) or at most
 * {@code bclen} columns (horizontal expansion). In both cases the row-block index of the input
 * is reused as the index of the dimension that is not being cut.
 *
 * @param data indexed matrix value holding the input block and its block indexes
 * @param max forwarded unchanged to the block-level rexpand
 * @param rows true if the result is expanded vertically (cut along rows), false if it is
 *        expanded horizontally (cut along columns); also forwarded to the block-level rexpand
 * @param cast forwarded unchanged to the block-level rexpand
 * @param ignore forwarded unchanged to the block-level rexpand
 * @param brlen number of rows in a block
 * @param bclen number of columns in a block
 * @param outList list of indexed matrix values to which the output blocks are appended
 */
public static void rexpand(IndexedMatrixValue data, double max, boolean rows, boolean cast, boolean ignore, long brlen, long bclen, ArrayList<IndexedMatrixValue> outList) {
    final MatrixIndexes srcIx = data.getIndexes();
    final MatrixBlock input = (MatrixBlock) data.getValue();

    // run the block-level rexpand (incl sanity checks) into a temporary block
    // TODO more robust (memory efficient) implementation w/o tmp block
    final MatrixBlock expanded = rexpand(input, new MatrixBlock(), max, rows, cast, ignore, 1);

    if (!rows) {
        // expanded horizontally: cut the temporary block into column ranges of bclen
        int colStart = 0;
        while (colStart < expanded.getNumColumns()) {
            int colEnd = (int) (Math.min(colStart + bclen, expanded.getNumColumns()) - 1);
            MatrixBlock part = expanded.slice(0, expanded.getNumRows() - 1, colStart, colEnd, new MatrixBlock());
            MatrixIndexes partIx = new MatrixIndexes(srcIx.getRowIndex(), colStart / bclen + 1);
            outList.add(new IndexedMatrixValue(partIx, part));
            colStart += bclen;
        }
        return;
    }

    // expanded vertically: cut the temporary block into row ranges of brlen
    int rowStart = 0;
    while (rowStart < expanded.getNumRows()) {
        int rowEnd = (int) (Math.min(rowStart + brlen, expanded.getNumRows()) - 1);
        MatrixBlock part = expanded.slice(rowStart, rowEnd, 0, expanded.getNumColumns() - 1, new MatrixBlock());
        MatrixIndexes partIx = new MatrixIndexes(rowStart / brlen + 1, srcIx.getRowIndex());
        outList.add(new IndexedMatrixValue(partIx, part));
        rowStart += brlen;
    }
}
Use of org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue in project systemml by Apache.
Class ReaderBinaryBlock, method readIndexedMatrixBlocksFromHDFS.
/**
 * Reads a binary-block matrix file and returns its blocks as a list of indexed matrix values.
 *
 * <p>When this reader is flagged for the local file system, the file name is prefixed with
 * {@code file:///} before the path is resolved. The input file is validated before the actual
 * read is delegated to {@code readBinaryBlockMatrixBlocksFromHDFS}.
 *
 * @param fname file name of the input matrix
 * @param rlen number of rows, forwarded to the core read
 * @param clen number of columns, forwarded to the core read
 * @param brlen number of rows in a block, forwarded to the core read
 * @param bclen number of columns in a block, forwarded to the core read
 * @return list populated by the core read
 * @throws IOException declared by this method; presumably raised by file-system access in the helpers
 * @throws DMLRuntimeException declared by this method; presumably raised by the helpers
 */
public ArrayList<IndexedMatrixValue> readIndexedMatrixBlocksFromHDFS(String fname, long rlen, long clen, int brlen, int bclen) throws IOException, DMLRuntimeException {
    // set up file access: job configuration, (optionally local) path, and matching file system
    final JobConf conf = new JobConf(ConfigurationManager.getCachedJobConf());
    final String scheme = _localFS ? "file:///" : "";
    final Path inputPath = new Path(scheme + fname);
    final FileSystem fileSystem = IOUtilFunctions.getFileSystem(inputPath, conf);

    // validate the input file before reading it
    checkValidInputFile(fileSystem, inputPath);

    // core read into a freshly allocated block collection
    final ArrayList<IndexedMatrixValue> blocks = new ArrayList<>();
    readBinaryBlockMatrixBlocksFromHDFS(inputPath, conf, fileSystem, blocks, rlen, clen, brlen, bclen);
    return blocks;
}
Use of org.apache.sysml.runtime.matrix.mapred.IndexedMatrixValue in project systemml by Apache.
Class CpmmSPInstruction, method processInstruction.
/**
 * Executes the Spark cpmm matrix multiply for this instruction.
 *
 * <p>Both inputs are fetched as binary-block RDDs, re-keyed via {@code CpmmIndexFunction},
 * joined, and multiplied block-wise via {@code CpmmMultiplyFunction}. Depending on the
 * aggregation type, the partial products are either summed into a single output block or
 * summed by key into an output RDD that is registered together with its lineage.
 *
 * @param ec execution context; cast to {@code SparkExecutionContext}
 */
@Override
public void processInstruction(ExecutionContext ec) {
    final SparkExecutionContext sparkCtx = (SparkExecutionContext) ec;
    final boolean singleBlock = (_aggtype == SparkAggType.SINGLE_BLOCK);

    // get rdd inputs and their matrix characteristics
    JavaPairRDD<MatrixIndexes, MatrixBlock> left = sparkCtx.getBinaryBlockRDDHandleForVariable(input1.getName());
    JavaPairRDD<MatrixIndexes, MatrixBlock> right = sparkCtx.getBinaryBlockRDDHandleForVariable(input2.getName());
    MatrixCharacteristics leftMc = sparkCtx.getMatrixCharacteristics(input1.getName());
    MatrixCharacteristics rightMc = sparkCtx.getMatrixCharacteristics(input2.getName());

    // prune empty blocks of ultra-sparse matrices
    if (singleBlock || !_outputEmptyBlocks) {
        left = left.filter(new FilterNonEmptyBlocksFunction());
        right = right.filter(new FilterNonEmptyBlocksFunction());
    }

    // join degree of parallelism: the preferred value, capped by the maximum
    int preferredPar = getPreferredParJoin(leftMc, rightMc, left.getNumPartitions(), right.getNumPartitions());
    int joinPar = Math.min(getMaxParJoin(leftMc, rightMc), preferredPar);

    // core cpmm: re-key both sides, join over the common dimension, multiply joined blocks
    JavaPairRDD<Long, IndexedMatrixValue> leftKeyed = left.mapToPair(new CpmmIndexFunction(true));
    JavaPairRDD<Long, IndexedMatrixValue> rightKeyed = right.mapToPair(new CpmmIndexFunction(false));
    JavaPairRDD<MatrixIndexes, MatrixBlock> products = leftKeyed.join(rightKeyed, joinPar).mapToPair(new CpmmMultiplyFunction());

    if (singleBlock) {
        // prune empty blocks and aggregate all results into one block
        products = products.filter(new FilterNonEmptyBlocksFunction());
        MatrixBlock result = RDDAggregateUtils.sumStable(products);
        // put output block into symbol table (no lineage because single block);
        // this also includes implicit maintenance of matrix characteristics
        sparkCtx.setMatrixOutput(output.getName(), result, getExtendedOpcode());
        return;
    }

    // DEFAULT: MULTI_BLOCK
    if (!_outputEmptyBlocks) {
        products = products.filter(new FilterNonEmptyBlocksFunction());
    }
    products = RDDAggregateUtils.sumByKeyStable(products, false);

    // put output RDD handle into symbol table and record both inputs as lineage
    sparkCtx.setRDDHandleForVariable(output.getName(), products);
    sparkCtx.addLineageRDD(output.getName(), input1.getName());
    sparkCtx.addLineageRDD(output.getName(), input2.getName());

    // update output statistics if not inferred
    updateBinaryMMOutputMatrixCharacteristics(sparkCtx, true);
}
Aggregations