Use of org.apache.sysml.runtime.matrix.data.MatrixCell in project incubator-systemml by apache.
The class ResultMergeLocalFile, method createBinaryCellResultFile.
@SuppressWarnings("deprecation")
private void createBinaryCellResultFile(String fnameStaging, String fnameStagingCompare, String fnameNew, MetaDataFormat metadata, boolean withCompare) throws IOException, DMLRuntimeException {
JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
Path path = new Path(fnameNew);
FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
MatrixCharacteristics mc = metadata.getMatrixCharacteristics();
long rlen = mc.getRows();
long clen = mc.getCols();
int brlen = mc.getRowsPerBlock();
int bclen = mc.getColsPerBlock();
MatrixIndexes indexes = new MatrixIndexes(1, 1);
MatrixCell cell = new MatrixCell(0);
// beware: creating the SequenceFile writer takes ca. 50 ms
SequenceFile.Writer out = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixCell.class);
try {
boolean written = false;
for (long brow = 1; brow <= (long) Math.ceil(rlen / (double) brlen); brow++)
for (long bcol = 1; bcol <= (long) Math.ceil(clen / (double) bclen); bcol++) {
File dir = new File(fnameStaging + "/" + brow + "_" + bcol);
File dir2 = new File(fnameStagingCompare + "/" + brow + "_" + bcol);
MatrixBlock mb = null;
long row_offset = (brow - 1) * brlen + 1;
long col_offset = (bcol - 1) * bclen + 1;
if (dir.exists()) {
// WITH COMPARE BLOCK
if (withCompare && dir2.exists()) {
// copy only values that are different from the original
String[] lnames2 = dir2.list();
// there should be exactly 1 compare block
if (lnames2.length != 1)
throw new DMLRuntimeException("Unable to merge results because multiple compare blocks found.");
mb = StagingFileUtils.readCellList2BlockFromLocal(dir2 + "/" + lnames2[0], brlen, bclen);
boolean appendOnly = mb.isInSparseFormat();
DenseBlock compare = DataConverter.convertToDenseBlock(mb, false);
for (String lname : dir.list()) {
MatrixBlock tmp = StagingFileUtils.readCellList2BlockFromLocal(dir + "/" + lname, brlen, bclen);
mergeWithComp(mb, tmp, compare);
}
// sort sparse due to append-only
if (appendOnly && !_isAccum)
mb.sortSparseRows();
// change sparsity if required after
mb.examSparsity();
} else { // WITHOUT COMPARE BLOCK
// copy all non-zeros from all workers
boolean appendOnly = false;
for (String lname : dir.list()) {
if (mb == null) {
mb = StagingFileUtils.readCellList2BlockFromLocal(dir + "/" + lname, brlen, bclen);
appendOnly = mb.isInSparseFormat();
} else {
MatrixBlock tmp = StagingFileUtils.readCellList2BlockFromLocal(dir + "/" + lname, brlen, bclen);
mergeWithoutComp(mb, tmp, appendOnly);
}
}
// sort sparse due to append-only
if (appendOnly && !_isAccum)
mb.sortSparseRows();
// change sparsity if required after
mb.examSparsity();
}
}
// write the block to binary cell
if (mb != null) {
if (mb.isInSparseFormat()) {
Iterator<IJV> iter = mb.getSparseBlockIterator();
while (iter.hasNext()) {
IJV lcell = iter.next();
indexes.setIndexes(row_offset + lcell.getI(), col_offset + lcell.getJ());
cell.setValue(lcell.getV());
out.append(indexes, cell);
written = true;
}
} else {
for (int i = 0; i < brlen; i++)
for (int j = 0; j < bclen; j++) {
double lvalue = mb.getValueDenseUnsafe(i, j);
// for nnz
if (lvalue != 0) {
indexes.setIndexes(row_offset + i, col_offset + j);
cell.setValue(lvalue);
out.append(indexes, cell);
written = true;
}
}
}
}
}
if (!written)
out.append(indexes, cell);
} finally {
IOUtilFunctions.closeSilently(out);
}
}
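For context, a minimal sketch of reading such a binary-cell SequenceFile back, assuming the same MatrixIndexes/MatrixCell key-value layout written above; the file name and configuration are placeholders:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.sysml.runtime.matrix.data.MatrixCell;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;

@SuppressWarnings("deprecation")
public static void dumpBinaryCells(String fname, Configuration conf) throws IOException {
    Path path = new Path(fname);
    FileSystem fs = FileSystem.get(conf);
    SequenceFile.Reader in = new SequenceFile.Reader(fs, path, conf);
    try {
        MatrixIndexes indexes = new MatrixIndexes();
        MatrixCell cell = new MatrixCell();
        // iterate over all (indexes, cell) pairs appended by the writer
        while (in.next(indexes, cell))
            System.out.println(indexes + ": " + cell.getValue());
    } finally {
        in.close();
    }
}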
Use of org.apache.sysml.runtime.matrix.data.MatrixCell in project incubator-systemml by apache.
The class PairWritableCell, method readFields.
@Override
public void readFields(DataInput in) throws IOException {
indexes = new MatrixIndexes();
indexes.readFields(in);
cell = new MatrixCell();
cell.readFields(in);
}
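readFields is one half of the Hadoop Writable contract; for completeness, a sketch of what the matching write method would look like, serializing the two fields in the same order readFields consumes them (assuming indexes and cell are non-null):

@Override
public void write(DataOutput out) throws IOException {
    // must mirror the field order read in readFields
    indexes.write(out);
    cell.write(out);
}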
Use of org.apache.sysml.runtime.matrix.data.MatrixCell in project incubator-systemml by apache.
The class TestUtils, method writeBinaryTestMatrixCells.
/**
* <p>
* Writes a matrix to a file using the binary cells format.
* </p>
*
* @param file
* file name
* @param matrix
* matrix
*/
@SuppressWarnings("deprecation")
public static void writeBinaryTestMatrixCells(String file, double[][] matrix) {
try {
SequenceFile.Writer writer = null;
try {
Path path = new Path(file);
FileSystem fs = IOUtilFunctions.getFileSystem(path, conf);
writer = new SequenceFile.Writer(fs, conf, path, MatrixIndexes.class, MatrixCell.class);
MatrixIndexes index = new MatrixIndexes();
MatrixCell value = new MatrixCell();
for (int i = 0; i < matrix.length; i++) {
for (int j = 0; j < matrix[i].length; j++) {
if (matrix[i][j] != 0) {
index.setIndexes((i + 1), (j + 1));
value.setValue(matrix[i][j]);
writer.append(index, value);
}
}
}
} finally {
IOUtilFunctions.closeSilently(writer);
}
} catch (IOException e) {
e.printStackTrace();
fail("unable to write test matrix: " + e.getMessage());
}
}
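A hypothetical usage example, writing a small sparse test matrix (the output path is a placeholder). Only the two non-zero cells are appended, as 1-based (index, value) pairs:

double[][] matrix = new double[][] {
    { 1.0, 0.0, 0.0 },
    { 0.0, 0.0, 2.5 }
};
TestUtils.writeBinaryTestMatrixCells("/tmp/testMatrixCells", matrix);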
Use of org.apache.sysml.runtime.matrix.data.MatrixCell in project systemml by apache.
The class RandSPInstruction, method generateSample.
/**
* Helper function to construct a sample.
*
* @param sec spark execution context
*/
private void generateSample(SparkExecutionContext sec) {
long lrows = sec.getScalarInput(rows).getLongValue();
if (maxValue < lrows && !replace)
throw new DMLRuntimeException("Sample (size=" + rows + ") larger than population (size=" + maxValue + ") can only be generated with replacement.");
if (LOG.isTraceEnabled())
LOG.trace("Process RandSPInstruction sample with range=" + maxValue + ", size=" + lrows + ", replace=" + replace + ", seed=" + seed);
// sampling rate that guarantees a sample of size >= sampleSizeLowerBound 99.99% of the time.
double fraction = SamplingUtils.computeFractionForSampleSize((int) lrows, UtilFunctions.toLong(maxValue), replace);
Well1024a bigrand = LibMatrixDatagen.setupSeedsForRand(seed);
// divide the population range across numPartitions by creating SampleTasks
double hdfsBlockSize = InfrastructureAnalyzer.getHDFSBlockSize();
long outputSize = MatrixBlock.estimateSizeDenseInMemory(lrows, 1);
int numPartitions = (int) Math.ceil((double) outputSize / hdfsBlockSize);
long partitionSize = (long) Math.ceil(maxValue / numPartitions);
ArrayList<SampleTask> offsets = new ArrayList<>();
long st = 1;
while (st <= maxValue) {
SampleTask s = new SampleTask();
s.range_start = st;
s.seed = bigrand.nextLong();
offsets.add(s);
st = st + partitionSize;
}
JavaRDD<SampleTask> offsetRDD = sec.getSparkContext().parallelize(offsets, numPartitions);
// Construct the sample in a distributed manner
JavaRDD<Double> rdd = offsetRDD.flatMap(new GenerateSampleBlock(replace, fraction, (long) maxValue, partitionSize));
// Randomize the sampled elements
JavaRDD<Double> randomizedRDD = rdd.mapToPair(new AttachRandom()).sortByKey().values();
// Trim the sampled list to required size & attach matrix indexes to randomized elements
JavaPairRDD<MatrixIndexes, MatrixCell> miRDD = randomizedRDD.zipWithIndex().filter(new TrimSample(lrows)).mapToPair(new Double2MatrixCell());
MatrixCharacteristics mcOut = new MatrixCharacteristics(lrows, 1, rowsInBlock, colsInBlock, lrows);
// Construct BinaryBlock representation
JavaPairRDD<MatrixIndexes, MatrixBlock> mbRDD = RDDConverterUtils.binaryCellToBinaryBlock(sec.getSparkContext(), miRDD, mcOut, true);
sec.getMatrixCharacteristics(output.getName()).setNonZeros(lrows);
sec.setRDDHandleForVariable(output.getName(), mbRDD);
}
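To make the partitioning arithmetic concrete, a standalone sketch with hypothetical numbers, mirroring the offset loop above: a population of 1,000,000 whose estimated dense output spans four HDFS blocks is split into four ranges of 250,000 each:

// hypothetical values for illustration only
long maxValue = 1_000_000L;
int numPartitions = 4;
long partitionSize = (long) Math.ceil((double) maxValue / numPartitions); // 250000
// each SampleTask samples from [range_start, range_start + partitionSize)
for (long st = 1; st <= maxValue; st += partitionSize)
    System.out.println("partition range starts at " + st); // 1, 250001, 500001, 750001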
Use of org.apache.sysml.runtime.matrix.data.MatrixCell in project systemml by apache.
The class CtableSPInstruction, method processInstruction.
@Override
public void processInstruction(ExecutionContext ec) {
SparkExecutionContext sec = (SparkExecutionContext) ec;
// get input rdd handle
JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
JavaPairRDD<MatrixIndexes, MatrixBlock> in2 = null;
JavaPairRDD<MatrixIndexes, MatrixBlock> in3 = null;
double scalar_input2 = -1, scalar_input3 = -1;
Ctable.OperationTypes ctableOp = Ctable.findCtableOperationByInputDataTypes(input1.getDataType(), input2.getDataType(), input3.getDataType());
ctableOp = _isExpand ? Ctable.OperationTypes.CTABLE_EXPAND_SCALAR_WEIGHT : ctableOp;
MatrixCharacteristics mc1 = sec.getMatrixCharacteristics(input1.getName());
MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
// First get the block sizes and then set them as -1 to allow for binary cell reblock
int brlen = mc1.getRowsPerBlock();
int bclen = mc1.getColsPerBlock();
JavaPairRDD<MatrixIndexes, ArrayList<MatrixBlock>> inputMBs = null;
JavaPairRDD<MatrixIndexes, CTableMap> ctables = null;
JavaPairRDD<MatrixIndexes, Double> bincellsNoFilter = null;
boolean setLineage2 = false;
boolean setLineage3 = false;
switch(ctableOp) {
// (VECTOR)
case CTABLE_TRANSFORM:
// F=ctable(A,B,W)
in2 = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
in3 = sec.getBinaryBlockRDDHandleForVariable(input3.getName());
setLineage2 = true;
setLineage3 = true;
inputMBs = in1.cogroup(in2).cogroup(in3).mapToPair(new MapThreeMBIterableIntoAL());
ctables = inputMBs.mapToPair(new PerformCTableMapSideOperation(ctableOp, scalar_input2, scalar_input3, this.instString, (SimpleOperator) _optr, _ignoreZeros));
break;
// (VECTOR)
case CTABLE_EXPAND_SCALAR_WEIGHT:
// F = ctable(seq,A) or F = ctable(seq,B,1)
scalar_input3 = sec.getScalarInput(input3.getName(), input3.getValueType(), input3.isLiteral()).getDoubleValue();
if (scalar_input3 == 1) {
in2 = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
setLineage2 = true;
bincellsNoFilter = in2.flatMapToPair(new ExpandScalarCtableOperation(brlen));
break;
}
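// note: when scalar_input3 != 1, control intentionally falls through to CTABLE_TRANSFORM_SCALAR_WEIGHT below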
// (VECTOR/MATRIX)
case CTABLE_TRANSFORM_SCALAR_WEIGHT:
// F = ctable(A,B) or F = ctable(A,B,1)
in2 = sec.getBinaryBlockRDDHandleForVariable(input2.getName());
setLineage2 = true;
scalar_input3 = sec.getScalarInput(input3.getName(), input3.getValueType(), input3.isLiteral()).getDoubleValue();
inputMBs = in1.cogroup(in2).mapToPair(new MapTwoMBIterableIntoAL());
ctables = inputMBs.mapToPair(new PerformCTableMapSideOperation(ctableOp, scalar_input2, scalar_input3, this.instString, (SimpleOperator) _optr, _ignoreZeros));
break;
// (VECTOR)
case CTABLE_TRANSFORM_HISTOGRAM:
// F=ctable(A,1) or F = ctable(A,1,1)
scalar_input2 = sec.getScalarInput(input2.getName(), input2.getValueType(), input2.isLiteral()).getDoubleValue();
scalar_input3 = sec.getScalarInput(input3.getName(), input3.getValueType(), input3.isLiteral()).getDoubleValue();
inputMBs = in1.mapToPair(new MapMBIntoAL());
ctables = inputMBs.mapToPair(new PerformCTableMapSideOperation(ctableOp, scalar_input2, scalar_input3, this.instString, (SimpleOperator) _optr, _ignoreZeros));
break;
// (VECTOR)
case CTABLE_TRANSFORM_WEIGHTED_HISTOGRAM:
// F=ctable(A,1,W)
in3 = sec.getBinaryBlockRDDHandleForVariable(input3.getName());
setLineage3 = true;
scalar_input2 = sec.getScalarInput(input2.getName(), input2.getValueType(), input2.isLiteral()).getDoubleValue();
inputMBs = in1.cogroup(in3).mapToPair(new MapTwoMBIterableIntoAL());
ctables = inputMBs.mapToPair(new PerformCTableMapSideOperation(ctableOp, scalar_input2, scalar_input3, this.instString, (SimpleOperator) _optr, _ignoreZeros));
break;
default:
throw new DMLRuntimeException("Encountered an invalid ctable operation (" + ctableOp + ") while executing instruction: " + this.toString());
}
// Now perform aggregation on ctables to get binaryCells
if (bincellsNoFilter == null && ctables != null) {
bincellsNoFilter = ctables.values().flatMapToPair(new ExtractBinaryCellsFromCTable());
bincellsNoFilter = RDDAggregateUtils.sumCellsByKeyStable(bincellsNoFilter);
} else if (!(bincellsNoFilter != null && ctables == null)) {
throw new DMLRuntimeException("Incorrect ctable operation");
}
// handle known/unknown dimensions
long outputDim1 = (_dim1Literal ? (long) Double.parseDouble(_outDim1) : (sec.getScalarInput(_outDim1, ValueType.DOUBLE, false)).getLongValue());
long outputDim2 = (_dim2Literal ? (long) Double.parseDouble(_outDim2) : (sec.getScalarInput(_outDim2, ValueType.DOUBLE, false)).getLongValue());
MatrixCharacteristics mcBinaryCells = null;
boolean findDimensions = (outputDim1 == -1 && outputDim2 == -1);
if (!findDimensions) {
if ((outputDim1 == -1 && outputDim2 != -1) || (outputDim1 != -1 && outputDim2 == -1))
throw new DMLRuntimeException("Incorrect output dimensions passed to TernarySPInstruction:" + outputDim1 + " " + outputDim2);
else
mcBinaryCells = new MatrixCharacteristics(outputDim1, outputDim2, brlen, bclen);
// filtering according to given dimensions
bincellsNoFilter = bincellsNoFilter.filter(new FilterCells(mcBinaryCells.getRows(), mcBinaryCells.getCols()));
}
// convert double values to matrix cell
JavaPairRDD<MatrixIndexes, MatrixCell> binaryCells = bincellsNoFilter.mapToPair(new ConvertToBinaryCell());
// find dimensions if necessary (w/ cache for reblock)
if (findDimensions) {
binaryCells = SparkUtils.cacheBinaryCellRDD(binaryCells);
mcBinaryCells = SparkUtils.computeMatrixCharacteristics(binaryCells);
}
// store output rdd handle
sec.setRDDHandleForVariable(output.getName(), binaryCells);
mcOut.set(mcBinaryCells);
// Since we are outputting binary cells, we set block sizes = -1
mcOut.setRowsPerBlock(-1);
mcOut.setColsPerBlock(-1);
sec.addLineageRDD(output.getName(), input1.getName());
if (setLineage2)
sec.addLineageRDD(output.getName(), input2.getName());
if (setLineage3)
sec.addLineageRDD(output.getName(), input3.getName());
}
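For illustration, a sketch of the shape such a ConvertToBinaryCell pair function could have (an assumption about its structure, not the project's actual implementation): it keeps the MatrixIndexes key and wraps the aggregated double value into a MatrixCell.

import org.apache.spark.api.java.function.PairFunction;
import org.apache.sysml.runtime.matrix.data.MatrixCell;
import org.apache.sysml.runtime.matrix.data.MatrixIndexes;
import scala.Tuple2;

// hypothetical sketch of a (MatrixIndexes, Double) -> (MatrixIndexes, MatrixCell) pair function
public class ConvertToBinaryCellSketch
        implements PairFunction<Tuple2<MatrixIndexes, Double>, MatrixIndexes, MatrixCell> {
    private static final long serialVersionUID = 1L;

    @Override
    public Tuple2<MatrixIndexes, MatrixCell> call(Tuple2<MatrixIndexes, Double> kv) {
        // keep the cell index, wrap the aggregated value into a MatrixCell
        return new Tuple2<>(kv._1(), new MatrixCell(kv._2()));
    }
}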