use of org.apache.sysml.runtime.matrix.data.MatrixCell in project systemml by apache.
the class TestUtils method writeBinaryTestMatrixCells.
/**
 * Writes the non-zero entries of a dense matrix to a sequence file of
 * (MatrixIndexes, MatrixCell) pairs, i.e. the binary cell format.
 * Cell indexes are written 1-based. An {@link IOException} is printed and
 * reported through {@code fail(...)}.
 *
 * @param file
 *            file name
 * @param matrix
 *            matrix
 */
@SuppressWarnings("deprecation")
public static void writeBinaryTestMatrixCells(String file, double[][] matrix) {
    try {
        SequenceFile.Writer cellWriter = null;
        try {
            Path target = new Path(file);
            FileSystem fileSystem = IOUtilFunctions.getFileSystem(target, conf);
            cellWriter = new SequenceFile.Writer(fileSystem, conf, target, MatrixIndexes.class, MatrixCell.class);
            // a single key/value pair is reused for all appended cells
            MatrixIndexes cellIx = new MatrixIndexes();
            MatrixCell cellVal = new MatrixCell();
            int rowIx = 0;
            for (double[] rowValues : matrix) {
                // indexes in the file are 1-based
                rowIx++;
                for (int c = 0; c < rowValues.length; c++) {
                    double v = rowValues[c];
                    if (v == 0) {
                        // zero cells are not materialized
                        continue;
                    }
                    cellIx.setIndexes(rowIx, c + 1);
                    cellVal.setValue(v);
                    cellWriter.append(cellIx, cellVal);
                }
            }
        } finally {
            // close before any error reporting in the outer handler
            IOUtilFunctions.closeSilently(cellWriter);
        }
    } catch (IOException ex) {
        ex.printStackTrace();
        fail("unable to write test matrix: " + ex.getMessage());
    }
}
use of org.apache.sysml.runtime.matrix.data.MatrixCell in project systemml by apache.
the class SparkExecutionContext method toMatrixBlock.
/**
 * Collects a binary cell RDD and assembles its cells into one matrix block.
 * Note that the collect call might trigger execution of any pending transformations.
 *
 * @param rdd JavaPairRDD of 1-based cell indexes and cell values
 * @param rlen number of rows
 * @param clen number of columns
 * @param nnz number of non-zeros; a negative value is treated as rlen*clen
 * @return matrix block
 */
public static MatrixBlock toMatrixBlock(JavaPairRDD<MatrixIndexes, MatrixCell> rdd, int rlen, int clen, long nnz) {
    long startNanos = 0;
    if (DMLScript.STATISTICS)
        startNanos = System.nanoTime();

    // decide on the sparse/dense target representation
    long estimatedNnz = nnz;
    if (estimatedNnz < 0)
        estimatedNnz = (long) rlen * clen;
    boolean useSparse = MatrixBlock.evalSparseFormatInMemory(rlen, clen, estimatedNnz);

    // output matrix block (w/ lazy allocation)
    MatrixBlock result = new MatrixBlock(rlen, clen, useSparse);

    // append the collected cells one-at-a-time; appending avoids shifting for
    // sparse targets but requires a final sort of the sparse rows
    for (Tuple2<MatrixIndexes, MatrixCell> entry : rdd.collect()) {
        MatrixIndexes cellIx = entry._1();
        MatrixCell cellVal = entry._2();
        int r = (int) cellIx.getRowIndex() - 1;
        int c = (int) cellIx.getColumnIndex() - 1;
        result.appendValue(r, c, cellVal.getValue());
    }

    // post-processing of the output matrix
    if (useSparse) {
        result.sortSparseRows();
    }
    result.recomputeNonZeros();
    result.examSparsity();

    if (DMLScript.STATISTICS) {
        Statistics.accSparkCollectTime(System.nanoTime() - startNanos);
        Statistics.incSparkCollectCount(1);
    }
    return result;
}
use of org.apache.sysml.runtime.matrix.data.MatrixCell in project systemml by apache.
the class DataPartitionerLocal method partitionBinaryCell.
/**
 * Partitions a binary cell matrix in two steps: (1) all cells are read from
 * the sequence files under {@code fname} and appended, in buffered batches,
 * to the staging area {@code fnameStaging}; (2) every entry of the staging
 * directory is written out under {@code fnameNew}, either by worker threads
 * (if {@code PARALLEL}) or sequentially.
 *
 * @param fname input file name
 * @param fnameStaging staging directory the cell buffers are appended to
 * @param fnameNew output file name passed to the partition writers
 * @param rlen number of rows, used for the bounds check in error reporting
 * @param clen number of columns, used for the bounds check in error reporting
 * @param brlen forwarded to the staging writer (presumably rows per block)
 * @param bclen forwarded to the staging writer (presumably columns per block)
 * @throws DMLRuntimeException if partitioning fails; the original failure is
 *         always attached as the cause
 */
@SuppressWarnings("deprecation")
private void partitionBinaryCell(String fname, String fnameStaging, String fnameNew, long rlen, long clen, int brlen, int bclen) {
    // indexes of the most recently read cell; -1 means no cell was read yet
    long row = -1;
    long col = -1;
    try {
        // STEP 1: read matrix from HDFS and write blocks to local staging area
        // check and add input path
        JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
        Path path = new Path(fname);
        FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
        // prepare sequence file reader, and write to local staging area
        LinkedList<Cell> buffer = new LinkedList<>();
        MatrixIndexes key = new MatrixIndexes();
        MatrixCell value = new MatrixCell();
        for (Path lpath : IOUtilFunctions.getSequenceFilePaths(fs, path)) {
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);
            try {
                while (reader.next(key, value)) {
                    row = key.getRowIndex();
                    col = key.getColumnIndex();
                    buffer.addLast(new Cell(row, col, value.getValue()));
                    // periodic flush
                    if (buffer.size() > StagingFileUtils.CELL_BUFFER_SIZE) {
                        appendCellBufferToStagingArea(fnameStaging, buffer, brlen, bclen);
                        buffer.clear();
                    }
                }
                // final flush
                if (!buffer.isEmpty()) {
                    appendCellBufferToStagingArea(fnameStaging, buffer, brlen, bclen);
                    buffer.clear();
                }
            } finally {
                IOUtilFunctions.closeSilently(reader);
            }
        }
        // STEP 2: read matrix blocks from staging area and write matrix to HDFS
        String[] fnamesPartitions = new File(fnameStaging).list();
        if (fnamesPartitions == null) {
            // File.list() yields null if the path is not a directory or on I/O errors;
            // fail with a descriptive message instead of a NullPointerException
            throw new DMLRuntimeException("Unable to list staging directory '" + fnameStaging + "'.");
        }
        if (PARALLEL) {
            int len = Math.min(fnamesPartitions.length, _par);
            Thread[] threads = new Thread[len];
            // number of staging entries per worker (loop-invariant)
            int chunk = (int) Math.ceil(((double) fnamesPartitions.length) / len);
            for (int i = 0; i < len; i++) {
                int start = i * chunk;
                int end = Math.min((i + 1) * chunk - 1, fnamesPartitions.length - 1);
                threads[i] = new Thread(new DataPartitionerWorkerBinaryCell(job, fnameNew, fnameStaging, fnamesPartitions, start, end));
                threads[i].start();
            }
            for (Thread t : threads) {
                t.join();
            }
        } else {
            for (String pdir : fnamesPartitions) {
                writeBinaryCellSequenceFileToHDFS(job, fnameNew, fnameStaging + "/" + pdir);
            }
        }
    } catch (Exception e) {
        // restore the interrupt flag so callers can still observe the interruption
        if (e instanceof InterruptedException) {
            Thread.currentThread().interrupt();
        }
        // post-mortem error handling and bounds checking; only meaningful once a
        // cell has been read, otherwise the -1 sentinels would be misreported
        boolean cellRead = (row != -1 || col != -1);
        if (cellRead && (row < 1 || row > rlen || col < 1 || col > clen)) {
            throw new DMLRuntimeException("Matrix cell [" + (row) + "," + (col) + "] " + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].", e);
        } else {
            throw new DMLRuntimeException("Unable to partition binary cell matrix.", e);
        }
    }
}
use of org.apache.sysml.runtime.matrix.data.MatrixCell in project systemml by apache.
the class ReblockSPInstruction method processMatrixReblockInstruction.
/**
 * Reblocks the matrix bound to {@code input1} into binary block format and
 * registers the resulting RDD (plus lineage) for {@code output}. Text cell,
 * matrix market, binary cell and binary block inputs are converted here;
 * CSV input is delegated to a {@code CSVReblockSPInstruction}.
 *
 * @param sec spark execution context
 * @param iinfo input info of the matrix to reblock
 * @throws DMLRuntimeException if the input info is not supported
 */
@SuppressWarnings("unchecked")
protected void processMatrixReblockInstruction(SparkExecutionContext sec, InputInfo iinfo) {
    MatrixObject inputObj = sec.getMatrixObject(input1.getName());
    MatrixCharacteristics inChars = sec.getMatrixCharacteristics(input1.getName());
    MatrixCharacteristics outChars = sec.getMatrixCharacteristics(output.getName());

    // reblocked output; assigned by every branch that falls through to the shared tail
    JavaPairRDD<MatrixIndexes, MatrixBlock> reblocked;

    if (iinfo == InputInfo.TextCellInputInfo || iinfo == InputInfo.MatrixMarketInputInfo) {
        // textcell rdd -> binary block
        JavaPairRDD<LongWritable, Text> textCells = (JavaPairRDD<LongWritable, Text>) sec.getRDDHandleForVariable(input1.getName(), iinfo);
        reblocked = RDDConverterUtils.textCellToBinaryBlock(sec.getSparkContext(), textCells, outChars, outputEmptyBlocks);
    } else if (iinfo == InputInfo.CSVInputInfo) {
        // HACK ALERT: Until we introduce the rewrite to insert csvrblock for non-persistent read
        // defaults apply unless the matrix object carries csv format properties
        boolean header = false;
        String separator = ",";
        boolean fillMissing = false;
        double fillWith = 0;
        if (inputObj.getFileFormatProperties() instanceof CSVFileFormatProperties && inputObj.getFileFormatProperties() != null) {
            CSVFileFormatProperties csvProps = (CSVFileFormatProperties) inputObj.getFileFormatProperties();
            header = csvProps.hasHeader();
            separator = csvProps.getDelim();
            fillMissing = csvProps.isFill();
            fillWith = csvProps.getFillValue();
        }
        // the csv reblock instruction does its own processing; nothing is registered here
        new CSVReblockSPInstruction(null, input1, output, outChars.getRowsPerBlock(), outChars.getColsPerBlock(), header, separator, fillMissing, fillWith, "csvrblk", instString).processInstruction(sec);
        return;
    } else if (iinfo == InputInfo.BinaryCellInputInfo) {
        // binary cell rdd -> binary block
        JavaPairRDD<MatrixIndexes, MatrixCell> cells = (JavaPairRDD<MatrixIndexes, MatrixCell>) sec.getRDDHandleForVariable(input1.getName(), iinfo);
        reblocked = RDDConverterUtils.binaryCellToBinaryBlock(sec.getSparkContext(), cells, outChars, outputEmptyBlocks);
    } else if (iinfo == InputInfo.BinaryBlockInputInfo) {
        // BINARY BLOCK <- BINARY BLOCK (different sizes)
        JavaPairRDD<MatrixIndexes, MatrixBlock> sourceBlocks = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
        boolean rowsAligned = inChars.getRows() < outChars.getRowsPerBlock() || inChars.getRowsPerBlock() % outChars.getRowsPerBlock() == 0;
        boolean colsAligned = inChars.getCols() < outChars.getColsPerBlock() || inChars.getColsPerBlock() % outChars.getColsPerBlock() == 0;
        boolean shuffleFree = inChars.dimsKnown() && outChars.dimsKnown() && rowsAligned && colsAligned;
        JavaPairRDD<MatrixIndexes, MatrixBlock> extracted = sourceBlocks.flatMapToPair(new ExtractBlockForBinaryReblock(inChars, outChars));
        // merge by key only when the reblock is not shuffle-free
        reblocked = shuffleFree ? extracted : RDDAggregateUtils.mergeByKey(extracted, false);
    } else {
        throw new DMLRuntimeException("The given InputInfo is not implemented " + "for ReblockSPInstruction:" + InputInfo.inputInfoToString(iinfo));
    }

    // put output RDD handle into symbol table
    sec.setRDDHandleForVariable(output.getName(), reblocked);
    sec.addLineageRDD(output.getName(), input1.getName());
}
use of org.apache.sysml.runtime.matrix.data.MatrixCell in project systemml by apache.
the class CopyBinaryCellFunction method call.
/**
 * Returns a copy of the given (indexes, cell) pair: a new MatrixIndexes
 * constructed from the input key and a new MatrixCell populated via
 * {@code copy} from the input value.
 *
 * @param arg0 input pair of matrix indexes and matrix cell
 * @return new pair holding the copied indexes and cell
 */
@Override
public Tuple2<MatrixIndexes, MatrixCell> call(Tuple2<MatrixIndexes, MatrixCell> arg0) throws Exception {
    MatrixIndexes indexesCopy = new MatrixIndexes(arg0._1());
    MatrixCell cellCopy = new MatrixCell();
    cellCopy.copy(arg0._2());
    Tuple2<MatrixIndexes, MatrixCell> copied = new Tuple2<>(indexesCopy, cellCopy);
    return copied;
}
Aggregations