use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.
the class ResultMergeLocalFile method createBinaryBlockResultFile.
@SuppressWarnings("deprecation")
private void createBinaryBlockResultFile(String fnameStaging, String fnameStagingCompare, String fnameNew, MatrixFormatMetaData metadata, boolean withCompare) throws IOException, DMLRuntimeException {
JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
Path path = new Path(fnameNew);
FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
MatrixCharacteristics mc = metadata.getMatrixCharacteristics();
long rlen = mc.getRows();
long clen = mc.getCols();
int brlen = mc.getRowsPerBlock();
int bclen = mc.getColsPerBlock();
//beware ca 50ms
SequenceFile.Writer writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class);
try {
MatrixIndexes indexes = new MatrixIndexes();
for (long brow = 1; brow <= (long) Math.ceil(rlen / (double) brlen); brow++) for (long bcol = 1; bcol <= (long) Math.ceil(clen / (double) bclen); bcol++) {
File dir = new File(fnameStaging + "/" + brow + "_" + bcol);
File dir2 = new File(fnameStagingCompare + "/" + brow + "_" + bcol);
MatrixBlock mb = null;
if (dir.exists()) {
if (//WITH COMPARE BLOCK
withCompare && dir2.exists()) {
//copy only values that are different from the original
String[] lnames2 = dir2.list();
if (//there should be exactly 1 compare block
lnames2.length != 1)
throw new DMLRuntimeException("Unable to merge results because multiple compare blocks found.");
mb = LocalFileUtils.readMatrixBlockFromLocal(dir2 + "/" + lnames2[0]);
boolean appendOnly = mb.isInSparseFormat();
double[][] compare = DataConverter.convertToDoubleMatrix(mb);
String[] lnames = dir.list();
for (String lname : lnames) {
MatrixBlock tmp = LocalFileUtils.readMatrixBlockFromLocal(dir + "/" + lname);
mergeWithComp(mb, tmp, compare);
}
//sort sparse due to append-only
if (appendOnly)
mb.sortSparseRows();
//change sparsity if required after
mb.examSparsity();
} else //WITHOUT COMPARE BLOCK
{
//copy all non-zeros from all workers
String[] lnames = dir.list();
boolean appendOnly = false;
for (String lname : lnames) {
if (mb == null) {
mb = LocalFileUtils.readMatrixBlockFromLocal(dir + "/" + lname);
appendOnly = mb.isInSparseFormat();
} else {
MatrixBlock tmp = LocalFileUtils.readMatrixBlockFromLocal(dir + "/" + lname);
mergeWithoutComp(mb, tmp, appendOnly);
}
}
//sort sparse due to append-only
if (appendOnly)
mb.sortSparseRows();
//change sparsity if required after
mb.examSparsity();
}
} else {
//NOTE: whenever runtime does not need all blocks anymore, this can be removed
int maxRow = (int) (((brow - 1) * brlen + brlen < rlen) ? brlen : rlen - (brow - 1) * brlen);
int maxCol = (int) (((bcol - 1) * bclen + bclen < clen) ? bclen : clen - (bcol - 1) * bclen);
mb = new MatrixBlock(maxRow, maxCol, true);
}
//mb.examSparsity(); //done on write anyway and mb not reused
indexes.setIndexes(brow, bcol);
writer.append(indexes, mb);
}
} finally {
IOUtilFunctions.closeSilently(writer);
}
}
use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.
the class ResultMergeLocalFile method createBinaryCellStagingFile.
@SuppressWarnings("deprecation")
private void createBinaryCellStagingFile(String fnameStaging, MatrixObject mo, long ID) throws IOException, DMLRuntimeException {
JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
Path path = new Path(mo.getFileName());
FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
LinkedList<Cell> buffer = new LinkedList<Cell>();
MatrixIndexes key = new MatrixIndexes();
MatrixCell value = new MatrixCell();
MatrixCharacteristics mc = mo.getMatrixCharacteristics();
int brlen = mc.getRowsPerBlock();
int bclen = mc.getColsPerBlock();
for (Path lpath : MatrixReader.getSequenceFilePaths(fs, path)) {
SequenceFile.Reader reader = new SequenceFile.Reader(fs, lpath, job);
try {
while (reader.next(key, value)) {
Cell tmp = new Cell(key.getRowIndex(), key.getColumnIndex(), value.getValue());
buffer.addLast(tmp);
if (//periodic flush
buffer.size() > StagingFileUtils.CELL_BUFFER_SIZE) {
appendCellBufferToStagingArea(fnameStaging, ID, buffer, brlen, bclen);
buffer.clear();
}
}
//final flush
if (!buffer.isEmpty()) {
appendCellBufferToStagingArea(fnameStaging, ID, buffer, brlen, bclen);
buffer.clear();
}
} finally {
IOUtilFunctions.closeSilently(reader);
}
}
}
use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.
the class RemoteDPParForSpark method getPartitionedInput.
@SuppressWarnings("unchecked")
private static JavaPairRDD<Long, Writable> getPartitionedInput(SparkExecutionContext sec, String matrixvar, OutputInfo oi, PartitionFormat dpf) throws DMLRuntimeException {
InputInfo ii = InputInfo.BinaryBlockInputInfo;
MatrixObject mo = sec.getMatrixObject(matrixvar);
MatrixCharacteristics mc = mo.getMatrixCharacteristics();
//NOTE: there will always be a checkpoint rdd on top of the input rdd and the dataset
if (hasInputDataSet(dpf, mo)) {
DatasetObject dsObj = (DatasetObject) mo.getRDDHandle().getLineageChilds().get(0).getLineageChilds().get(0);
Dataset<Row> in = dsObj.getDataset();
//construct or reuse row ids
JavaPairRDD<Row, Long> prepinput = dsObj.containsID() ? in.javaRDD().mapToPair(new DataFrameExtractIDFunction(in.schema().fieldIndex(RDDConverterUtils.DF_ID_COLUMN))) : //zip row index
in.javaRDD().zipWithIndex();
//convert row to row in matrix block format
return prepinput.mapToPair(new DataFrameToRowBinaryBlockFunction(mc.getCols(), dsObj.isVectorBased(), dsObj.containsID()));
} else //binary block input rdd without grouping
if (!requiresGrouping(dpf, mo)) {
//get input rdd and data partitioning
JavaPairRDD<MatrixIndexes, MatrixBlock> in = sec.getBinaryBlockRDDHandleForVariable(matrixvar);
DataPartitionerRemoteSparkMapper dpfun = new DataPartitionerRemoteSparkMapper(mc, ii, oi, dpf._dpf, dpf._N);
return in.flatMapToPair(dpfun);
} else //default binary block input rdd with grouping
{
//get input rdd, avoid unnecessary caching if input is checkpoint and not cached yet
//to reduce memory pressure for shuffle and subsequent
JavaPairRDD<MatrixIndexes, MatrixBlock> in = sec.getBinaryBlockRDDHandleForVariable(matrixvar);
if (mo.getRDDHandle().isCheckpointRDD() && !sec.isRDDCached(in.id()))
in = (JavaPairRDD<MatrixIndexes, MatrixBlock>) ((RDDObject) mo.getRDDHandle().getLineageChilds().get(0)).getRDD();
//data partitioning of input rdd
DataPartitionerRemoteSparkMapper dpfun = new DataPartitionerRemoteSparkMapper(mc, ii, oi, dpf._dpf, dpf._N);
return in.flatMapToPair(dpfun);
}
}
use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.
the class ResultMergeLocalFile method createBinaryCellResultFile.
@SuppressWarnings("deprecation")
private void createBinaryCellResultFile(String fnameStaging, String fnameStagingCompare, String fnameNew, MatrixFormatMetaData metadata, boolean withCompare) throws IOException, DMLRuntimeException {
JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
Path path = new Path(fnameNew);
FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
MatrixCharacteristics mc = metadata.getMatrixCharacteristics();
long rlen = mc.getRows();
long clen = mc.getCols();
int brlen = mc.getRowsPerBlock();
int bclen = mc.getColsPerBlock();
MatrixIndexes indexes = new MatrixIndexes(1, 1);
MatrixCell cell = new MatrixCell(0);
//beware ca 50ms
SequenceFile.Writer out = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixCell.class);
try {
boolean written = false;
for (long brow = 1; brow <= (long) Math.ceil(rlen / (double) brlen); brow++) for (long bcol = 1; bcol <= (long) Math.ceil(clen / (double) bclen); bcol++) {
File dir = new File(fnameStaging + "/" + brow + "_" + bcol);
File dir2 = new File(fnameStagingCompare + "/" + brow + "_" + bcol);
MatrixBlock mb = null;
long row_offset = (brow - 1) * brlen + 1;
long col_offset = (bcol - 1) * bclen + 1;
if (dir.exists()) {
if (//WITH COMPARE BLOCK
withCompare && dir2.exists()) {
//copy only values that are different from the original
String[] lnames2 = dir2.list();
if (//there should be exactly 1 compare block
lnames2.length != 1)
throw new DMLRuntimeException("Unable to merge results because multiple compare blocks found.");
mb = StagingFileUtils.readCellList2BlockFromLocal(dir2 + "/" + lnames2[0], brlen, bclen);
boolean appendOnly = mb.isInSparseFormat();
double[][] compare = DataConverter.convertToDoubleMatrix(mb);
String[] lnames = dir.list();
for (String lname : lnames) {
MatrixBlock tmp = StagingFileUtils.readCellList2BlockFromLocal(dir + "/" + lname, brlen, bclen);
mergeWithComp(mb, tmp, compare);
}
//sort sparse due to append-only
if (appendOnly)
mb.sortSparseRows();
//change sparsity if required after
mb.examSparsity();
} else //WITHOUT COMPARE BLOCK
{
//copy all non-zeros from all workers
String[] lnames = dir.list();
boolean appendOnly = false;
for (String lname : lnames) {
if (mb == null) {
mb = StagingFileUtils.readCellList2BlockFromLocal(dir + "/" + lname, brlen, bclen);
appendOnly = mb.isInSparseFormat();
} else {
MatrixBlock tmp = StagingFileUtils.readCellList2BlockFromLocal(dir + "/" + lname, brlen, bclen);
mergeWithoutComp(mb, tmp, appendOnly);
}
}
//sort sparse due to append-only
if (appendOnly)
mb.sortSparseRows();
//change sparsity if required after
mb.examSparsity();
}
}
//write the block to binary cell
if (mb != null) {
if (mb.isInSparseFormat()) {
Iterator<IJV> iter = mb.getSparseBlockIterator();
while (iter.hasNext()) {
IJV lcell = iter.next();
indexes.setIndexes(row_offset + lcell.getI(), col_offset + lcell.getJ());
cell.setValue(lcell.getV());
out.append(indexes, cell);
written = true;
}
} else {
for (int i = 0; i < brlen; i++) for (int j = 0; j < bclen; j++) {
double lvalue = mb.getValueDenseUnsafe(i, j);
if (//for nnz
lvalue != 0) {
indexes.setIndexes(row_offset + i, col_offset + j);
cell.setValue(lvalue);
out.append(indexes, cell);
written = true;
}
}
}
}
}
if (!written)
out.append(indexes, cell);
} finally {
IOUtilFunctions.closeSilently(out);
}
}
use of org.apache.sysml.runtime.matrix.data.MatrixIndexes in project incubator-systemml by apache.
the class ReblockSPInstruction method processMatrixReblockInstruction.
@SuppressWarnings("unchecked")
protected void processMatrixReblockInstruction(SparkExecutionContext sec, InputInfo iinfo) throws DMLRuntimeException {
MatrixObject mo = sec.getMatrixObject(input1.getName());
MatrixCharacteristics mc = sec.getMatrixCharacteristics(input1.getName());
MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
if (iinfo == InputInfo.TextCellInputInfo || iinfo == InputInfo.MatrixMarketInputInfo) {
//check jdk version (prevent double.parseDouble contention on <jdk8)
sec.checkAndRaiseValidationWarningJDKVersion();
//get the input textcell rdd
JavaPairRDD<LongWritable, Text> lines = (JavaPairRDD<LongWritable, Text>) sec.getRDDHandleForVariable(input1.getName(), iinfo);
//convert textcell to binary block
JavaPairRDD<MatrixIndexes, MatrixBlock> out = RDDConverterUtils.textCellToBinaryBlock(sec.getSparkContext(), lines, mcOut, outputEmptyBlocks);
//put output RDD handle into symbol table
sec.setRDDHandleForVariable(output.getName(), out);
sec.addLineageRDD(output.getName(), input1.getName());
} else if (iinfo == InputInfo.CSVInputInfo) {
// HACK ALERT: Until we introduces the rewrite to insert csvrblock for non-persistent read
// throw new DMLRuntimeException("CSVInputInfo is not supported for ReblockSPInstruction");
CSVReblockSPInstruction csvInstruction = null;
boolean hasHeader = false;
String delim = ",";
boolean fill = false;
double fillValue = 0;
if (mo.getFileFormatProperties() instanceof CSVFileFormatProperties && mo.getFileFormatProperties() != null) {
CSVFileFormatProperties props = (CSVFileFormatProperties) mo.getFileFormatProperties();
hasHeader = props.hasHeader();
delim = props.getDelim();
fill = props.isFill();
fillValue = props.getFillValue();
}
csvInstruction = new CSVReblockSPInstruction(null, input1, output, mcOut.getRowsPerBlock(), mcOut.getColsPerBlock(), hasHeader, delim, fill, fillValue, "csvrblk", instString);
csvInstruction.processInstruction(sec);
return;
} else if (iinfo == InputInfo.BinaryCellInputInfo) {
JavaPairRDD<MatrixIndexes, MatrixCell> binaryCells = (JavaPairRDD<MatrixIndexes, MatrixCell>) sec.getRDDHandleForVariable(input1.getName(), iinfo);
JavaPairRDD<MatrixIndexes, MatrixBlock> out = RDDConverterUtils.binaryCellToBinaryBlock(sec.getSparkContext(), binaryCells, mcOut, outputEmptyBlocks);
//put output RDD handle into symbol table
sec.setRDDHandleForVariable(output.getName(), out);
sec.addLineageRDD(output.getName(), input1.getName());
} else if (iinfo == InputInfo.BinaryBlockInputInfo) {
//BINARY BLOCK <- BINARY BLOCK (different sizes)
JavaPairRDD<MatrixIndexes, MatrixBlock> in1 = sec.getBinaryBlockRDDHandleForVariable(input1.getName());
JavaPairRDD<MatrixIndexes, MatrixBlock> out = in1.flatMapToPair(new ExtractBlockForBinaryReblock(mc, mcOut));
out = RDDAggregateUtils.mergeByKey(out, false);
//put output RDD handle into symbol table
sec.setRDDHandleForVariable(output.getName(), out);
sec.addLineageRDD(output.getName(), input1.getName());
} else {
throw new DMLRuntimeException("The given InputInfo is not implemented " + "for ReblockSPInstruction:" + InputInfo.inputInfoToString(iinfo));
}
}
Aggregations