use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.
the class CacheableData method isEqualOutputFormat.
/**
 * Checks whether the requested output format matches the format described by
 * this object's metadata.
 *
 * @param outputFormat requested output format name; {@code null} is treated as "no specific request"
 * @return {@code true} if {@code outputFormat} is {@code null} or resolves to the same
 *         {@code OutputInfo} as the one matching the metadata's input info; {@code false}
 *         otherwise, including when any exception occurs while resolving either side
 */
protected boolean isEqualOutputFormat(String outputFormat) {
	// nothing requested: trivially equal
	if (outputFormat == null) {
		return true;
	}
	try {
		MetaDataFormat meta = (MetaDataFormat) _metaData;
		OutputInfo current = InputInfo.getMatchingOutputInfo(meta.getInputInfo());
		OutputInfo requested = OutputInfo.stringToOutputInfo(outputFormat);
		// identity comparison, as in the original implementation
		return current == requested;
	} catch (Exception ex) {
		// any failure while resolving the formats counts as "not equal"
		return false;
	}
}
use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.
the class CacheableData method writeMetaData.
/**
 * Writes the {@code .mtd} metadata file that accompanies a data file, unless the
 * effective output format is MatrixMarket (in which case nothing is written here).
 *
 * @param filePathAndName path of the data file; the metadata file name is this path plus {@code ".mtd"}
 * @param outputFormat requested output format name, or {@code null} to use the format matching the
 *        input info stored in the metadata
 * @param formatProperties file format properties passed through to the metadata writer
 * @throws IOException declared for failures of the underlying metadata write
 * @throws DMLRuntimeException if this object has no metadata
 */
protected void writeMetaData(String filePathAndName, String outputFormat, FileFormatProperties formatProperties) throws IOException {
	MetaDataFormat meta = (MetaDataFormat) _metaData;
	if (meta == null) {
		throw new DMLRuntimeException("Unexpected error while writing mtd file (" + filePathAndName + ") -- metadata is null.");
	}

	// resolve the effective output format: explicit request wins over the metadata default
	OutputInfo target;
	if (outputFormat != null) {
		target = OutputInfo.stringToOutputInfo(outputFormat);
	} else {
		target = InputInfo.getMatchingOutputInfo(meta.getInputInfo());
	}

	// no mtd file is written for MatrixMarket output
	if (target == OutputInfo.MatrixMarketOutputInfo) {
		return;
	}

	// dimension information comes from the metadata held by this object
	MatrixCharacteristics dims = meta.getMatrixCharacteristics();

	// note: only required in single-node mode (due to binarycell default)
	if (target == OutputInfo.BinaryBlockOutputInfo && DMLScript.rtplatform == RUNTIME_PLATFORM.SINGLE_NODE) {
		boolean blockSizeDiffers = dims.getRowsPerBlock() != ConfigurationManager.getBlocksize()
			|| dims.getColsPerBlock() != ConfigurationManager.getBlocksize();
		if (blockSizeDiffers) {
			dims = new MatrixCharacteristics(dims.getRows(), dims.getCols(), ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize(), dims.getNonZeros());
		}
	}

	// write the actual meta data file
	MapReduceTool.writeMetaDataFile(filePathAndName + ".mtd", valueType, getSchema(), dataType, dims, target, formatProperties);
}
use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.
the class FrameObject method writeBlobFromRDDtoHDFS.
/**
 * Writes the given frame RDD to HDFS in the requested format.
 *
 * @param rdd RDD handle to write
 * @param fname target file name
 * @param ofmt output format name, or {@code null} to use the format matching the
 *        input info stored in the metadata
 * @throws IOException declared for failures of the underlying write
 * @throws DMLRuntimeException declared for runtime failures of the underlying write
 */
@Override
protected void writeBlobFromRDDtoHDFS(RDDObject rdd, String fname, String ofmt) throws IOException, DMLRuntimeException {
	// the cast is performed up front regardless of ofmt, as in the original
	MetaDataFormat meta = (MetaDataFormat) _metaData;

	// resolve the effective output format
	OutputInfo target;
	if (ofmt == null) {
		target = InputInfo.getMatchingOutputInfo(meta.getInputInfo());
	} else {
		target = OutputInfo.stringToOutputInfo(ofmt);
	}

	// note: writing an RDD to HDFS might trigger lazy evaluation
	// of pending transformations.
	SparkExecutionContext.writeFrameRDDtoHDFS(rdd, fname, target);
}
use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.
the class MatrixObject method readMatrixPartition.
/**
 * Reads the partition of this partitioned matrix that is addressed by the given index range.
 *
 * The partition file name is derived via getPartitionFileName(pred, brlen, bclen). If that file
 * exists on HDFS it is read with the dimensions implied by the partition format; otherwise a new
 * MatrixBlock of those dimensions is created and a warning is logged.
 *
 * NOTE: for ROW_BLOCK_WISE and COLUMN_BLOCK_WISE partitioning the most recently read block is
 * held in a soft reference together with its file name and reused when the next request maps
 * to the same file; the requested row (or column) is then sliced out of that block. For all
 * other partition formats no caching is applied.
 *
 * If a static in-memory partition has been set from outside (_partitionInMemory), it is
 * returned directly and no read or acquire-time statistics update takes place.
 *
 * @param pred index range (the code subtracts 1 from its start/end fields before slicing,
 *             so these are presumably 1-based -- confirm against IndexRange)
 * @return matrix block
 * @throws DMLRuntimeException if this object is not partitioned, if the partition format is
 *         unsupported, or wrapping any exception raised while reading
 */
public synchronized MatrixBlock readMatrixPartition(IndexRange pred) {
if (LOG.isTraceEnabled())
LOG.trace("Acquire partition " + hashCode() + " " + pred);
// start time is only taken when statistics are enabled
long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
if (!_partitioned)
throw new DMLRuntimeException("MatrixObject not available to indexed read.");
// return static partition of set from outside of the program
if (_partitionInMemory != null)
return _partitionInMemory;
MatrixBlock mb = null;
try {
// only these two formats use the single-entry block cache below
boolean blockwise = (_partitionFormat == PDataPartitionFormat.ROW_BLOCK_WISE || _partitionFormat == PDataPartitionFormat.COLUMN_BLOCK_WISE);
// preparations for block wise access
MetaDataFormat iimd = (MetaDataFormat) _metaData;
MatrixCharacteristics mc = iimd.getMatrixCharacteristics();
int brlen = mc.getRowsPerBlock();
int bclen = mc.getColsPerBlock();
// get filename depending on format
String fname = getPartitionFileName(pred, brlen, bclen);
// probe cache
if (blockwise && _partitionCacheName != null && _partitionCacheName.equals(fname)) {
// try getting block from cache
// (the soft reference may have been cleared, in which case mb stays null)
mb = _cache.get();
}
if (// block not in cache
mb == null) {
// get rows and cols
// dimensions of the partition to read, chosen by partition format
long rows = -1;
long cols = -1;
switch(_partitionFormat) {
case ROW_WISE:
rows = 1;
cols = mc.getCols();
break;
case ROW_BLOCK_WISE:
rows = brlen;
cols = mc.getCols();
break;
case ROW_BLOCK_WISE_N:
rows = _partitionSize;
cols = mc.getCols();
break;
case COLUMN_WISE:
rows = mc.getRows();
cols = 1;
break;
case COLUMN_BLOCK_WISE:
rows = mc.getRows();
cols = bclen;
break;
case COLUMN_BLOCK_WISE_N:
rows = mc.getRows();
cols = _partitionSize;
break;
default:
throw new DMLRuntimeException("Unsupported partition format: " + _partitionFormat);
}
// read the partition file if it exists on HDFS; otherwise fall back to
// a newly created block of the expected dimensions and log a warning
if (MapReduceTool.existsFileOnHDFS(fname))
mb = readBlobFromHDFS(fname, rows, cols);
else {
mb = new MatrixBlock((int) rows, (int) cols, true);
LOG.warn("Reading empty matrix partition " + fname);
}
}
// post processing
if (blockwise) {
// put block into cache
// (done on every block-wise call, including cache hits)
_partitionCacheName = fname;
_cache = new SoftReference<>(mb);
if (_partitionFormat == PDataPartitionFormat.ROW_BLOCK_WISE) {
// row offset of the requested row within its block
int rix = (int) ((pred.rowStart - 1) % brlen);
mb = mb.slice(rix, rix, (int) (pred.colStart - 1), (int) (pred.colEnd - 1), new MatrixBlock());
}
if (_partitionFormat == PDataPartitionFormat.COLUMN_BLOCK_WISE) {
// column offset of the requested column within its block
int cix = (int) ((pred.colStart - 1) % bclen);
mb = mb.slice((int) (pred.rowStart - 1), (int) (pred.rowEnd - 1), cix, cix, new MatrixBlock());
}
}
// NOTE: currently no special treatment of non-existing partitions necessary
// because empty blocks are written anyway
} catch (Exception ex) {
// all failures (including the DMLRuntimeException thrown above) are wrapped
throw new DMLRuntimeException(ex);
}
if (DMLScript.STATISTICS) {
long t1 = System.nanoTime();
CacheStatistics.incrementAcquireRTime(t1 - t0);
}
return mb;
}
use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.
the class MatrixObject method readBlobFromHDFS.
/**
 * Reads a matrix from the given HDFS file using the input format and block sizes
 * stored in this object's metadata, and marks the HDFS file as existing.
 *
 * @param fname name of the file to read
 * @param rlen number of rows to read
 * @param clen number of columns to read
 * @return the matrix block that was read (never {@code null})
 * @throws IOException if the reader returns no data for {@code fname}, or declared for
 *         failures of the underlying read
 */
@Override
protected MatrixBlock readBlobFromHDFS(String fname, long rlen, long clen) throws IOException {
	MetaDataFormat iimd = (MetaDataFormat) _metaData;
	MatrixCharacteristics mc = iimd.getMatrixCharacteristics();
	long begin = 0;
	if (LOG.isTraceEnabled()) {
		LOG.trace("Reading matrix from HDFS... " + hashCode() + " Path: " + fname + ", dimensions: [" + mc.getRows() + ", " + mc.getCols() + ", " + mc.getNonZeros() + "]");
		begin = System.currentTimeMillis();
	}

	// read matrix and maintain meta data
	// The sparsity estimate is nnz / (rows * cols) when nnz is known, else 1.0 (dense).
	// The cell count is computed in double precision: the product of two long
	// dimensions can silently overflow for very large matrices and would then
	// yield a wrong (possibly negative) sparsity.
	double sparsity = (mc.getNonZeros() >= 0 ? ((double) mc.getNonZeros()) / ((double) mc.getRows() * (double) mc.getCols()) : 1.0d);
	MatrixBlock newData = DataConverter.readMatrixFromHDFS(fname, iimd.getInputInfo(), rlen, clen, mc.getRowsPerBlock(), mc.getColsPerBlock(), sparsity, getFileFormatProperties());
	setHDFSFileExists(true);

	// sanity check correct output
	if (newData == null)
		throw new IOException("Unable to load matrix from file: " + fname);
	if (LOG.isTraceEnabled())
		LOG.trace("Reading Completed: " + (System.currentTimeMillis() - begin) + " msec.");
	return newData;
}
Aggregations