Use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.
The class MatrixObject, method readBlobFromRDD.
@Override
protected MatrixBlock readBlobFromRDD(RDDObject rdd, MutableBoolean writeStatus) throws IOException {
    // note: the read of a matrix block from an RDD might trigger
    // lazy evaluation of pending transformations.
    RDDObject lrdd = rdd;
    // prepare return status (by default only collect)
    writeStatus.setValue(false);
    MetaDataFormat iimd = (MetaDataFormat) _metaData;
    MatrixCharacteristics mc = iimd.getMatrixCharacteristics();
    InputInfo ii = iimd.getInputInfo();
    MatrixBlock mb = null;
    try {
        // prevent unnecessary collect through rdd checkpoint
        if (rdd.allowsShortCircuitCollect()) {
            lrdd = (RDDObject) rdd.getLineageChilds().get(0);
        }
        // obtain matrix block from RDD
        int rlen = (int) mc.getRows();
        int clen = (int) mc.getCols();
        int brlen = (int) mc.getRowsPerBlock();
        int bclen = (int) mc.getColsPerBlock();
        long nnz = mc.getNonZerosBound();
        // guarded rdd collect (the guard does not apply to binary cell input)
        if (ii == InputInfo.BinaryBlockInputInfo
            && !OptimizerUtils.checkSparkCollectMemoryBudget(mc, getPinnedSize() + getBroadcastSize(), true)) {
            // write the rdd to hdfs and read it back, to avoid exceeding the collect memory budget
            // note: lazy, partition-at-a-time collect (toLocalIterator) was significantly slower
            if (!MapReduceTool.existsFileOnHDFS(_hdfsFileName)) { // prevent overwriting an existing file
                long newnnz = SparkExecutionContext.writeRDDtoHDFS(lrdd, _hdfsFileName, iimd.getOutputInfo());
                _metaData.getMatrixCharacteristics().setNonZeros(newnnz);
                // mark rdd as non-pending (for export)
                ((RDDObject) rdd).setPending(false);
                // mark rdd as hdfs file (for restore)
                ((RDDObject) rdd).setHDFSFile(true);
                // mark for no cache-write on read
                writeStatus.setValue(true);
                // note: the hdfsFile flag is not entirely accurate because we still hold an
                // rdd reference to the input, not to an rdd of the hdfs file, but the
                // resulting behavior is correct
            }
            mb = readBlobFromHDFS(_hdfsFileName);
        } else if (ii == InputInfo.BinaryCellInputInfo) {
            // collect matrix block from binary cell RDD
            mb = SparkExecutionContext.toMatrixBlock(lrdd, rlen, clen, nnz);
        } else {
            // collect matrix block from binary block RDD
            mb = SparkExecutionContext.toMatrixBlock(lrdd, rlen, clen, brlen, bclen, nnz);
        }
    } catch (DMLRuntimeException ex) {
        throw new IOException(ex);
    }
    // sanity check for correct output
    if (mb == null)
        throw new IOException("Unable to load matrix from rdd.");
    return mb;
}
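The guarded collect above hinges on a driver-side memory-budget check: a binary-block RDD is only collected directly if the estimated result fits next to already-pinned and broadcast data; otherwise it is spilled to HDFS and read back. A minimal sketch of that decision, assuming an illustrative fixed budget and a simplified size estimator (the real check lives in OptimizerUtils.checkSparkCollectMemoryBudget):

// Hypothetical sketch of a guarded-collect decision. The budget constant and
// the size estimator are illustrative assumptions, not SystemML's actual code.
public class GuardedCollectSketch {
    // assumed fixed driver-side budget for collected data (illustrative)
    private static final long LOCAL_MEM_BUDGET = 512L * 1024 * 1024; // 512 MB

    /** Rough in-memory size estimate from matrix dimensions and sparsity. */
    public static long estimateSizeInMemory(long rows, long cols, long nnz) {
        double sparsity = (nnz < 0) ? 1.0
            : Math.min(1.0, (double) nnz / ((double) rows * cols));
        return (sparsity > 0.4)
            ? 8L * rows * cols     // dense: 8 bytes per double value
            : (long) (16.0 * nnz); // sparse: ~16 bytes per non-zero incl. overhead
    }

    /** True if a direct collect is expected to fit next to already-pinned data. */
    public static boolean fitsCollectBudget(long rows, long cols, long nnz, long pinnedSize) {
        return estimateSizeInMemory(rows, cols, nnz) + pinnedSize < LOCAL_MEM_BUDGET;
    }
}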
Use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.
The class MatrixObject, method writeBlobFromRDDtoHDFS.
@Override
protected void writeBlobFromRDDtoHDFS(RDDObject rdd, String fname, String outputFormat) throws IOException, DMLRuntimeException {
    // prepare output info
    MetaDataFormat iimd = (MetaDataFormat) _metaData;
    OutputInfo oinfo = (outputFormat != null)
        ? OutputInfo.stringToOutputInfo(outputFormat)
        : InputInfo.getMatchingOutputInfo(iimd.getInputInfo());
    // note: the write of an RDD to HDFS might trigger
    // lazy evaluation of pending transformations.
    long newnnz = SparkExecutionContext.writeRDDtoHDFS(rdd, fname, oinfo);
    _metaData.getMatrixCharacteristics().setNonZeros(newnnz);
}
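The only branching step here is resolving the output format: an explicitly requested format string wins, otherwise the output format is derived from the input format recorded in the metadata, so round-trips preserve the on-disk representation by default. A minimal stand-alone sketch of that fallback, using a simplified enum in place of SystemML's InputInfo/OutputInfo pair:

// Simplified stand-in for the InputInfo/OutputInfo resolution: prefer the
// explicit format request, otherwise mirror the format the data was read in.
enum Format { TEXT_CELL, BINARY_CELL, BINARY_BLOCK, CSV }

final class FormatResolver {
    static Format resolve(String explicitFormat, Format inputFormat) {
        if (explicitFormat != null)
            return Format.valueOf(explicitFormat.toUpperCase()); // explicit request wins
        return inputFormat; // fall back to the source format
    }
}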
Use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.
The class MatrixObject, method writeBlobToHDFS.
/**
 * Writes an in-memory matrix to HDFS in a specified format.
 */
@Override
protected void writeBlobToHDFS(String fname, String ofmt, int rep, FileFormatProperties fprop) throws IOException, DMLRuntimeException {
    long begin = 0;
    if (LOG.isTraceEnabled()) {
        LOG.trace("Writing matrix to HDFS... " + hashCode() + " Path: " + fname + ", Format: " + (ofmt != null ? ofmt : "inferred from metadata"));
        begin = System.currentTimeMillis();
    }
    MetaDataFormat iimd = (MetaDataFormat) _metaData;
    if (_data != null) {
        // get the dimension information from the metadata stored within MatrixObject
        MatrixCharacteristics mc = iimd.getMatrixCharacteristics();
        // write the matrix to HDFS in the requested format
        OutputInfo oinfo = (ofmt != null)
            ? OutputInfo.stringToOutputInfo(ofmt)
            : InputInfo.getMatchingOutputInfo(iimd.getInputInfo());
        // note: re-blocking to the configured default blocksize is only required in
        // singlenode mode (due to the binary-cell default)
        if (oinfo == OutputInfo.BinaryBlockOutputInfo
            && DMLScript.rtplatform == RUNTIME_PLATFORM.SINGLE_NODE
            && (mc.getRowsPerBlock() != ConfigurationManager.getBlocksize()
                || mc.getColsPerBlock() != ConfigurationManager.getBlocksize())) {
            DataConverter.writeMatrixToHDFS(_data, fname, oinfo,
                new MatrixCharacteristics(mc.getRows(), mc.getCols(),
                    ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize(),
                    mc.getNonZeros()),
                rep, fprop);
        } else {
            DataConverter.writeMatrixToHDFS(_data, fname, oinfo, mc, rep, fprop);
        }
        if (LOG.isTraceEnabled())
            LOG.trace("Writing matrix to HDFS (" + fname + ") - COMPLETED... " + (System.currentTimeMillis() - begin) + " msec.");
    } else if (LOG.isTraceEnabled()) {
        LOG.trace("Writing matrix to HDFS (" + fname + ") - NOTHING TO WRITE (_data == null).");
    }
    if (DMLScript.STATISTICS)
        CacheStatistics.incrementHDFSWrites();
}
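The single-node special case above exists because metadata created in other execution modes can record block sizes that differ from the configured default; the write then substitutes the configured blocksize so the binary-block file on HDFS is consistently blocked. A compact sketch of that normalization, with a simplified stand-in for MatrixCharacteristics:

// Sketch of the blocksize normalization applied before a binary-block write.
// The Characteristics class is an illustrative stand-in, not SystemML's API.
final class Characteristics {
    final long rows, cols, nnz;
    final int rowsPerBlock, colsPerBlock;

    Characteristics(long rows, long cols, int rpb, int cpb, long nnz) {
        this.rows = rows; this.cols = cols;
        this.rowsPerBlock = rpb; this.colsPerBlock = cpb;
        this.nnz = nnz;
    }

    /** Return characteristics re-blocked to the given default blocksize if needed. */
    Characteristics normalize(int defaultBlocksize) {
        if (rowsPerBlock == defaultBlocksize && colsPerBlock == defaultBlocksize)
            return this; // already consistent, reuse as-is
        return new Characteristics(rows, cols, defaultBlocksize, defaultBlocksize, nnz);
    }
}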
Use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.
The class ProgramConverter, method parseDataObject.
/**
 * NOTE: MRJobConfiguration cannot be used for the general case because program blocks and
 * related symbol tables can be hierarchically structured.
 *
 * @param in data object as string
 * @return two-element array of the variable name and the parsed Data object
 */
public static Object[] parseDataObject(String in) {
    Object[] ret = new Object[2];
    StringTokenizer st = new StringTokenizer(in, DATA_FIELD_DELIM);
    String name = st.nextToken();
    DataType datatype = DataType.valueOf(st.nextToken());
    ValueType valuetype = ValueType.valueOf(st.nextToken());
    String valString = st.hasMoreTokens() ? st.nextToken() : "";
    Data dat = null;
    switch (datatype) {
        case SCALAR: {
            switch (valuetype) {
                case INT:
                    dat = new IntObject(Long.parseLong(valString));
                    break;
                case DOUBLE:
                    dat = new DoubleObject(Double.parseDouble(valString));
                    break;
                case BOOLEAN:
                    dat = new BooleanObject(Boolean.parseBoolean(valString));
                    break;
                case STRING:
                    dat = new StringObject(valString);
                    break;
                default:
                    throw new DMLRuntimeException("Unable to parse valuetype " + valuetype);
            }
            break;
        }
        case MATRIX: {
            MatrixObject mo = new MatrixObject(valuetype, valString);
            long rows = Long.parseLong(st.nextToken());
            long cols = Long.parseLong(st.nextToken());
            int brows = Integer.parseInt(st.nextToken());
            int bcols = Integer.parseInt(st.nextToken());
            long nnz = Long.parseLong(st.nextToken());
            InputInfo iin = InputInfo.stringToInputInfo(st.nextToken());
            OutputInfo oin = OutputInfo.stringToOutputInfo(st.nextToken());
            PartitionFormat partFormat = PartitionFormat.valueOf(st.nextToken());
            UpdateType inplace = UpdateType.valueOf(st.nextToken());
            MatrixCharacteristics mc = new MatrixCharacteristics(rows, cols, brows, bcols, nnz);
            MetaDataFormat md = new MetaDataFormat(mc, oin, iin);
            mo.setMetaData(md);
            if (partFormat._dpf != PDataPartitionFormat.NONE)
                mo.setPartitioned(partFormat._dpf, partFormat._N);
            mo.setUpdateType(inplace);
            dat = mo;
            break;
        }
        default:
            throw new DMLRuntimeException("Unable to parse datatype " + datatype);
    }
    ret[0] = name;
    ret[1] = dat;
    return ret;
}
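The parser fixes an implicit wire format: name, data type, value type, value string, and for matrices the shape, blocking, nnz, input/output formats, partitioning, and update type, all joined by DATA_FIELD_DELIM. A hedged sketch of the inverse direction for the MATRIX case, derived purely from the token order consumed above (the delimiter argument and the two trailing token encodings are assumptions; ProgramConverter's actual serializer may differ):

// Hypothetical inverse of parseDataObject for the MATRIX case, mirroring the
// token order the parser consumes. Not SystemML's real serialization code.
static String serializeMatrixObject(String name, MatrixObject mo, String delim) {
    MetaDataFormat md = (MetaDataFormat) mo.getMetaData();
    MatrixCharacteristics mc = md.getMatrixCharacteristics();
    StringBuilder sb = new StringBuilder();
    sb.append(name).append(delim);
    sb.append(DataType.MATRIX).append(delim);
    sb.append(mo.getValueType()).append(delim);
    sb.append(mo.getFileName()).append(delim); // valString holds the file name
    sb.append(mc.getRows()).append(delim);
    sb.append(mc.getCols()).append(delim);
    sb.append(mc.getRowsPerBlock()).append(delim);
    sb.append(mc.getColsPerBlock()).append(delim);
    sb.append(mc.getNonZeros()).append(delim);
    sb.append(InputInfo.inputInfoToString(md.getInputInfo())).append(delim);
    sb.append(OutputInfo.outputInfoToString(md.getOutputInfo())).append(delim);
    sb.append("NONE").append(delim); // PartitionFormat token (assumed encoding)
    sb.append(mo.getUpdateType());   // UpdateType token
    return sb.toString();
}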
Use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.
The class ResultMergeLocalFile, method createNewMatrixObject.
private MatrixObject createNewMatrixObject(MatrixObject output, ArrayList<MatrixObject> inMO) {
    MetaDataFormat metadata = (MetaDataFormat) _output.getMetaData();
    MatrixObject moNew = new MatrixObject(_output.getValueType(), _outputFName);
    // create a deep copy of the metadata object
    MatrixCharacteristics mcOld = metadata.getMatrixCharacteristics();
    OutputInfo oiOld = metadata.getOutputInfo();
    InputInfo iiOld = metadata.getInputInfo();
    MatrixCharacteristics mc = new MatrixCharacteristics(mcOld);
    mc.setNonZeros(_isAccum ? -1 : computeNonZeros(output, inMO));
    MetaDataFormat meta = new MetaDataFormat(mc, oiOld, iiOld);
    moNew.setMetaData(meta);
    return moNew;
}
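The deep copy matters because the merged result needs its own MatrixCharacteristics: the non-zero count is recomputed (or set to -1, i.e. unknown, in the accumulation case), and that mutation must not leak into the metadata of the original output object. A small self-contained illustration of this copy-then-mutate pattern (the dimension values are made up):

import org.apache.sysml.runtime.matrix.MatrixCharacteristics;

public class MetadataCopyDemo {
    public static void main(String[] args) {
        MatrixCharacteristics mcOld = new MatrixCharacteristics(1000, 1000, 1000, 1000, 5000);
        MatrixCharacteristics mcNew = new MatrixCharacteristics(mcOld); // copy constructor
        mcNew.setNonZeros(-1); // mark nnz as unknown on the copy only
        System.out.println(mcOld.getNonZeros()); // prints 5000: original untouched
    }
}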