use of org.apache.sysml.runtime.controlprogram.caching.MatrixObject in project incubator-systemml by apache.
the class ResultMergeLocalMemory method executeParallelMerge.
/**
 * Merges all worker results into a new output matrix object, using up to {@code par}
 * threads that write into one shared dense result block.
 *
 * Inputs that are {@code null} or identical to the output are skipped. If nothing remains
 * to merge, the existing output object is returned as is. Inputs are processed in waves of
 * at most {@code numThreads} workers; the last wave may be smaller than the others.
 *
 * @param par requested degree of parallelism; values below 1 are treated as 1
 * @return a newly created matrix object holding the merged result, or the original output
 *         object if there was nothing to merge
 * @throws DMLRuntimeException wrapping any failure raised while acquiring, merging or
 *         releasing the matrices
 */
@Override
public MatrixObject executeParallelMerge(int par) {
    // always create new matrix object (required for nested parallelism)
    MatrixObject moNew = null;
    if (LOG.isTraceEnabled())
        LOG.trace("ResultMerge (local, in-memory): Execute parallel (par=" + par + ") " + "merge for output " + _output.hashCode() + " (fname=" + _output.getFileName() + ")");
    // true while an acquireRead() on the output has not yet been matched by a release()
    boolean outputAcquired = false;
    try {
        // get matrix blocks through caching
        MatrixBlock outMB = _output.acquireRead();
        outputAcquired = true;
        ArrayList<MatrixObject> inMO = new ArrayList<>();
        for (MatrixObject in : _inputs) {
            // check for empty inputs (no iterations executed)
            if (in != null && in != _output)
                inMO.add(in);
        }
        if (!inMO.isEmpty()) {
            // if there exist something to merge
            // get old output matrix from cache for compare
            // NOTE: always in dense representation in order to allow for parallel unsynchronized access
            long rows = outMB.getNumRows();
            long cols = outMB.getNumColumns();
            MatrixBlock outMBNew = new MatrixBlock((int) rows, (int) cols, false);
            outMBNew.allocateDenseBlockUnsafe((int) rows, (int) cols);
            // create compare matrix if required (existing data in result)
            _compare = getCompareMatrix(outMB);
            if (_compare != null)
                outMBNew.copy(outMB);
            // parallel merge of all inputs
            // number of inputs can be lower than par
            int numThreads = Math.min(par, inMO.size());
            // ensure robustness for remote exec
            numThreads = Math.min(numThreads, InfrastructureAnalyzer.getLocalParallelism());
            // guard against par <= 0, which would otherwise never advance the wave loop
            numThreads = Math.max(numThreads, 1);
            Thread[] threads = new Thread[numThreads];
            // multiple waves if necessary
            for (int k = 0; k < inMO.size(); k += numThreads) {
                // the last wave may contain fewer inputs than there are thread slots
                int waveSize = Math.min(numThreads, inMO.size() - k);
                // create and start threads
                for (int i = 0; i < waveSize; i++) {
                    ResultMergeWorker rmw = new ResultMergeWorker(inMO.get(k + i), outMBNew);
                    threads[i] = new Thread(rmw);
                    threads[i].setPriority(Thread.MAX_PRIORITY);
                    // start execution
                    threads[i].start();
                }
                // wait for all workers of this wave to finish
                for (int i = 0; i < waveSize; i++) {
                    threads[i].join();
                }
            }
            // create new output matrix
            // (e.g., to prevent potential export<->read file access conflict in specific cases of
            // local-remote nested parfor))
            moNew = createNewMatrixObject(outMBNew);
        } else {
            // return old matrix, to prevent copy
            moNew = _output;
        }
        // release old output; flag is cleared first so a failing release is not retried below
        outputAcquired = false;
        _output.release();
    } catch (Exception ex) {
        // keep the interrupt visible to callers further up the stack
        if (ex instanceof InterruptedException)
            Thread.currentThread().interrupt();
        throw new DMLRuntimeException(ex);
    } finally {
        if (outputAcquired) {
            try {
                // failure path: still match the earlier acquireRead()
                _output.release();
            } catch (Exception ignored) {
                // best effort only; the original failure is already being propagated
            }
        }
    }
    return moNew;
}
use of org.apache.sysml.runtime.controlprogram.caching.MatrixObject in project incubator-systemml by apache.
the class ResultMergeRemoteSpark method executeParallelMerge.
/**
 * Merges the worker results into a new matrix object backed by the RDD that
 * {@code executeMerge} returns.
 *
 * The {@code par} argument is not read by this implementation. When there are no inputs,
 * the existing output object is returned unchanged.
 *
 * @param par requested degree of parallelism (unused here)
 * @return a new matrix object carrying the merged RDD handle and copied metadata, or the
 *         original output object if there are no inputs
 * @throws DMLRuntimeException wrapping any exception raised during the merge
 */
@Override
public MatrixObject executeParallelMerge(int par) {
    if (LOG.isTraceEnabled()) {
        LOG.trace("ResultMerge (remote, spark): Execute serial merge for output " + _output.hashCode() + " (fname=" + _output.getFileName() + ")");
    }

    // nothing to merge: return old matrix, to prevent copy
    if (_inputs == null || _inputs.length == 0) {
        return _output;
    }

    try {
        // prepare compare: only needed if the old output reports non-zero entries
        MetaDataFormat oldMeta = (MetaDataFormat) _output.getMetaData();
        MatrixCharacteristics oldDims = oldMeta.getMatrixCharacteristics();
        MatrixObject compareTarget = null;
        if (oldDims.getNonZeros() != 0) {
            compareTarget = _output;
        }

        // actual merge
        RDDObject mergedRdd = executeMerge(compareTarget, _inputs,
            oldDims.getRows(), oldDims.getCols(),
            oldDims.getRowsPerBlock(), oldDims.getColsPerBlock());

        // create new output matrix (e.g., to prevent potential export<->read file access conflict)
        MatrixObject merged = new MatrixObject(_output.getValueType(), _outputFName);
        OutputInfo outInfo = oldMeta.getOutputInfo();
        InputInfo inInfo = oldMeta.getInputInfo();

        // copy the old characteristics; nnz is set to -1 in accumulation mode
        MatrixCharacteristics newDims = new MatrixCharacteristics(oldDims);
        newDims.setNonZeros(_isAccum ? -1 : computeNonZeros(_output, Arrays.asList(_inputs)));

        merged.setMetaData(new MetaDataFormat(newDims, outInfo, inInfo));
        merged.setRDDHandle(mergedRdd);
        return merged;
    } catch (Exception ex) {
        throw new DMLRuntimeException(ex);
    }
}
use of org.apache.sysml.runtime.controlprogram.caching.MatrixObject in project incubator-systemml by apache.
the class VariableCPInstruction method processWriteInstruction.
/**
 * Handler for write instructions.
 *
 * Resolves the target file name (input 2) and the description (input 4), stores the
 * description in the format properties, and then dispatches on the data type of input 1:
 * scalars, matrices and frames each have their own write path. For matrices, the
 * "matrixmarket" and "csv" formats go through dedicated helpers; every other format is
 * exported directly with the format name taken from input 3. Any other data type is
 * ignored.
 *
 * @param ec execution context
 */
private void processWriteInstruction(ExecutionContext ec) {
    // file name and description may be literals or variable expressions
    String fname = ec.getScalarInput(getInput2().getName(), ValueType.STRING, getInput2().isLiteral()).getStringValue();
    String desc = ec.getScalarInput(getInput4().getName(), ValueType.STRING, getInput4().isLiteral()).getStringValue();
    _formatProperties.setDescription(desc);

    DataType inputType = getInput1().getDataType();

    if (inputType == DataType.SCALAR) {
        writeScalarToHDFS(ec, fname);
        return;
    }

    if (inputType == DataType.MATRIX) {
        String format = getInput3().getName();
        if (format.equalsIgnoreCase("matrixmarket")) {
            writeMMFile(ec, fname);
        } else if (format.equalsIgnoreCase("csv")) {
            writeCSVFile(ec, fname);
        } else {
            // default behavior: export in the requested format
            MatrixObject matrix = ec.getMatrixObject(getInput1().getName());
            matrix.exportData(fname, format, _formatProperties);
        }
        return;
    }

    if (inputType == DataType.FRAME) {
        String format = getInput3().getName();
        FrameObject frame = ec.getFrameObject(getInput1().getName());
        frame.exportData(fname, format, _formatProperties);
    }
}
use of org.apache.sysml.runtime.controlprogram.caching.MatrixObject in project incubator-systemml by apache.
the class VariableCPInstruction method processMoveInstruction.
/**
 * Handler for mvvar instructions.
 *
 * Two forms are supported:
 * <ul>
 * <li>{@code mvvar <srcvar> <destvar>} (no third input): rebinds the data object of
 * srcvar to destvar. If destvar is a matrix or frame, a previously bound cacheable
 * object is removed and cleaned up first.</li>
 * <li>{@code mvvar <srcvar> <destFile> <format>}: moves the data of srcvar to destFile.
 * Only format=binaryblock is moved; for other formats an error is raised when the source
 * is a matrix or frame object.</li>
 * </ul>
 *
 * @param ec execution context
 * @throws DMLRuntimeException if the source variable does not exist, the move fails, or
 *         the requested format is not supported for the source object
 */
private void processMoveInstruction(ExecutionContext ec) {
    // get source variable (needed by both instruction forms)
    final String srcName = getInput1().getName();
    Data srcData = ec.getVariable(srcName);
    if (srcData == null) {
        throw new DMLRuntimeException("Unexpected error: could not find a data object for variable name:" + srcName + ", while processing instruction " + this.toString());
    }

    if (getInput3() == null) {
        // example: mvvar tempA A
        if (getInput2().getDataType().isMatrix() || getInput2().getDataType().isFrame()) {
            // remove existing variable bound to target name
            Data tgt = ec.removeVariable(getInput2().getName());
            // cleanup matrix data on fs/hdfs (if necessary)
            if (tgt instanceof CacheableData) {
                ec.cleanupCacheableData((CacheableData<?>) tgt);
            }
        }
        // do the actual move
        ec.setVariable(getInput2().getName(), srcData);
        ec.removeVariable(srcName);
        return;
    }

    // example instruction: mvvar <srcVar> <destFile> <format>
    final String format = getInput3().getName();
    if (format.equalsIgnoreCase("binaryblock")) {
        // only cacheable objects can be moved; report this instead of a ClassCastException
        if (!(srcData instanceof CacheableData)) {
            throw new DMLRuntimeException("Failed to move var " + srcName + " to file " + getInput2().getName() + ": variable is not a cacheable data object.");
        }
        boolean success = ((CacheableData<?>) srcData).moveData(getInput2().getName(), format);
        if (!success) {
            throw new DMLRuntimeException("Failed to move var " + srcName + " to file " + getInput2().getName() + ".");
        }
    } else if (srcData instanceof MatrixObject) {
        MatrixObject mo = (MatrixObject) srcData;
        throw new DMLRuntimeException("Unexpected formats while copying: from matrix blocks [" + mo.getNumRowsPerBlock() + "," + mo.getNumColumnsPerBlock() + "] to " + format);
    } else if (srcData instanceof FrameObject) {
        FrameObject fo = (FrameObject) srcData;
        // report rows and columns (previously the column count was printed twice)
        throw new DMLRuntimeException("Unexpected formats while copying: from frame object [" + fo.getNumRows() + "," + fo.getNumColumns() + "] to " + format);
    }
}
use of org.apache.sysml.runtime.controlprogram.caching.MatrixObject in project incubator-systemml by apache.
the class VariableCPInstruction method writeCSVFile.
/**
 * Helper function to write a matrix as a CSV file.
 *
 * A dirty matrix object is exported directly. Otherwise the path depends on the output
 * info stored in its metadata: data already in CSV goes through
 * {@code WriterTextCSV.addHeaderToCSV}, binary-block and text-cell data are exported, and
 * any other format is rejected. In the non-dirty case a {@code .mtd} metadata file is
 * written next to the target afterwards.
 *
 * @param ec execution context
 * @param fname file name
 * @throws DMLRuntimeException if the stored format cannot be exported as CSV, or wrapping
 *         an {@code IOException} raised while writing
 */
private void writeCSVFile(ExecutionContext ec, String fname) {
    final String csvFormat = "csv";
    MatrixObject matrix = ec.getMatrixObject(getInput1().getName());

    if (matrix.isDirty()) {
        // there exist data computed in CP that is not backed up on HDFS
        // i.e., it is either in-memory or in evicted space
        matrix.exportData(fname, csvFormat, _formatProperties);
        return;
    }

    try {
        MetaDataFormat meta = (MetaDataFormat) matrix.getMetaData();
        OutputInfo storedInfo = meta.getOutputInfo();
        MatrixCharacteristics dims = meta.getMatrixCharacteristics();

        boolean storedAsCsv = (storedInfo == OutputInfo.CSVOutputInfo);
        boolean exportable = (storedInfo == OutputInfo.BinaryBlockOutputInfo
            || storedInfo == OutputInfo.TextCellOutputInfo);

        if (!storedAsCsv && !exportable) {
            throw new DMLRuntimeException("Unexpected data format (" + OutputInfo.outputInfoToString(storedInfo) + "): can not export into CSV format.");
        }

        if (storedAsCsv) {
            // hand the current file name and the target name to the CSV writer
            WriterTextCSV csvWriter = new WriterTextCSV((CSVFileFormatProperties) _formatProperties);
            csvWriter.addHeaderToCSV(matrix.getFileName(), fname, dims.getRows(), dims.getCols());
        } else {
            matrix.exportData(fname, csvFormat, _formatProperties);
        }

        // Write Metadata file
        MapReduceTool.writeMetaDataFile(fname + ".mtd", matrix.getValueType(), dims, OutputInfo.CSVOutputInfo, _formatProperties);
    } catch (IOException e) {
        throw new DMLRuntimeException(e);
    }
}
Aggregations