Use of org.apache.sysml.runtime.matrix.data.InputInfo in project incubator-systemml by Apache.
The class ResultMergeRemoteMapper, method configure.
/**
 * Configures this mapper from the MR job: reads the merge input format and
 * matrix characteristics, determines whether the current input split belongs
 * to the compare file or a data file, and instantiates the matching
 * format-specific merge mapper.
 */
public void configure(JobConf job) {
	//read result-merge configuration from the job
	InputInfo inputInfo = MRJobConfiguration.getResultMergeInputInfo(job);
	long[] mcDims = MRJobConfiguration.getResultMergeMatrixCharacteristics(job);
	String compareFname = MRJobConfiguration.getResultMergeInfoCompareFilename(job);
	String currentFname = job.get(MRConfigurationNames.MR_MAP_INPUT_FILE);

	//startsWith comparison in order to account for part names in currentFname
	byte tag = currentFname.startsWith(compareFname) ?
		ResultMergeRemoteMR.COMPARE_TAG : ResultMergeRemoteMR.DATA_TAG;

	//instantiate the format-specific merge mapper
	if( inputInfo == InputInfo.TextCellInputInfo )
		_mapper = new ResultMergeMapperTextCell(tag);
	else if( inputInfo == InputInfo.BinaryCellInputInfo )
		_mapper = new ResultMergeMapperBinaryCell(tag);
	else if( inputInfo == InputInfo.BinaryBlockInputInfo )
		_mapper = new ResultMergeMapperBinaryBlock(tag, mcDims[0], mcDims[1], mcDims[2], mcDims[3]);
	else
		throw new RuntimeException("Unable to configure mapper with unknown input info: " + inputInfo.toString());
}
Use of org.apache.sysml.runtime.matrix.data.InputInfo in project incubator-systemml by Apache.
The class ResultMergeRemoteSpark, method executeParallelMerge.
/**
 * Executes the result merge on Spark and returns a new matrix object for the
 * merged output.
 *
 * @param par degree of parallelism (currently unused by this Spark backend;
 *            parallelism is determined by the Spark job itself)
 * @return a new {@code MatrixObject} wrapping the merged RDD, or the original
 *         {@code _output} if there are no inputs to merge
 * @throws DMLRuntimeException wrapping any failure during the merge
 */
@Override
public MatrixObject executeParallelMerge(int par) throws DMLRuntimeException {
	//always create new matrix object (required for nested parallelism)
	MatrixObject moNew = null;
	//guard trace output to avoid unconditional string concatenation when tracing is disabled
	if (LOG.isTraceEnabled())
		LOG.trace("ResultMerge (remote, spark): Execute serial merge for output " + _output.getVarName() + " (fname=" + _output.getFileName() + ")");
	try {
		if (_inputs != null && _inputs.length > 0) {
			//prepare compare: only compare against the old output if it has nonzeros
			MatrixFormatMetaData metadata = (MatrixFormatMetaData) _output.getMetaData();
			MatrixCharacteristics mcOld = metadata.getMatrixCharacteristics();
			MatrixObject compare = (mcOld.getNonZeros() == 0) ? null : _output;
			//actual merge
			RDDObject ro = executeMerge(compare, _inputs, _output.getVarName(), mcOld.getRows(), mcOld.getCols(), mcOld.getRowsPerBlock(), mcOld.getColsPerBlock());
			//create new output matrix (e.g., to prevent potential export<->read file access conflict)
			String varName = _output.getVarName();
			ValueType vt = _output.getValueType();
			moNew = new MatrixObject(vt, _outputFName);
			//avoid appending the suffix twice on repeated merges
			moNew.setVarName(varName.contains(NAME_SUFFIX) ? varName : varName + NAME_SUFFIX);
			moNew.setDataType(DataType.MATRIX);
			OutputInfo oiOld = metadata.getOutputInfo();
			InputInfo iiOld = metadata.getInputInfo();
			MatrixCharacteristics mc = new MatrixCharacteristics(mcOld.getRows(), mcOld.getCols(), mcOld.getRowsPerBlock(), mcOld.getColsPerBlock());
			mc.setNonZeros(computeNonZeros(_output, convertToList(_inputs)));
			MatrixFormatMetaData meta = new MatrixFormatMetaData(mc, oiOld, iiOld);
			moNew.setMetaData(meta);
			moNew.setRDDHandle(ro);
		} else {
			//return old matrix, to prevent copy
			moNew = _output;
		}
	} catch (Exception ex) {
		throw new DMLRuntimeException(ex);
	}
	return moNew;
}
Use of org.apache.sysml.runtime.matrix.data.InputInfo in project incubator-systemml by Apache.
The class MatrixReaderFactory, method createMatrixReader.
/**
 * Creates a matrix reader matching the input format given in the read
 * properties, selecting a parallel reader variant when parallel reads are
 * enabled and the default sparse block type is MCSR.
 *
 * @param props read properties carrying the input info and optional format properties
 * @return a matrix reader for the given input format
 * @throws DMLRuntimeException if props is null or the input format is unknown
 */
public static MatrixReader createMatrixReader(ReadProperties props) throws DMLRuntimeException {
	//check valid read properties
	if (props == null)
		throw new DMLRuntimeException("Failed to create matrix reader with empty properties.");
	InputInfo iinfo = props.inputInfo;
	//parallel readers are only supported with MCSR sparse blocks
	boolean mcsr = (MatrixBlock.DEFAULT_SPARSEBLOCK == SparseBlock.Type.MCSR);
	MatrixReader reader = null;
	if (iinfo == InputInfo.TextCellInputInfo || iinfo == InputInfo.MatrixMarketInputInfo) {
		if (ConfigurationManager.getCompilerConfigFlag(ConfigType.PARALLEL_CP_READ_TEXTFORMATS) && mcsr)
			reader = new ReaderTextCellParallel(iinfo);
		else
			reader = new ReaderTextCell(iinfo);
	} else if (iinfo == InputInfo.CSVInputInfo) {
		//hoisted: fall back to default CSV format properties if none provided
		CSVFileFormatProperties csvProps = (props.formatProperties != null) ?
			(CSVFileFormatProperties) props.formatProperties : new CSVFileFormatProperties();
		if (ConfigurationManager.getCompilerConfigFlag(ConfigType.PARALLEL_CP_READ_TEXTFORMATS) && mcsr)
			reader = new ReaderTextCSVParallel(csvProps);
		else
			reader = new ReaderTextCSV(csvProps);
	} else if (iinfo == InputInfo.BinaryCellInputInfo)
		reader = new ReaderBinaryCell();
	else if (iinfo == InputInfo.BinaryBlockInputInfo) {
		if (ConfigurationManager.getCompilerConfigFlag(ConfigType.PARALLEL_CP_READ_BINARYFORMATS) && mcsr)
			reader = new ReaderBinaryBlockParallel(props.localFS);
		else
			reader = new ReaderBinaryBlock(props.localFS);
	} else {
		throw new DMLRuntimeException("Failed to create matrix reader for unknown input info: " + InputInfo.inputInfoToString(iinfo));
	}
	return reader;
}
Use of org.apache.sysml.runtime.matrix.data.InputInfo in project incubator-systemml by Apache.
The class DynamicReadMatrixCP, method execute.
/**
 * Reads a matrix from HDFS using the file name, dimensions, and format given
 * as scalar function inputs, and exposes it as the function result in binary
 * block output format.
 */
@Override
public void execute() {
	try {
		//parse scalar inputs: file name, dimensions, and serialized format
		String fileName = ((Scalar) this.getFunctionInput(0)).getValue();
		int rows = Integer.parseInt(((Scalar) this.getFunctionInput(1)).getValue());
		int cols = Integer.parseInt(((Scalar) this.getFunctionInput(2)).getValue());
		String fmt = ((Scalar) this.getFunctionInput(3)).getValue();
		InputInfo inputInfo = InputInfo.stringToInputInfo(fmt);
		OutputInfo outputInfo = OutputInfo.BinaryBlockOutputInfo;
		//read the matrix with the configured default block size
		MatrixBlock mb = DataConverter.readMatrixFromHDFS(fileName, inputInfo, rows, cols, ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize());
		//wrap the block as the function output under a temporary file path
		_ret = new Matrix(createOutputFilePathAndName("TMP"), rows, cols, ValueType.Double);
		_ret.setMatrixDoubleArray(mb, outputInfo, inputInfo);
		//NOTE: The packagesupport wrapper creates a new MatrixObjectNew with the given
		// matrix block. This leads to a dirty state of the new object. Hence, the resulting
		// intermediate plan variable will be exported in front of MR jobs and during this export
		// the format will be changed to binary block (the contract of external functions),
		// no matter in which format the original matrix was.
	} catch (Exception e) {
		throw new RuntimeException("Error executing dynamic read of matrix", e);
	}
}
Use of org.apache.sysml.runtime.matrix.data.InputInfo in project incubator-systemml by Apache.
The class DynamicReadMatrixRcCP, method execute.
/**
 * Reads a matrix from HDFS using the file name, dimensions, and format given
 * as scalar function inputs. Unlike the non-Rc variant, failures are not
 * propagated: success or failure is communicated through a scalar return
 * code ("0" on success, "1" on failure).
 */
@Override
public void execute() {
	try {
		//parse scalar inputs: file name, dimensions, and serialized format
		String fileName = ((Scalar) this.getFunctionInput(0)).getValue();
		int rows = Integer.parseInt(((Scalar) this.getFunctionInput(1)).getValue());
		int cols = Integer.parseInt(((Scalar) this.getFunctionInput(2)).getValue());
		String fmt = ((Scalar) this.getFunctionInput(3)).getValue();
		InputInfo inputInfo = InputInfo.stringToInputInfo(fmt);
		OutputInfo outputInfo = OutputInfo.BinaryBlockOutputInfo;
		//wrap the block as the function output under a temporary file path
		_ret = new Matrix(createOutputFilePathAndName("TMP"), rows, cols, ValueType.Double);
		MatrixBlock mb = DataConverter.readMatrixFromHDFS(fileName, inputInfo, rows, cols, ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize());
		_ret.setMatrixDoubleArray(mb, outputInfo, inputInfo);
		//success return code
		_rc = new Scalar(ScalarValueType.Integer, "0");
		//NOTE: The packagesupport wrapper creates a new MatrixObjectNew with the given
		// matrix block. This leads to a dirty state of the new object. Hence, the resulting
		// intermediate plan variable will be exported in front of MR jobs and during this export
		// the format will be changed to binary block (the contract of external functions),
		// no matter in which format the original matrix was.
	} catch (Exception ignored) {
		//deliberate best-effort: errors are signaled to the caller via the return code,
		//not by propagating the exception
		_rc = new Scalar(ScalarValueType.Integer, "1");
	}
}
Aggregations