Use of org.apache.sysml.runtime.matrix.data.InputInfo in project incubator-systemml by apache.
The class DataPartitionerRemoteMapper, method configure.
@Override
public void configure(JobConf job) {
    MatrixCharacteristics mc = MRJobConfiguration.getPartitionedMatrixSize(job);
    InputInfo ii = MRJobConfiguration.getPartitioningInputInfo(job);
    OutputInfo oi = MRJobConfiguration.getPartitioningOutputInfo(job);
    PDataPartitionFormat pdf = MRJobConfiguration.getPartitioningFormat(job);
    int n = MRJobConfiguration.getPartitioningSizeN(job);
    boolean keepIndexes = MRJobConfiguration.getPartitioningIndexFlag(job);
    if (ii == InputInfo.TextCellInputInfo)
        _mapper = new DataPartitionerMapperTextcell(mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock(), pdf, n);
    else if (ii == InputInfo.BinaryCellInputInfo)
        _mapper = new DataPartitionerMapperBinarycell(mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock(), pdf, n);
    else if (ii == InputInfo.BinaryBlockInputInfo) {
        if (oi == OutputInfo.BinaryBlockOutputInfo)
            _mapper = new DataPartitionerMapperBinaryblock(mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock(), pdf, n, keepIndexes);
        else if (oi == OutputInfo.BinaryCellOutputInfo) {
            // fused parfor
            boolean outputEmpty = MRJobConfiguration.getProgramBlocks(job) != null;
            _mapper = new DataPartitionerMapperBinaryblock2Binarycell(job, mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock(), pdf, n, keepIndexes, outputEmpty);
        }
        else
            throw new RuntimeException("Partitioning from '" + ii + "' to '" + oi + "' not supported");
    }
    else
        throw new RuntimeException("Unable to configure mapper with unknown input info: " + ii.toString());
}
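The dispatch above compares the job's InputInfo and OutputInfo against the singleton constants (TextCellInputInfo, BinaryCellInputInfo, BinaryBlockInputInfo) by reference. Below is a minimal standalone sketch of that selection logic; the PartitionMapperDispatchDemo class and its chooseLabel helper are illustrative stand-ins for the real mapper classes, not SystemML API.

import org.apache.sysml.runtime.matrix.data.InputInfo;
import org.apache.sysml.runtime.matrix.data.OutputInfo;

public class PartitionMapperDispatchDemo {
    // Hypothetical helper: mirrors the reference-equality dispatch in configure(),
    // returning a label instead of instantiating the mapper implementations.
    public static String chooseLabel(InputInfo ii, OutputInfo oi) {
        if (ii == InputInfo.TextCellInputInfo)
            return "textcell mapper";
        else if (ii == InputInfo.BinaryCellInputInfo)
            return "binarycell mapper";
        else if (ii == InputInfo.BinaryBlockInputInfo)
            return (oi == OutputInfo.BinaryCellOutputInfo) ?
                "binaryblock-to-binarycell mapper" : "binaryblock mapper";
        throw new RuntimeException("Unknown input info: " + ii);
    }

    public static void main(String[] args) {
        // prints "binaryblock mapper"
        System.out.println(chooseLabel(InputInfo.BinaryBlockInputInfo, OutputInfo.BinaryBlockOutputInfo));
    }
}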
Use of org.apache.sysml.runtime.matrix.data.InputInfo in project incubator-systemml by apache.
The class MatrixReaderFactory, method createMatrixReader.
public static MatrixReader createMatrixReader(ReadProperties props) {
    // check valid read properties
    if (props == null)
        throw new DMLRuntimeException("Failed to create matrix reader with empty properties.");
    MatrixReader reader = null;
    InputInfo iinfo = props.inputInfo;
    if (iinfo == InputInfo.TextCellInputInfo || iinfo == InputInfo.MatrixMarketInputInfo) {
        if (ConfigurationManager.getCompilerConfigFlag(ConfigType.PARALLEL_CP_READ_TEXTFORMATS)
            && MatrixBlock.DEFAULT_SPARSEBLOCK == SparseBlock.Type.MCSR)
            reader = new ReaderTextCellParallel(iinfo);
        else
            reader = new ReaderTextCell(iinfo);
    }
    else if (iinfo == InputInfo.CSVInputInfo) {
        if (ConfigurationManager.getCompilerConfigFlag(ConfigType.PARALLEL_CP_READ_TEXTFORMATS)
            && MatrixBlock.DEFAULT_SPARSEBLOCK == SparseBlock.Type.MCSR)
            reader = new ReaderTextCSVParallel(props.formatProperties != null ?
                (CSVFileFormatProperties) props.formatProperties : new CSVFileFormatProperties());
        else
            reader = new ReaderTextCSV(props.formatProperties != null ?
                (CSVFileFormatProperties) props.formatProperties : new CSVFileFormatProperties());
    }
    else if (iinfo == InputInfo.BinaryCellInputInfo)
        reader = new ReaderBinaryCell();
    else if (iinfo == InputInfo.BinaryBlockInputInfo) {
        if (ConfigurationManager.getCompilerConfigFlag(ConfigType.PARALLEL_CP_READ_BINARYFORMATS)
            && MatrixBlock.DEFAULT_SPARSEBLOCK == SparseBlock.Type.MCSR)
            reader = new ReaderBinaryBlockParallel(props.localFS);
        else
            reader = new ReaderBinaryBlock(props.localFS);
    }
    else {
        throw new DMLRuntimeException("Failed to create matrix reader for unknown input info: "
            + InputInfo.inputInfoToString(iinfo));
    }
    return reader;
}
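A minimal sketch of how a caller might use this factory, assuming ReadProperties exposes a public inputInfo field (as read above) and that MatrixReader declares readMatrixFromHDFS(fname, rlen, clen, brlen, bclen, estnnz); the file path and dimensions are illustrative.

import org.apache.sysml.runtime.io.MatrixReader;
import org.apache.sysml.runtime.io.MatrixReaderFactory;
import org.apache.sysml.runtime.io.ReadProperties;
import org.apache.sysml.runtime.matrix.data.InputInfo;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;

public class ReaderFactoryDemo {
    public static void main(String[] args) throws Exception {
        // Assumption: ReadProperties carries the input format as a public field.
        ReadProperties props = new ReadProperties();
        props.inputInfo = InputInfo.TextCellInputInfo;
        MatrixReader reader = MatrixReaderFactory.createMatrixReader(props);
        // Illustrative path and dimensions; -1 indicates an unknown non-zero estimate.
        MatrixBlock mb = reader.readMatrixFromHDFS("hdfs:/tmp/X.textcell", 1000, 100, 1000, 1000, -1);
        System.out.println("nnz = " + mb.getNonZeros());
    }
}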
Use of org.apache.sysml.runtime.matrix.data.InputInfo in project incubator-systemml by apache.
The class Dag, method getInputPathsAndParameters.
// Method to populate inputs and also populate the node index mapping.
private static void getInputPathsAndParameters(Lop node, ArrayList<Lop> execNodes, ArrayList<String> inputStrings,
    ArrayList<InputInfo> inputInfos, ArrayList<Long> numRows, ArrayList<Long> numCols,
    ArrayList<Long> numRowsPerBlock, ArrayList<Long> numColsPerBlock, HashMap<Lop, Integer> nodeIndexMapping,
    ArrayList<String> inputLabels, ArrayList<Lop> inputLops, ArrayList<Integer> MRJobLineNumbers) {
    // treat rand as an input
    if (node.getType() == Type.DataGen && execNodes.contains(node) && !nodeIndexMapping.containsKey(node)) {
        numRows.add(node.getOutputParameters().getNumRows());
        numCols.add(node.getOutputParameters().getNumCols());
        numRowsPerBlock.add(node.getOutputParameters().getRowsInBlock());
        numColsPerBlock.add(node.getOutputParameters().getColsInBlock());
        inputStrings.add(node.getInstructions(inputStrings.size(), inputStrings.size()));
        if (DMLScript.ENABLE_DEBUG_MODE) {
            MRJobLineNumbers.add(node._beginLine);
        }
        inputInfos.add(InputInfo.TextCellInputInfo);
        nodeIndexMapping.put(node, inputStrings.size() - 1);
        return;
    }
    // get input file names
    if ((!execNodes.contains(node) && !nodeIndexMapping.containsKey(node)
            && !(node.getExecLocation() == ExecLocation.Data)
            && !(node.getExecLocation() == ExecLocation.ControlProgram && node.getDataType() == DataType.SCALAR))
        || (!execNodes.contains(node) && node.getExecLocation() == ExecLocation.Data
            && ((Data) node).getOperationType() == Data.OperationTypes.READ
            && ((Data) node).getDataType() != DataType.SCALAR && !nodeIndexMapping.containsKey(node))) {
        if (node.getOutputParameters().getFile_name() != null) {
            inputStrings.add(node.getOutputParameters().getFile_name());
        } else {
            // use label name
            inputStrings.add(Lop.VARIABLE_NAME_PLACEHOLDER + node.getOutputParameters().getLabel() + Lop.VARIABLE_NAME_PLACEHOLDER);
        }
        inputLabels.add(node.getOutputParameters().getLabel());
        inputLops.add(node);
        numRows.add(node.getOutputParameters().getNumRows());
        numCols.add(node.getOutputParameters().getNumCols());
        numRowsPerBlock.add(node.getOutputParameters().getRowsInBlock());
        numColsPerBlock.add(node.getOutputParameters().getColsInBlock());
        InputInfo nodeInputInfo = null;
        // Check whether the file format is binary or text and update the infos accordingly
        if (node.getOutputParameters().isBlocked()) {
            if (node.getOutputParameters().getFormat() == Format.BINARY)
                nodeInputInfo = InputInfo.BinaryBlockInputInfo;
            else
                throw new LopsException("Invalid format (" + node.getOutputParameters().getFormat()
                    + ") encountered for a node/lop (ID=" + node.getID() + ") with blocked output.");
        } else {
            if (node.getOutputParameters().getFormat() == Format.TEXT)
                nodeInputInfo = InputInfo.TextCellInputInfo;
            else
                nodeInputInfo = InputInfo.BinaryCellInputInfo;
        }
        // the information on key/value classes
        if (node.getType() == Type.SortKeys) {
            // SortKeys is the input to some other lop (say, L).
            // The InputInfo of L is the OutputInfo of SortKeys, which is
            // (PickFromCompactInputFormat, DoubleWritable, IntWritable).
            nodeInputInfo = new InputInfo(PickFromCompactInputFormat.class, DoubleWritable.class, IntWritable.class);
        } else if (node.getType() == Type.CombineBinary) {
            // CombineBinary is the input to some other lop (say, L).
            // The InputInfo of L is the OutputInfo of CombineBinary,
            // and the OutputInfo of CombineBinary depends on the operation.
            CombineBinary combine = (CombineBinary) node;
            if (combine.getOperation() == org.apache.sysml.lops.CombineBinary.OperationTypes.PreSort) {
                nodeInputInfo = new InputInfo(SequenceFileInputFormat.class, DoubleWritable.class, IntWritable.class);
            } else if (combine.getOperation() == org.apache.sysml.lops.CombineBinary.OperationTypes.PreCentralMoment
                || combine.getOperation() == org.apache.sysml.lops.CombineBinary.OperationTypes.PreCovUnweighted
                || combine.getOperation() == org.apache.sysml.lops.CombineBinary.OperationTypes.PreGroupedAggUnweighted) {
                nodeInputInfo = InputInfo.WeightedPairInputInfo;
            }
        } else if (node.getType() == Type.CombineTernary) {
            nodeInputInfo = InputInfo.WeightedPairInputInfo;
        }
        inputInfos.add(nodeInputInfo);
        nodeIndexMapping.put(node, inputStrings.size() - 1);
        return;
    }
    // if execNodes does not contain the node at this point, return
    if (!execNodes.contains(node))
        return;
    // process children recursively
    for (Lop lop : node.getInputs()) {
        getInputPathsAndParameters(lop, execNodes, inputStrings, inputInfos, numRows, numCols,
            numRowsPerBlock, numColsPerBlock, nodeIndexMapping, inputLabels, inputLops, MRJobLineNumbers);
    }
}
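The blocked/format checks above boil down to a small default mapping onto the three standard InputInfo constants. The helper below restates that mapping in isolation; the FormatToInputInfo class and its boolean parameters are illustrative, not part of the Dag code.

import org.apache.sysml.runtime.matrix.data.InputInfo;

public class FormatToInputInfo {
    // Default mapping used above: blocked binary data is read as binary blocks,
    // unblocked data as text cells or binary cells depending on the format.
    public static InputInfo resolve(boolean blocked, boolean textFormat) {
        if (blocked) {
            if (textFormat)
                throw new IllegalArgumentException("Blocked output requires a binary format.");
            return InputInfo.BinaryBlockInputInfo;
        }
        return textFormat ? InputInfo.TextCellInputInfo : InputInfo.BinaryCellInputInfo;
    }

    public static void main(String[] args) {
        // Prints the string names of the resolved formats (exact strings as defined by inputInfoToString).
        System.out.println(InputInfo.inputInfoToString(resolve(true, false)));
        System.out.println(InputInfo.inputInfoToString(resolve(false, true)));
    }
}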
Use of org.apache.sysml.runtime.matrix.data.InputInfo in project incubator-systemml by apache.
The class CacheableData, method toString.
@Override
public String toString() {
    StringBuilder str = new StringBuilder();
    str.append(getClass().getSimpleName());
    str.append(": ");
    str.append(_hdfsFileName + ", ");
    if (_metaData instanceof MetaDataNumItemsByEachReducer) {
        str.append("NumItemsByEachReducerMetaData");
    } else {
        try {
            MetaDataFormat md = (MetaDataFormat) _metaData;
            if (md != null) {
                MatrixCharacteristics mc = _metaData.getMatrixCharacteristics();
                str.append(mc.toString());
                InputInfo ii = md.getInputInfo();
                if (ii == null)
                    str.append("null");
                else {
                    str.append(", ");
                    str.append(InputInfo.inputInfoToString(ii));
                }
            } else {
                str.append("null, null");
            }
        } catch (Exception ex) {
            LOG.error(ex);
        }
    }
    str.append(", ");
    str.append(isDirty() ? "dirty" : "not-dirty");
    return str.toString();
}
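For reference, a small sketch of the metadata that this toString() renders, assuming MetaDataFormat takes (MatrixCharacteristics, OutputInfo, InputInfo) in that order; the dimensions are illustrative.

import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
import org.apache.sysml.runtime.matrix.MetaDataFormat;
import org.apache.sysml.runtime.matrix.data.InputInfo;
import org.apache.sysml.runtime.matrix.data.OutputInfo;

public class MetaDataFormatDemo {
    public static void main(String[] args) {
        // Assumed constructor argument order: matrix characteristics, output info, input info.
        MatrixCharacteristics mc = new MatrixCharacteristics(1000, 100, 1000, 1000);
        MetaDataFormat md = new MetaDataFormat(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo);
        // The same two pieces of information that CacheableData.toString() appends:
        System.out.println(md.getMatrixCharacteristics() + ", "
            + InputInfo.inputInfoToString(md.getInputInfo()));
    }
}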
Use of org.apache.sysml.runtime.matrix.data.InputInfo in project incubator-systemml by apache.
The class DynamicReadMatrixCP, method execute.
@Override
public void execute() {
    try {
        String fname = ((Scalar) this.getFunctionInput(0)).getValue();
        Integer m = Integer.parseInt(((Scalar) this.getFunctionInput(1)).getValue());
        Integer n = Integer.parseInt(((Scalar) this.getFunctionInput(2)).getValue());
        String format = ((Scalar) this.getFunctionInput(3)).getValue();
        InputInfo ii = InputInfo.stringToInputInfo(format);
        OutputInfo oi = OutputInfo.BinaryBlockOutputInfo;
        MatrixBlock mbTmp = DataConverter.readMatrixFromHDFS(fname, ii, m, n,
            ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize());
        String fnameTmp = createOutputFilePathAndName("TMP");
        _ret = new Matrix(fnameTmp, m, n, ValueType.Double);
        _ret.setMatrixDoubleArray(mbTmp, oi, ii);
        // NOTE: The packagesupport wrapper creates a new MatrixObjectNew with the given
        // matrix block. This leads to a dirty state of the new object. Hence, the resulting
        // intermediate plan variable will be exported in front of MR jobs and during this export
        // the format will be changed to binary block (the contract of external functions),
        // no matter in which format the original matrix was.
    } catch (Exception e) {
        throw new RuntimeException("Error executing dynamic read of matrix", e);
    }
}
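A compact sketch of the same read path outside the external-function wrapper, reusing the InputInfo.stringToInputInfo and DataConverter.readMatrixFromHDFS calls shown above; the file path, dimensions, and the "textcell" format string are illustrative assumptions.

import org.apache.sysml.conf.ConfigurationManager;
import org.apache.sysml.runtime.matrix.data.InputInfo;
import org.apache.sysml.runtime.matrix.data.MatrixBlock;
import org.apache.sysml.runtime.util.DataConverter;

public class DynamicReadDemo {
    public static void main(String[] args) throws Exception {
        // "textcell" is assumed to be one of the format strings accepted by stringToInputInfo.
        InputInfo ii = InputInfo.stringToInputInfo("textcell");
        int blen = ConfigurationManager.getBlocksize();
        // Illustrative path and dimensions; reads the matrix into a single in-memory block.
        MatrixBlock mb = DataConverter.readMatrixFromHDFS(
            "hdfs:/tmp/X.textcell", ii, 1000, 100, blen, blen);
        System.out.println(mb.getNumRows() + " x " + mb.getNumColumns());
    }
}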