Search in sources :

Example 11 with InputInfo

use of org.apache.sysml.runtime.matrix.data.InputInfo in project incubator-systemml by apache.

the class DataPartitionerRemoteMapper method configure.

/**
 * Reads the partitioning parameters from the job configuration and instantiates the
 * mapper implementation matching the configured input (and, for binary block input,
 * output) format. The chosen mapper is stored in {@code _mapper}.
 *
 * @param job job configuration holding the partitioning settings
 * @throws RuntimeException if the input info is not text cell, binary cell or binary
 *         block, or if binary block input is combined with an output info other than
 *         binary block or binary cell
 */
@Override
public void configure(JobConf job) {
    // partitioning parameters carried by the job configuration
    MatrixCharacteristics mc = MRJobConfiguration.getPartitionedMatrixSize(job);
    InputInfo ii = MRJobConfiguration.getPartitioningInputInfo(job);
    OutputInfo oi = MRJobConfiguration.getPartitioningOutputInfo(job);
    PDataPartitionFormat pdf = MRJobConfiguration.getPartitioningFormat(job);
    int n = MRJobConfiguration.getPartitioningSizeN(job);
    boolean keepIndexes = MRJobConfiguration.getPartitioningIndexFlag(job);

    if (ii == InputInfo.TextCellInputInfo) {
        _mapper = new DataPartitionerMapperTextcell(mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock(), pdf, n);
    } else if (ii == InputInfo.BinaryCellInputInfo) {
        _mapper = new DataPartitionerMapperBinarycell(mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock(), pdf, n);
    } else if (ii == InputInfo.BinaryBlockInputInfo) {
        // binary block input is the only case where the output format selects the mapper
        if (oi == OutputInfo.BinaryBlockOutputInfo) {
            _mapper = new DataPartitionerMapperBinaryblock(mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock(), pdf, n, keepIndexes);
        } else if (oi == OutputInfo.BinaryCellOutputInfo) {
            // fused parfor: empty output is requested whenever program blocks are present in the job
            boolean outputEmpty = MRJobConfiguration.getProgramBlocks(job) != null;
            _mapper = new DataPartitionerMapperBinaryblock2Binarycell(job, mc.getRows(), mc.getCols(), mc.getRowsPerBlock(), mc.getColsPerBlock(), pdf, n, keepIndexes, outputEmpty);
        } else {
            throw new RuntimeException("Partitioning from '" + ii + "' to '" + oi + "' not supported");
        }
    } else {
        // Concatenation renders a null input info as "null"; calling ii.toString() here
        // would raise a NullPointerException and hide this diagnostic.
        throw new RuntimeException("Unable to configure mapper with unknown input info: " + ii);
    }
}
Also used : OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 12 with InputInfo

use of org.apache.sysml.runtime.matrix.data.InputInfo in project incubator-systemml by apache.

the class MatrixReaderFactory method createMatrixReader.

/**
 * Creates the matrix reader that matches the input info of the given read properties.
 * For text cell / matrix market, CSV and binary block input, the parallel reader variant
 * is chosen when the corresponding compiler config flag is set and the default sparse
 * block type is MCSR; otherwise the sequential variant is used.
 *
 * @param props read properties; must not be null
 * @return a reader for {@code props.inputInfo}
 * @throws DMLRuntimeException if {@code props} is null or the input info is not one of
 *         the handled formats
 */
public static MatrixReader createMatrixReader(ReadProperties props) {
    // reject missing read properties up front
    if (props == null) {
        throw new DMLRuntimeException("Failed to create matrix reader with empty properties.");
    }

    final InputInfo format = props.inputInfo;

    // text cell and matrix market share the same readers
    if (format == InputInfo.TextCellInputInfo || format == InputInfo.MatrixMarketInputInfo) {
        boolean useParallel = ConfigurationManager.getCompilerConfigFlag(ConfigType.PARALLEL_CP_READ_TEXTFORMATS)
            && MatrixBlock.DEFAULT_SPARSEBLOCK == SparseBlock.Type.MCSR;
        if (useParallel) {
            return new ReaderTextCellParallel(format);
        }
        return new ReaderTextCell(format);
    }

    if (format == InputInfo.CSVInputInfo) {
        boolean useParallel = ConfigurationManager.getCompilerConfigFlag(ConfigType.PARALLEL_CP_READ_TEXTFORMATS)
            && MatrixBlock.DEFAULT_SPARSEBLOCK == SparseBlock.Type.MCSR;
        // fall back to default CSV properties when none were supplied
        CSVFileFormatProperties csvProps = (props.formatProperties != null)
            ? (CSVFileFormatProperties) props.formatProperties
            : new CSVFileFormatProperties();
        if (useParallel) {
            return new ReaderTextCSVParallel(csvProps);
        }
        return new ReaderTextCSV(csvProps);
    }

    if (format == InputInfo.BinaryCellInputInfo) {
        return new ReaderBinaryCell();
    }

    if (format == InputInfo.BinaryBlockInputInfo) {
        boolean useParallel = ConfigurationManager.getCompilerConfigFlag(ConfigType.PARALLEL_CP_READ_BINARYFORMATS)
            && MatrixBlock.DEFAULT_SPARSEBLOCK == SparseBlock.Type.MCSR;
        if (useParallel) {
            return new ReaderBinaryBlockParallel(props.localFS);
        }
        return new ReaderBinaryBlock(props.localFS);
    }

    throw new DMLRuntimeException("Failed to create matrix reader for unknown input info: " + InputInfo.inputInfoToString(format));
}
Also used : InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) CSVFileFormatProperties(org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 13 with InputInfo

use of org.apache.sysml.runtime.matrix.data.InputInfo in project incubator-systemml by apache.

the class Dag method getInputPathsAndParameters.

/**
 * Populates the job input lists by walking the lop DAG from {@code node}, and records
 * in {@code nodeIndexMapping} the list position assigned to every lop that is added as
 * an input.
 *
 * Each visited node ends in one of three ways:
 * <ul>
 * <li>A DataGen lop that is in {@code execNodes} and not yet mapped is treated as an
 *     input: its dimensions, block sizes, instruction string and text cell input info
 *     are appended. {@code inputLabels} and {@code inputLops} are not touched in this
 *     case, and {@code MRJobLineNumbers} is appended to only in debug mode.</li>
 * <li>A lop that satisfies the input-file condition below gets its file name (or a
 *     placeholder-wrapped label), label, lop, dimensions, block sizes and input info
 *     appended.</li>
 * <li>Otherwise, if the node is in {@code execNodes}, its inputs are processed
 *     recursively; if it is not, nothing happens.</li>
 * </ul>
 * In the first two cases the node is mapped to {@code inputStrings.size() - 1}, i.e. the
 * index of the entry that was just appended to {@code inputStrings}.
 *
 * @throws LopsException if a lop with blocked output has a format other than BINARY
 */
private static void getInputPathsAndParameters(Lop node, ArrayList<Lop> execNodes, ArrayList<String> inputStrings, ArrayList<InputInfo> inputInfos, ArrayList<Long> numRows, ArrayList<Long> numCols, ArrayList<Long> numRowsPerBlock, ArrayList<Long> numColsPerBlock, HashMap<Lop, Integer> nodeIndexMapping, ArrayList<String> inputLabels, ArrayList<Lop> inputLops, ArrayList<Integer> MRJobLineNumbers) {
    // treat rand as an input.
    if (node.getType() == Type.DataGen && execNodes.contains(node) && !nodeIndexMapping.containsKey(node)) {
        numRows.add(node.getOutputParameters().getNumRows());
        numCols.add(node.getOutputParameters().getNumCols());
        numRowsPerBlock.add(node.getOutputParameters().getRowsInBlock());
        numColsPerBlock.add(node.getOutputParameters().getColsInBlock());
        // the "input string" of a DataGen lop is its instruction, built with the index this entry is about to occupy
        inputStrings.add(node.getInstructions(inputStrings.size(), inputStrings.size()));
        if (DMLScript.ENABLE_DEBUG_MODE) {
            MRJobLineNumbers.add(node._beginLine);
        }
        inputInfos.add(InputInfo.TextCellInputInfo);
        // index of the entry appended to inputStrings above
        nodeIndexMapping.put(node, inputStrings.size() - 1);
        return;
    }
    // get input file names
    // NOTE: && binds tighter than ||, so the condition reads as (A) || (B) with
    //   A: node is outside execNodes, not yet mapped, its exec location is not Data,
    //      and it is not a ControlProgram lop of SCALAR data type
    //   B: node is outside execNodes, its exec location is Data, it is a READ of
    //      non-SCALAR data type, and it is not yet mapped
    // The (Data) casts in B are only reached after the exec location check succeeded.
    if (!execNodes.contains(node) && !nodeIndexMapping.containsKey(node) && !(node.getExecLocation() == ExecLocation.Data) && (!(node.getExecLocation() == ExecLocation.ControlProgram && node.getDataType() == DataType.SCALAR)) || (!execNodes.contains(node) && node.getExecLocation() == ExecLocation.Data && ((Data) node).getOperationType() == Data.OperationTypes.READ && ((Data) node).getDataType() != DataType.SCALAR && !nodeIndexMapping.containsKey(node))) {
        if (node.getOutputParameters().getFile_name() != null) {
            inputStrings.add(node.getOutputParameters().getFile_name());
        } else {
            // use label name
            inputStrings.add(Lop.VARIABLE_NAME_PLACEHOLDER + node.getOutputParameters().getLabel() + Lop.VARIABLE_NAME_PLACEHOLDER);
        }
        inputLabels.add(node.getOutputParameters().getLabel());
        inputLops.add(node);
        numRows.add(node.getOutputParameters().getNumRows());
        numCols.add(node.getOutputParameters().getNumCols());
        numRowsPerBlock.add(node.getOutputParameters().getRowsInBlock());
        numColsPerBlock.add(node.getOutputParameters().getColsInBlock());
        InputInfo nodeInputInfo = null;
        // Check if file format type is binary or text and update infos:
        // blocked output must be BINARY (binary block); non-blocked output is
        // text cell for TEXT and binary cell for any other format.
        if (node.getOutputParameters().isBlocked()) {
            if (node.getOutputParameters().getFormat() == Format.BINARY)
                nodeInputInfo = InputInfo.BinaryBlockInputInfo;
            else
                throw new LopsException("Invalid format (" + node.getOutputParameters().getFormat() + ") encountered for a node/lop (ID=" + node.getID() + ") with blocked output.");
        } else {
            if (node.getOutputParameters().getFormat() == Format.TEXT)
                nodeInputInfo = InputInfo.TextCellInputInfo;
            else
                nodeInputInfo = InputInfo.BinaryCellInputInfo;
        }
        // the information on key/value classes: certain lop types override the
        // format-derived input info chosen above
        if (node.getType() == Type.SortKeys) {
            // SortKeys is the input to some other lop (say, L)
            // InputInfo of L is the outputInfo of SortKeys, which is
            // (PickFromCompactInputFormat, DoubleWritable, IntWritable)
            nodeInputInfo = new InputInfo(PickFromCompactInputFormat.class, DoubleWritable.class, IntWritable.class);
        } else if (node.getType() == Type.CombineBinary) {
            // CombineBinary is the input to some other lop (say, L)
            // InputInfo of L is the outputInfo of CombineBinary
            // And, the outputInfo of CombineBinary depends on the operation!
            // Operations not listed below keep the format-derived input info.
            CombineBinary combine = (CombineBinary) node;
            if (combine.getOperation() == org.apache.sysml.lops.CombineBinary.OperationTypes.PreSort) {
                nodeInputInfo = new InputInfo(SequenceFileInputFormat.class, DoubleWritable.class, IntWritable.class);
            } else if (combine.getOperation() == org.apache.sysml.lops.CombineBinary.OperationTypes.PreCentralMoment || combine.getOperation() == org.apache.sysml.lops.CombineBinary.OperationTypes.PreCovUnweighted || combine.getOperation() == org.apache.sysml.lops.CombineBinary.OperationTypes.PreGroupedAggUnweighted) {
                nodeInputInfo = InputInfo.WeightedPairInputInfo;
            }
        } else if (node.getType() == Type.CombineTernary) {
            nodeInputInfo = InputInfo.WeightedPairInputInfo;
        }
        inputInfos.add(nodeInputInfo);
        // index of the entry appended to inputStrings above
        nodeIndexMapping.put(node, inputStrings.size() - 1);
        return;
    }
    // if exec nodes does not contain node at this point, return.
    if (!execNodes.contains(node))
        return;
    // process children recursively
    for (Lop lop : node.getInputs()) {
        getInputPathsAndParameters(lop, execNodes, inputStrings, inputInfos, numRows, numCols, numRowsPerBlock, numColsPerBlock, nodeIndexMapping, inputLabels, inputLops, MRJobLineNumbers);
    }
}
Also used : CombineBinary(org.apache.sysml.lops.CombineBinary) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) LopsException(org.apache.sysml.lops.LopsException) PickFromCompactInputFormat(org.apache.sysml.runtime.matrix.sort.PickFromCompactInputFormat) Data(org.apache.sysml.lops.Data) DoubleWritable(org.apache.hadoop.io.DoubleWritable) Lop(org.apache.sysml.lops.Lop) IntWritable(org.apache.hadoop.io.IntWritable)

Example 14 with InputInfo

use of org.apache.sysml.runtime.matrix.data.InputInfo in project incubator-systemml by apache.

the class CacheableData method toString.

/**
 * Builds a one-line description of this object in the form
 * {@code <SimpleClassName>: <hdfsFileName>, <metadata>, <dirty|not-dirty>}.
 *
 * The metadata part is the literal {@code NumItemsByEachReducerMetaData} for that
 * metadata type; otherwise it is the matrix characteristics followed by the input info,
 * separated by {@code ", "}, with {@code null} standing in for a missing input info, or
 * {@code null, null} when there is no metadata at all. Any exception raised while
 * rendering the metadata is logged and the remaining parts are still appended.
 *
 * @return the textual description
 */
@Override
public String toString() {
    StringBuilder str = new StringBuilder();
    str.append(getClass().getSimpleName());
    str.append(": ");
    str.append(_hdfsFileName).append(", ");
    if (_metaData instanceof MetaDataNumItemsByEachReducer) {
        str.append("NumItemsByEachReducerMetaData");
    } else {
        try {
            MetaDataFormat md = (MetaDataFormat) _metaData;
            if (md != null) {
                MatrixCharacteristics mc = _metaData.getMatrixCharacteristics();
                str.append(mc.toString());
                InputInfo ii = md.getInputInfo();
                // Always separate the two metadata fields; without the separator a null
                // input info was glued directly onto the matrix characteristics text.
                str.append(", ");
                if (ii == null) {
                    str.append("null");
                } else {
                    str.append(InputInfo.inputInfoToString(ii));
                }
            } else {
                str.append("null, null");
            }
        } catch (Exception ex) {
            // best effort: a failure to describe the metadata must not break toString()
            LOG.error(ex);
        }
    }
    str.append(", ");
    str.append(isDirty() ? "dirty" : "not-dirty");
    return str.toString();
}
Also used : MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) MetaDataNumItemsByEachReducer(org.apache.sysml.runtime.matrix.MetaDataNumItemsByEachReducer) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IOException(java.io.IOException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 15 with InputInfo

use of org.apache.sysml.runtime.matrix.data.InputInfo in project incubator-systemml by apache.

the class DynamicReadMatrixCP method execute.

/**
 * Reads a matrix from HDFS and exposes it as the function result {@code _ret}.
 *
 * Function inputs, all passed as scalars: (0) the file name, (1) and (2) the two matrix
 * dimensions as integer strings (presumably rows and columns; confirm against the
 * function signature), (3) the format string that is converted to an input info.
 *
 * @throws RuntimeException wrapping any exception raised while parsing the inputs,
 *         reading the matrix or building the result
 */
@Override
public void execute() {
    try {
        String fname = ((Scalar) this.getFunctionInput(0)).getValue();
        // primitives suffice here; boxing the parsed values only to unbox them again is wasted work
        int m = Integer.parseInt(((Scalar) this.getFunctionInput(1)).getValue());
        int n = Integer.parseInt(((Scalar) this.getFunctionInput(2)).getValue());
        String format = ((Scalar) this.getFunctionInput(3)).getValue();
        InputInfo ii = InputInfo.stringToInputInfo(format);
        OutputInfo oi = OutputInfo.BinaryBlockOutputInfo;
        MatrixBlock mbTmp = DataConverter.readMatrixFromHDFS(fname, ii, m, n, ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize());
        String fnameTmp = createOutputFilePathAndName("TMP");
        _ret = new Matrix(fnameTmp, m, n, ValueType.Double);
        // NOTE: The packagesupport wrapper creates a new MatrixObjectNew with the given
        // matrix block. This leads to a dirty state of the new object. Hence, the resulting
        // intermediate plan variable will be exported in front of MR jobs and during this export
        // the format will be changed to binary block (the contract of external functions),
        // no matter in which format the original matrix was.
        _ret.setMatrixDoubleArray(mbTmp, oi, ii);
    } catch (Exception e) {
        throw new RuntimeException("Error executing dynamic read of matrix", e);
    }
}
Also used : OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) Matrix(org.apache.sysml.udf.Matrix) Scalar(org.apache.sysml.udf.Scalar)

Aggregations

InputInfo (org.apache.sysml.runtime.matrix.data.InputInfo): 38; DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 20; OutputInfo (org.apache.sysml.runtime.matrix.data.OutputInfo): 15; MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics): 13; MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock): 11; MetaDataFormat (org.apache.sysml.runtime.matrix.MetaDataFormat): 10; IOException (java.io.IOException): 9; JobConf (org.apache.hadoop.mapred.JobConf): 7; RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject): 7; JavaPairRDD (org.apache.spark.api.java.JavaPairRDD): 6; MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject): 6; Path (org.apache.hadoop.fs.Path): 5; RunningJob (org.apache.hadoop.mapred.RunningJob): 5; MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes): 5; DMLConfig (org.apache.sysml.conf.DMLConfig): 4; ValueType (org.apache.sysml.parser.Expression.ValueType): 4; SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext): 4; FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock): 4; ArrayList (java.util.ArrayList): 3; Group (org.apache.hadoop.mapred.Counters.Group): 3