Search in sources :

Example 6 with InputInfo

Use of org.apache.sysml.runtime.matrix.data.InputInfo in the project incubator-systemml by Apache.

From the class ResultMergeRemoteMapper, method configure.

/**
 * Selects and instantiates the concrete result-merge mapper implementation
 * for this map task, based on the input info stored in the job configuration.
 *
 * The tag distinguishes records originating from the compare matrix from
 * records originating from the worker result matrices.
 *
 * @param job the Hadoop job configuration of this map task
 */
public void configure(JobConf job) {
    InputInfo inputInfo = MRJobConfiguration.getResultMergeInputInfo(job);
    long[] dims = MRJobConfiguration.getResultMergeMatrixCharacteristics(job);
    String compareFname = MRJobConfiguration.getResultMergeInfoCompareFilename(job);
    String currentFname = job.get(MRConfigurationNames.MR_MAP_INPUT_FILE);
    //startsWith comparison in order to account for part names in currentFname
    byte tag = currentFname.startsWith(compareFname)
        ? ResultMergeRemoteMR.COMPARE_TAG
        : ResultMergeRemoteMR.DATA_TAG;
    //dispatch on the configured input format
    if (inputInfo == InputInfo.TextCellInputInfo) {
        _mapper = new ResultMergeMapperTextCell(tag);
    } else if (inputInfo == InputInfo.BinaryCellInputInfo) {
        _mapper = new ResultMergeMapperBinaryCell(tag);
    } else if (inputInfo == InputInfo.BinaryBlockInputInfo) {
        _mapper = new ResultMergeMapperBinaryBlock(tag, dims[0], dims[1], dims[2], dims[3]);
    } else {
        throw new RuntimeException("Unable to configure mapper with unknown input info: " + inputInfo.toString());
    }
}
Also used : InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo)

Example 7 with InputInfo

Use of org.apache.sysml.runtime.matrix.data.InputInfo in the project incubator-systemml by Apache.

From the class ResultMergeRemoteSpark, method executeParallelMerge.

/**
 * Executes the parallel result merge on Spark and returns the merged output
 * matrix object.
 *
 * A new MatrixObject is always created when inputs exist (required for
 * nested parallelism, and to prevent export/read file-access conflicts on
 * the original output); otherwise the existing output is returned unchanged
 * to avoid an unnecessary copy.
 *
 * @param par degree of parallelism (unused here; merge parallelism is
 *            governed by the Spark job itself — TODO confirm with callers)
 * @return the merged matrix object (new object, or the original output if
 *         there is nothing to merge)
 * @throws DMLRuntimeException if the merge fails for any reason
 */
@Override
public MatrixObject executeParallelMerge(int par) throws DMLRuntimeException {
    //always create new matrix object (required for nested parallelism)
    MatrixObject moNew = null;
    //fixed: log message previously said "serial merge" in this parallel entry point
    LOG.trace("ResultMerge (remote, spark): Execute parallel merge for output " + _output.getVarName() + " (fname=" + _output.getFileName() + ")");
    try {
        if (_inputs != null && _inputs.length > 0) {
            //prepare compare (skip compare matrix if empty, i.e., no non-zeros)
            MatrixFormatMetaData metadata = (MatrixFormatMetaData) _output.getMetaData();
            MatrixCharacteristics mcOld = metadata.getMatrixCharacteristics();
            MatrixObject compare = (mcOld.getNonZeros() == 0) ? null : _output;
            //actual merge
            RDDObject ro = executeMerge(compare, _inputs, _output.getVarName(), mcOld.getRows(), mcOld.getCols(), mcOld.getRowsPerBlock(), mcOld.getColsPerBlock());
            //create new output matrix (e.g., to prevent potential export<->read file access conflict)
            String varName = _output.getVarName();
            ValueType vt = _output.getValueType();
            moNew = new MatrixObject(vt, _outputFName);
            //avoid stacking the suffix on repeated (nested) merges
            moNew.setVarName(varName.contains(NAME_SUFFIX) ? varName : varName + NAME_SUFFIX);
            moNew.setDataType(DataType.MATRIX);
            OutputInfo oiOld = metadata.getOutputInfo();
            InputInfo iiOld = metadata.getInputInfo();
            MatrixCharacteristics mc = new MatrixCharacteristics(mcOld.getRows(), mcOld.getCols(), mcOld.getRowsPerBlock(), mcOld.getColsPerBlock());
            mc.setNonZeros(computeNonZeros(_output, convertToList(_inputs)));
            MatrixFormatMetaData meta = new MatrixFormatMetaData(mc, oiOld, iiOld);
            moNew.setMetaData(meta);
            moNew.setRDDHandle(ro);
        } else {
            //return old matrix, to prevent copy
            moNew = _output;
        }
    } catch (Exception ex) {
        //preserve cause for diagnosis at the caller
        throw new DMLRuntimeException(ex);
    }
    return moNew;
}
Also used : OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) ValueType(org.apache.sysml.parser.Expression.ValueType) RDDObject(org.apache.sysml.runtime.instructions.spark.data.RDDObject) MatrixFormatMetaData(org.apache.sysml.runtime.matrix.MatrixFormatMetaData) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 8 with InputInfo

use of org.apache.sysml.runtime.matrix.data.InputInfo in project incubator-systemml by apache.

the class MatrixReaderFactory method createMatrixReader.

/**
 * Factory method that creates the appropriate MatrixReader for the given
 * read properties, choosing parallel reader variants when the compiler
 * config enables them and the default sparse block layout is MCSR.
 *
 * @param props read properties carrying input info, format properties,
 *              and the local-FS flag for binary block reads
 * @return a matrix reader matching the configured input format
 * @throws DMLRuntimeException if props is null or the input info is unknown
 */
public static MatrixReader createMatrixReader(ReadProperties props) throws DMLRuntimeException {
    //check valid read properties
    if (props == null)
        throw new DMLRuntimeException("Failed to create matrix reader with empty properties.");
    InputInfo iinfo = props.inputInfo;
    //side-effect-free static comparison; parallel readers require MCSR layout
    boolean mcsr = (MatrixBlock.DEFAULT_SPARSEBLOCK == SparseBlock.Type.MCSR);
    MatrixReader reader;
    if (iinfo == InputInfo.TextCellInputInfo || iinfo == InputInfo.MatrixMarketInputInfo) {
        boolean parText = ConfigurationManager.getCompilerConfigFlag(ConfigType.PARALLEL_CP_READ_TEXTFORMATS) && mcsr;
        reader = parText ? new ReaderTextCellParallel(iinfo) : new ReaderTextCell(iinfo);
    } else if (iinfo == InputInfo.CSVInputInfo) {
        //fall back to default CSV properties when none were supplied
        CSVFileFormatProperties csvProps = (props.formatProperties != null)
            ? (CSVFileFormatProperties) props.formatProperties
            : new CSVFileFormatProperties();
        boolean parText = ConfigurationManager.getCompilerConfigFlag(ConfigType.PARALLEL_CP_READ_TEXTFORMATS) && mcsr;
        reader = parText ? new ReaderTextCSVParallel(csvProps) : new ReaderTextCSV(csvProps);
    } else if (iinfo == InputInfo.BinaryCellInputInfo) {
        reader = new ReaderBinaryCell();
    } else if (iinfo == InputInfo.BinaryBlockInputInfo) {
        boolean parBin = ConfigurationManager.getCompilerConfigFlag(ConfigType.PARALLEL_CP_READ_BINARYFORMATS) && mcsr;
        reader = parBin ? new ReaderBinaryBlockParallel(props.localFS) : new ReaderBinaryBlock(props.localFS);
    } else {
        throw new DMLRuntimeException("Failed to create matrix reader for unknown input info: " + InputInfo.inputInfoToString(iinfo));
    }
    return reader;
}
Also used : InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) CSVFileFormatProperties(org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 9 with InputInfo

use of org.apache.sysml.runtime.matrix.data.InputInfo in project incubator-systemml by apache.

the class DynamicReadMatrixCP method execute.

/**
 * Reads a matrix from HDFS using the filename, dimensions, and format
 * passed as the external function's scalar inputs, and exposes it as the
 * function result.
 *
 * Inputs: (0) filename, (1) rows, (2) cols, (3) format string.
 */
@Override
public void execute() {
    try {
        String fname = ((Scalar) this.getFunctionInput(0)).getValue();
        int rows = Integer.parseInt(((Scalar) this.getFunctionInput(1)).getValue());
        int cols = Integer.parseInt(((Scalar) this.getFunctionInput(2)).getValue());
        String format = ((Scalar) this.getFunctionInput(3)).getValue();
        InputInfo inputInfo = InputInfo.stringToInputInfo(format);
        OutputInfo outputInfo = OutputInfo.BinaryBlockOutputInfo;
        int blen = ConfigurationManager.getBlocksize();
        MatrixBlock mb = DataConverter.readMatrixFromHDFS(fname, inputInfo, rows, cols, blen, blen);
        String fnameTmp = createOutputFilePathAndName("TMP");
        _ret = new Matrix(fnameTmp, rows, cols, ValueType.Double);
        _ret.setMatrixDoubleArray(mb, outputInfo, inputInfo);
    //NOTE: The packagesupport wrapper creates a new MatrixObjectNew with the given
    // matrix block. This leads to a dirty state of the new object. Hence, the resulting
    // intermediate plan variable will be exported in front of MR jobs and during this export
    // the format will be changed to binary block (the contract of external functions),
    // no matter in which format the original matrix was.
    } catch (Exception e) {
        throw new RuntimeException("Error executing dynamic read of matrix", e);
    }
}
Also used : OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) Matrix(org.apache.sysml.udf.Matrix) Scalar(org.apache.sysml.udf.Scalar)

Example 10 with InputInfo

use of org.apache.sysml.runtime.matrix.data.InputInfo in project incubator-systemml by apache.

the class DynamicReadMatrixRcCP method execute.

/**
 * Reads a matrix from HDFS using the filename, dimensions, and format
 * passed as the external function's scalar inputs, and exposes it as the
 * function result together with a return code.
 *
 * Inputs: (0) filename, (1) rows, (2) cols, (3) format string.
 * Unlike the non-Rc variant, failures are signaled via the rc scalar
 * ("0" success, "1" failure) instead of throwing an exception.
 */
@Override
public void execute() {
    try {
        String fname = ((Scalar) this.getFunctionInput(0)).getValue();
        int rows = Integer.parseInt(((Scalar) this.getFunctionInput(1)).getValue());
        int cols = Integer.parseInt(((Scalar) this.getFunctionInput(2)).getValue());
        String format = ((Scalar) this.getFunctionInput(3)).getValue();
        InputInfo inputInfo = InputInfo.stringToInputInfo(format);
        OutputInfo outputInfo = OutputInfo.BinaryBlockOutputInfo;
        String fnameTmp = createOutputFilePathAndName("TMP");
        _ret = new Matrix(fnameTmp, rows, cols, ValueType.Double);
        int blen = ConfigurationManager.getBlocksize();
        MatrixBlock mb = DataConverter.readMatrixFromHDFS(fname, inputInfo, rows, cols, blen, blen);
        _ret.setMatrixDoubleArray(mb, outputInfo, inputInfo);
        _rc = new Scalar(ScalarValueType.Integer, "0");
    //NOTE: The packagesupport wrapper creates a new MatrixObjectNew with the given
    // matrix block. This leads to a dirty state of the new object. Hence, the resulting
    // intermediate plan variable will be exported in front of MR jobs and during this export
    // the format will be changed to binary block (the contract of external functions),
    // no matter in which format the original matrix was.
    } catch (Exception e) {
        //deliberate: errors are reported through the rc scalar, not rethrown
        _rc = new Scalar(ScalarValueType.Integer, "1");
    }
}
Also used : OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) Matrix(org.apache.sysml.udf.Matrix) Scalar(org.apache.sysml.udf.Scalar)

Aggregations

InputInfo (org.apache.sysml.runtime.matrix.data.InputInfo)37 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)22 OutputInfo (org.apache.sysml.runtime.matrix.data.OutputInfo)15 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)12 IOException (java.io.IOException)10 MatrixFormatMetaData (org.apache.sysml.runtime.matrix.MatrixFormatMetaData)10 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)10 ValueType (org.apache.sysml.parser.Expression.ValueType)7 JobConf (org.apache.hadoop.mapred.JobConf)6 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)6 RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject)6 RunningJob (org.apache.hadoop.mapred.RunningJob)5 JavaPairRDD (org.apache.spark.api.java.JavaPairRDD)5 Path (org.apache.hadoop.fs.Path)4 DMLConfig (org.apache.sysml.conf.DMLConfig)4 FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock)4 MatrixIndexes (org.apache.sysml.runtime.matrix.data.MatrixIndexes)4 ArrayList (java.util.ArrayList)3 Group (org.apache.hadoop.mapred.Counters.Group)3 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)3