
Example 6 with OutputInfo

use of org.apache.sysml.runtime.matrix.data.OutputInfo in project incubator-systemml by apache.

the class DataPartitioner method createPartitionedMatrixObject.

/**
	 * Creates a partitioned matrix object based on the given input matrix object, 
	 * according to the specified split format. The input matrix can be in-memory
	 * or still on HDFS, and the partitioned output matrix is written to HDFS. The
	 * created matrix object can be used transparently to obtain the full matrix
	 * or to read one or more partitions based on given index ranges. 
	 * 
	 * @param in input matrix object
	 * @param out output matrix object
	 * @param force if false, try to optimize
	 * @return partitioned matrix object
	 * @throws DMLRuntimeException if DMLRuntimeException occurs
	 */
public MatrixObject createPartitionedMatrixObject(MatrixObject in, MatrixObject out, boolean force) throws DMLRuntimeException {
    //check for naive partitioning
    if (_format == PDataPartitionFormat.NONE)
        return in;
    //analyze input matrix object
    MatrixFormatMetaData meta = (MatrixFormatMetaData) in.getMetaData();
    MatrixCharacteristics mc = meta.getMatrixCharacteristics();
    InputInfo ii = meta.getInputInfo();
    OutputInfo oi = meta.getOutputInfo();
    long rows = mc.getRows();
    long cols = mc.getCols();
    int brlen = mc.getRowsPerBlock();
    int bclen = mc.getColsPerBlock();
    long nonZeros = mc.getNonZeros();
    double sparsity = (nonZeros >= 0 && rows > 0 && cols > 0) ? ((double) nonZeros) / (rows * cols) : 1.0;
    //try to optimize, if format not forced
    if (!force) {
        //check lower bound of useful data partitioning
        //(return input if matrix already fits in memory)
        if (rows < Hop.CPThreshold && cols < Hop.CPThreshold) {
            return in;
        }
        //check for changing to blockwise representations
        if (_format == PDataPartitionFormat.ROW_WISE && cols < Hop.CPThreshold) {
            LOG.debug("Changing format from " + PDataPartitionFormat.ROW_WISE + " to " + PDataPartitionFormat.ROW_BLOCK_WISE + ".");
            _format = PDataPartitionFormat.ROW_BLOCK_WISE;
        }
        if (_format == PDataPartitionFormat.COLUMN_WISE && rows < Hop.CPThreshold) {
            LOG.debug("Changing format from " + PDataPartitionFormat.COLUMN_WISE + " to " + PDataPartitionFormat.ROW_BLOCK_WISE + ".");
            _format = PDataPartitionFormat.COLUMN_BLOCK_WISE;
        }
    //_format = PDataPartitionFormat.ROW_BLOCK_WISE_N;
    }
    //check changing to binarycell in case of sparse cols (robustness)
    boolean convertBlock2Cell = false;
    if (ii == InputInfo.BinaryBlockInputInfo && _allowBinarycell && _format == PDataPartitionFormat.COLUMN_WISE && sparsity < SPARSITY_CELL_THRESHOLD) {
        LOG.debug("Changing partition outputinfo from binaryblock to binarycell due to sparsity=" + sparsity);
        oi = OutputInfo.BinaryCellOutputInfo;
        convertBlock2Cell = true;
    }
    //prepare filenames and cleanup if required
    String fnameNew = out.getFileName();
    try {
        MapReduceTool.deleteFileIfExistOnHDFS(fnameNew);
    } catch (Exception ex) {
        throw new DMLRuntimeException(ex);
    }
    //core partitioning (depending on subclass)
    partitionMatrix(in, fnameNew, ii, oi, rows, cols, brlen, bclen);
    //create output matrix object
    out.setPartitioned(_format, _n);
    MatrixCharacteristics mcNew = new MatrixCharacteristics(rows, cols, (int) brlen, (int) bclen);
    mcNew.setNonZeros(nonZeros);
    if (convertBlock2Cell)
        ii = InputInfo.BinaryCellInputInfo;
    MatrixFormatMetaData metaNew = new MatrixFormatMetaData(mcNew, oi, ii);
    out.setMetaData(metaNew);
    return out;
}
Also used : OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) MatrixFormatMetaData(org.apache.sysml.runtime.matrix.MatrixFormatMetaData) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)
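
The core of this method is the format-downgrade and sparsity logic. The following standalone sketch (not SystemML code) replays those decisions in isolation; the enum mirrors PDataPartitionFormat, and the two threshold constants are illustrative stand-ins for Hop.CPThreshold and the SPARSITY_CELL_THRESHOLD used above.

/**
 * Standalone sketch (not SystemML code) of the partitioning decisions above:
 * skip partitioning for small matrices, downgrade ROW_WISE/COLUMN_WISE to the
 * corresponding block-wise format when the orthogonal dimension is small, and
 * fall back to a cell representation for very sparse column-wise partitions.
 */
public class PartitionFormatSketch {

    enum Format { NONE, ROW_WISE, ROW_BLOCK_WISE, COLUMN_WISE, COLUMN_BLOCK_WISE }

    // illustrative stand-ins for Hop.CPThreshold and SPARSITY_CELL_THRESHOLD
    static final long CP_THRESHOLD = 10_000;
    static final double SPARSITY_CELL_THRESHOLD = 0.01;

    static Format choosePartitionFormat(Format requested, long rows, long cols, boolean force) {
        if (requested == Format.NONE)
            return Format.NONE;
        if (!force) {
            // small matrices fit in memory, so partitioning does not pay off
            if (rows < CP_THRESHOLD && cols < CP_THRESHOLD)
                return Format.NONE;
            // downgrade to block-wise formats if the orthogonal dimension is small
            if (requested == Format.ROW_WISE && cols < CP_THRESHOLD)
                return Format.ROW_BLOCK_WISE;
            if (requested == Format.COLUMN_WISE && rows < CP_THRESHOLD)
                return Format.COLUMN_BLOCK_WISE;
        }
        return requested;
    }

    static boolean useCellOutput(Format fmt, long rows, long cols, long nonZeros) {
        double sparsity = (nonZeros >= 0 && rows > 0 && cols > 0)
            ? (double) nonZeros / (rows * cols) : 1.0;
        return fmt == Format.COLUMN_WISE && sparsity < SPARSITY_CELL_THRESHOLD;
    }

    public static void main(String[] args) {
        System.out.println(choosePartitionFormat(Format.ROW_WISE, 1_000_000, 100, false)); // ROW_BLOCK_WISE
        System.out.println(useCellOutput(Format.COLUMN_WISE, 1_000_000, 100, 500));        // true
    }
}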

Example 7 with OutputInfo

use of org.apache.sysml.runtime.matrix.data.OutputInfo in project incubator-systemml by apache.

the class DataPartitionerRemoteMapper method configure.

@Override
public void configure(JobConf job) {
    long rlen = MRJobConfiguration.getPartitioningNumRows(job);
    long clen = MRJobConfiguration.getPartitioningNumCols(job);
    int brlen = MRJobConfiguration.getPartitioningBlockNumRows(job);
    int bclen = MRJobConfiguration.getPartitioningBlockNumCols(job);
    InputInfo ii = MRJobConfiguration.getPartitioningInputInfo(job);
    OutputInfo oi = MRJobConfiguration.getPartitioningOutputInfo(job);
    PDataPartitionFormat pdf = MRJobConfiguration.getPartitioningFormat(job);
    int n = MRJobConfiguration.getPartitioningSizeN(job);
    boolean keepIndexes = MRJobConfiguration.getPartitioningIndexFlag(job);
    if (ii == InputInfo.TextCellInputInfo)
        _mapper = new DataPartitionerMapperTextcell(rlen, clen, brlen, bclen, pdf, n);
    else if (ii == InputInfo.BinaryCellInputInfo)
        _mapper = new DataPartitionerMapperBinarycell(rlen, clen, brlen, bclen, pdf, n);
    else if (ii == InputInfo.BinaryBlockInputInfo) {
        if (oi == OutputInfo.BinaryBlockOutputInfo)
            _mapper = new DataPartitionerMapperBinaryblock(rlen, clen, brlen, bclen, pdf, n, keepIndexes);
        else if (oi == OutputInfo.BinaryCellOutputInfo) {
            //fused parfor
            boolean outputEmpty = MRJobConfiguration.getProgramBlocks(job) != null;
            _mapper = new DataPartitionerMapperBinaryblock2Binarycell(job, rlen, clen, brlen, bclen, pdf, n, keepIndexes, outputEmpty);
        } else
            throw new RuntimeException("Partitioning from '" + ii + "' to '" + oi + "' not supported");
    } else
        throw new RuntimeException("Unable to configure mapper with unknown input info: " + ii.toString());
}
Also used : OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo)

Example 8 with OutputInfo

use of org.apache.sysml.runtime.matrix.data.OutputInfo in project incubator-systemml by apache.

the class SortMR method runStitchupJob.

private static boolean runStitchupJob(String input, long rlen, long clen, int brlen, int bclen, long[] counts, int numReducers, int replication, String output) throws Exception {
    JobConf job = new JobConf(SortMR.class);
    job.setJobName("SortIndexesMR");
    //setup input/output paths
    Path inpath = new Path(input);
    Path outpath = new Path(output);
    FileInputFormat.setInputPaths(job, inpath);
    FileOutputFormat.setOutputPath(job, outpath);
    MapReduceTool.deleteFileIfExistOnHDFS(outpath, job);
    //set number of reducers (1 if local mode)
    if (InfrastructureAnalyzer.isLocalMode(job))
        job.setNumReduceTasks(1);
    else
        MRJobConfiguration.setNumReducers(job, numReducers, numReducers);
    //setup input/output format
    InputInfo iinfo = InputInfo.BinaryBlockInputInfo;
    OutputInfo oinfo = OutputInfo.BinaryBlockOutputInfo;
    job.setInputFormat(iinfo.inputFormatClass);
    job.setOutputFormat(oinfo.outputFormatClass);
    CompactInputFormat.setKeyValueClasses(job, MatrixIndexes.class, MatrixBlock.class);
    //setup mapper/reducer/output classes
    MRJobConfiguration.setInputInfo(job, (byte) 0, InputInfo.BinaryBlockInputInfo, brlen, bclen, ConvertTarget.BLOCK);
    job.setMapperClass(IndexSortStitchupMapper.class);
    job.setReducerClass(IndexSortStitchupReducer.class);
    job.setOutputKeyClass(oinfo.outputKeyClass);
    job.setOutputValueClass(oinfo.outputValueClass);
    MRJobConfiguration.setBlockSize(job, (byte) 0, brlen, bclen);
    MRJobConfiguration.setMatricesDimensions(job, new byte[] { 0 }, new long[] { rlen }, new long[] { clen });
    //compute shifted prefix sum of offsets and put into configuration
    long[] cumsumCounts = new long[counts.length];
    long sum = 0;
    for (int i = 0; i < counts.length; i++) {
        cumsumCounts[i] = sum;
        sum += counts[i];
    }
    job.set(SORT_INDEXES_OFFSETS, Arrays.toString(cumsumCounts));
    //setup replication factor
    job.setInt(MRConfigurationNames.DFS_REPLICATION, replication);
    //set unique working dir
    MRJobConfiguration.setUniqueWorkingDir(job);
    //run mr job
    RunningJob runJob = JobClient.runJob(job);
    return runJob.isSuccessful();
}
Also used : Path(org.apache.hadoop.fs.Path) OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) RunningJob(org.apache.hadoop.mapred.RunningJob) JobConf(org.apache.hadoop.mapred.JobConf)
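
The loop above computes an exclusive (shifted) prefix sum over the per-partition counts, so each reducer knows the global start offset of its output range. A standalone check of the same computation, with made-up counts:

import java.util.Arrays;

/** Not SystemML code: the shifted prefix sum used above, run on made-up counts. */
public class PrefixSumSketch {
    public static void main(String[] args) {
        long[] counts = { 4, 2, 5 };
        long[] cumsumCounts = new long[counts.length];
        long sum = 0;
        for (int i = 0; i < counts.length; i++) {
            cumsumCounts[i] = sum; // exclusive prefix sum: start offset of partition i
            sum += counts[i];
        }
        System.out.println(Arrays.toString(cumsumCounts)); // [0, 4, 6]
    }
}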

Example 9 with OutputInfo

use of org.apache.sysml.runtime.matrix.data.OutputInfo in project incubator-systemml by apache.

the class VariableCPInstruction method writeMMFile.

/**
	 * Helper function to write MM files to HDFS.
	 * 
	 * @param ec execution context
	 * @param fname file name
	 * @throws DMLRuntimeException if DMLRuntimeException occurs
	 */
private void writeMMFile(ExecutionContext ec, String fname) throws DMLRuntimeException {
    MatrixObject mo = ec.getMatrixObject(input1.getName());
    String outFmt = "matrixmarket";
    if (mo.isDirty()) {
        // there exists data computed in CP that is not backed up on HDFS
        // i.e., it is either in-memory or in evicted space
        mo.exportData(fname, outFmt);
    } else {
        OutputInfo oi = ((MatrixFormatMetaData) mo.getMetaData()).getOutputInfo();
        MatrixCharacteristics mc = mo.getMatrixCharacteristics();
        if (oi == OutputInfo.TextCellOutputInfo) {
            try {
                WriterMatrixMarket writer = new WriterMatrixMarket();
                writer.mergeTextcellToMatrixMarket(mo.getFileName(), fname, mc.getRows(), mc.getCols(), mc.getNonZeros());
            } catch (IOException e) {
                throw new DMLRuntimeException(e);
            }
        } else if (oi == OutputInfo.BinaryBlockOutputInfo) {
            mo.exportData(fname, outFmt);
        } else {
            throw new DMLRuntimeException("Unexpected data format (" + OutputInfo.outputInfoToString(oi) + "): can not export into MatrixMarket format.");
        }
    }
}
Also used : OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) WriterMatrixMarket(org.apache.sysml.runtime.io.WriterMatrixMarket) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) IOException(java.io.IOException) MatrixFormatMetaData(org.apache.sysml.runtime.matrix.MatrixFormatMetaData) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)
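
For reference, the MatrixMarket coordinate format this helper ultimately produces consists of a header line, a "rows cols nnz" line, and one 1-based "i j value" triple per nonzero. A minimal local-filesystem sketch (not SystemML code; the real writers operate on HDFS paths):

import java.io.FileWriter;
import java.io.PrintWriter;

/** Not SystemML code: writes a tiny matrix in MatrixMarket coordinate format. */
public class MatrixMarketSketch {
    public static void main(String[] args) throws Exception {
        int rows = 3, cols = 3;
        // sparse entries as {row, col, value}, 1-based as required by MatrixMarket
        double[][] entries = { {1, 1, 7.0}, {2, 3, 1.5}, {3, 2, -2.0} };
        try (PrintWriter out = new PrintWriter(new FileWriter("example.mtx"))) {
            out.println("%%MatrixMarket matrix coordinate real general");
            out.println(rows + " " + cols + " " + entries.length);
            for (double[] e : entries)
                out.println((int) e[0] + " " + (int) e[1] + " " + e[2]);
        }
    }
}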

Example 10 with OutputInfo

use of org.apache.sysml.runtime.matrix.data.OutputInfo in project incubator-systemml by apache.

the class VariableCPInstruction method writeCSVFile.

/**
	 * Helper function to write CSV files to HDFS.
	 * 
	 * @param ec execution context
	 * @param fname file name
	 * @throws DMLRuntimeException if DMLRuntimeException occurs
	 */
private void writeCSVFile(ExecutionContext ec, String fname) throws DMLRuntimeException {
    MatrixObject mo = ec.getMatrixObject(input1.getName());
    String outFmt = "csv";
    if (mo.isDirty()) {
        // there exists data computed in CP that is not backed up on HDFS
        // i.e., it is either in-memory or in evicted space
        mo.exportData(fname, outFmt, _formatProperties);
    } else {
        try {
            OutputInfo oi = ((MatrixFormatMetaData) mo.getMetaData()).getOutputInfo();
            MatrixCharacteristics mc = ((MatrixFormatMetaData) mo.getMetaData()).getMatrixCharacteristics();
            if (oi == OutputInfo.CSVOutputInfo) {
                WriterTextCSV writer = new WriterTextCSV((CSVFileFormatProperties) _formatProperties);
                writer.addHeaderToCSV(mo.getFileName(), fname, mc.getRows(), mc.getCols());
            } else if (oi == OutputInfo.BinaryBlockOutputInfo || oi == OutputInfo.TextCellOutputInfo) {
                mo.exportData(fname, outFmt, _formatProperties);
            } else {
                throw new DMLRuntimeException("Unexpected data format (" + OutputInfo.outputInfoToString(oi) + "): can not export into CSV format.");
            }
            // Write Metadata file
            MapReduceTool.writeMetaDataFile(fname + ".mtd", mo.getValueType(), mc, OutputInfo.CSVOutputInfo, _formatProperties);
        } catch (IOException e) {
            throw new DMLRuntimeException(e);
        }
    }
}
Also used : OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) WriterTextCSV(org.apache.sysml.runtime.io.WriterTextCSV) IOException(java.io.IOException) MatrixFormatMetaData(org.apache.sysml.runtime.matrix.MatrixFormatMetaData) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)
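
The notable fast path above is the first branch: when the data already resides on HDFS in CSV format, only a header row is prepended (plus the .mtd metadata file) instead of re-exporting the whole matrix. A local-filesystem sketch of that idea (not SystemML code; WriterTextCSV.addHeaderToCSV works on HDFS paths and directories of part files, and the C1..Cn column names are an assumption):

import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;

/** Not SystemML code: prepend a generated header row to an existing headerless CSV. */
public class CsvHeaderSketch {
    public static void main(String[] args) throws Exception {
        Path src = Paths.get("data.csv");      // existing headerless CSV
        Path dst = Paths.get("data_hdr.csv");  // output with header row
        int cols = 3;
        StringBuilder header = new StringBuilder();
        for (int i = 1; i <= cols; i++)
            header.append(i > 1 ? "," : "").append("C").append(i); // assumed default column names
        List<String> lines = new ArrayList<>();
        lines.add(header.toString());
        lines.addAll(Files.readAllLines(src, StandardCharsets.UTF_8));
        Files.write(dst, lines, StandardCharsets.UTF_8);
    }
}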

Aggregations

OutputInfo (org.apache.sysml.runtime.matrix.data.OutputInfo) 34
MatrixFormatMetaData (org.apache.sysml.runtime.matrix.MatrixFormatMetaData) 17
InputInfo (org.apache.sysml.runtime.matrix.data.InputInfo) 15
DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException) 14
MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics) 13
MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject) 12
IOException (java.io.IOException) 8
ValueType (org.apache.sysml.parser.Expression.ValueType) 8
MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock) 5
HashMap (java.util.HashMap) 3
FrameWriter (org.apache.sysml.runtime.io.FrameWriter) 3
FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock) 3
Matrix (org.apache.sysml.udf.Matrix) 3
Scalar (org.apache.sysml.udf.Scalar) 3
ArrayList (java.util.ArrayList) 2
Path (org.apache.hadoop.fs.Path) 2
JobConf (org.apache.hadoop.mapred.JobConf) 2
RunningJob (org.apache.hadoop.mapred.RunningJob) 2
RUNTIME_PLATFORM (org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM) 2
LopsException (org.apache.sysml.lops.LopsException) 2