Search in sources :

Example 21 with MetaDataFormat

use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.

the class CacheableData method isEqualOutputFormat.

/**
 * Checks whether the requested output format resolves to the same
 * {@code OutputInfo} as the one matching the input info stored in this
 * object's metadata.
 *
 * @param outputFormat requested output format name; {@code null} means no format was requested
 * @return {@code true} if {@code outputFormat} is {@code null} or both formats resolve to the
 *         same {@code OutputInfo} instance; {@code false} if they differ or if resolving
 *         either of them raised an exception
 */
protected boolean isEqualOutputFormat(String outputFormat) {
    // nothing requested: there is nothing to compare against
    if (outputFormat == null) {
        return true;
    }
    try {
        final MetaDataFormat meta = (MetaDataFormat) _metaData;
        final OutputInfo current = InputInfo.getMatchingOutputInfo(meta.getInputInfo());
        final OutputInfo requested = OutputInfo.stringToOutputInfo(outputFormat);
        // reference comparison on purpose, exactly as the formats are compared elsewhere in this method's contract
        return current == requested;
    } catch (Exception ex) {
        // any failure while resolving the formats is reported as "not equal"
        return false;
    }
}
Also used : OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IOException(java.io.IOException)

Example 22 with MetaDataFormat

use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.

the class CacheableData method writeMetaData.

/**
 * Writes the {@code .mtd} metadata file that accompanies the data file at
 * {@code filePathAndName}. Nothing is written when the effective output format
 * is {@code OutputInfo.MatrixMarketOutputInfo}.
 *
 * @param filePathAndName path of the data file; the metadata file is this path plus ".mtd"
 * @param outputFormat requested output format name, or {@code null} to use the output info
 *        matching the input info stored in the metadata
 * @param formatProperties file format properties passed through to the metadata writer
 * @throws IOException declared by this method; raised by the underlying metadata write
 * @throws DMLRuntimeException if this object has no metadata
 */
protected void writeMetaData(String filePathAndName, String outputFormat, FileFormatProperties formatProperties) throws IOException {
    final MetaDataFormat meta = (MetaDataFormat) _metaData;
    if (meta == null) {
        throw new DMLRuntimeException("Unexpected error while writing mtd file (" + filePathAndName + ") -- metadata is null.");
    }

    // resolve the effective output format: explicit request wins over the stored one
    final OutputInfo target;
    if (outputFormat != null) {
        target = OutputInfo.stringToOutputInfo(outputFormat);
    } else {
        target = InputInfo.getMatchingOutputInfo(meta.getInputInfo());
    }

    // no metadata file is written for this format
    if (target == OutputInfo.MatrixMarketOutputInfo) {
        return;
    }

    // dimension information as stored in the metadata of this object
    MatrixCharacteristics dims = meta.getMatrixCharacteristics();

    // note: only required for singlenode (due to binarycell default)
    if (target == OutputInfo.BinaryBlockOutputInfo && DMLScript.rtplatform == RUNTIME_PLATFORM.SINGLE_NODE) {
        final boolean usesConfiguredBlocksize = dims.getRowsPerBlock() == ConfigurationManager.getBlocksize()
            && dims.getColsPerBlock() == ConfigurationManager.getBlocksize();
        if (!usesConfiguredBlocksize) {
            // report the configured block size instead of the stored one
            dims = new MatrixCharacteristics(dims.getRows(), dims.getCols(), ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize(), dims.getNonZeros());
        }
    }

    // write the actual metadata file
    MapReduceTool.writeMetaDataFile(filePathAndName + ".mtd", valueType, getSchema(), dataType, dims, target, formatProperties);
}
Also used : OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 23 with MetaDataFormat

use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.

the class FrameObject method writeBlobFromRDDtoHDFS.

/**
 * Writes the given frame RDD to HDFS under {@code fname}.
 *
 * @param rdd RDD handle to write
 * @param fname target file name
 * @param ofmt requested output format name, or {@code null} to use the output info
 *        matching the input info stored in the metadata
 * @throws IOException declared by this method
 * @throws DMLRuntimeException declared by this method
 */
@Override
protected void writeBlobFromRDDtoHDFS(RDDObject rdd, String fname, String ofmt) throws IOException, DMLRuntimeException {
    final MetaDataFormat meta = (MetaDataFormat) _metaData;

    // pick the output info: an explicit format wins, otherwise fall back to the metadata
    final OutputInfo target;
    if (ofmt == null) {
        target = InputInfo.getMatchingOutputInfo(meta.getInputInfo());
    } else {
        target = OutputInfo.stringToOutputInfo(ofmt);
    }

    // note: writing an RDD to HDFS might trigger lazy evaluation
    // of pending transformations.
    SparkExecutionContext.writeFrameRDDtoHDFS(rdd, fname, target);
}
Also used : OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat)

Example 24 with MetaDataFormat

use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.

the class MatrixObject method readMatrixPartition.

/**
 * Reads the partition of this (partitioned) matrix that is addressed by the given index range
 * and returns it as a matrix block.
 * <p>
 * NOTE: for reading matrix partitions, we could cache (in its real sense) the read block
 * with soft references (no need for eviction, as partitioning only applied for read-only matrices).
 * However, since we currently only support row- and column-wise partitioning caching is not applied yet.
 * This could be changed once we also support column-block-wise and row-block-wise. Furthermore,
 * as we reject to partition vectors and support only full row or column indexing, no metadata (apart from
 * the partition flag) is required.
 * <p>
 * If an in-memory partition has been set, it is returned directly without any file access
 * (and without updating the read-time statistics).
 *
 * @param pred index range
 * @return matrix block
 * @throws DMLRuntimeException if this matrix is not partitioned, or wrapping any exception
 *         raised while locating, reading or slicing the partition (including an
 *         unsupported partition format)
 */
public synchronized MatrixBlock readMatrixPartition(IndexRange pred) {
    if (LOG.isTraceEnabled())
        LOG.trace("Acquire partition " + hashCode() + " " + pred);
    // start timestamp is only taken when statistics are enabled
    long t0 = DMLScript.STATISTICS ? System.nanoTime() : 0;
    if (!_partitioned)
        throw new DMLRuntimeException("MatrixObject not available to indexed read.");
    // return the static in-memory partition if one has been set from outside of the program
    // (note: this early return bypasses the statistics update at the end of the method)
    if (_partitionInMemory != null)
        return _partitionInMemory;
    MatrixBlock mb = null;
    try {
        // only the two block-wise formats use the single-entry partition cache and need slicing
        boolean blockwise = (_partitionFormat == PDataPartitionFormat.ROW_BLOCK_WISE || _partitionFormat == PDataPartitionFormat.COLUMN_BLOCK_WISE);
        // preparations for block wise access
        MetaDataFormat iimd = (MetaDataFormat) _metaData;
        MatrixCharacteristics mc = iimd.getMatrixCharacteristics();
        int brlen = mc.getRowsPerBlock();
        int bclen = mc.getColsPerBlock();
        // get filename depending on format
        String fname = getPartitionFileName(pred, brlen, bclen);
        // probe cache: a hit requires that the cached entry was read from the same partition file
        if (blockwise && _partitionCacheName != null && _partitionCacheName.equals(fname)) {
            // try getting block from cache (may yield null if the soft reference was cleared)
            mb = _cache.get();
        }
        if (// block not in cache
        mb == null) {
            // get rows and cols, i.e., the dimensions of a single partition for the given format
            long rows = -1;
            long cols = -1;
            switch(_partitionFormat) {
                case ROW_WISE:
                    rows = 1;
                    cols = mc.getCols();
                    break;
                case ROW_BLOCK_WISE:
                    rows = brlen;
                    cols = mc.getCols();
                    break;
                case ROW_BLOCK_WISE_N:
                    rows = _partitionSize;
                    cols = mc.getCols();
                    break;
                case COLUMN_WISE:
                    rows = mc.getRows();
                    cols = 1;
                    break;
                case COLUMN_BLOCK_WISE:
                    rows = mc.getRows();
                    cols = bclen;
                    break;
                case COLUMN_BLOCK_WISE_N:
                    rows = mc.getRows();
                    cols = _partitionSize;
                    break;
                default:
                    // note: thrown inside the try, hence re-wrapped by the catch block below
                    throw new DMLRuntimeException("Unsupported partition format: " + _partitionFormat);
            }
            // read the partition file if it exists on HDFS; otherwise substitute a newly
            // allocated block of the partition dimensions and log a warning
            if (MapReduceTool.existsFileOnHDFS(fname))
                mb = readBlobFromHDFS(fname, rows, cols);
            else {
                // NOTE(review): third constructor argument is presumably the sparse flag -- confirm
                mb = new MatrixBlock((int) rows, (int) cols, true);
                LOG.warn("Reading empty matrix partition " + fname);
            }
        }
        // post processing
        if (blockwise) {
            // put block into cache (the full block is cached, before the slice below replaces mb)
            _partitionCacheName = fname;
            _cache = new SoftReference<>(mb);
            if (_partitionFormat == PDataPartitionFormat.ROW_BLOCK_WISE) {
                // offset of the requested row within its row block, then extract that single row
                // over the requested column range (the -1 suggests pred is 1-based -- TODO confirm)
                int rix = (int) ((pred.rowStart - 1) % brlen);
                mb = mb.slice(rix, rix, (int) (pred.colStart - 1), (int) (pred.colEnd - 1), new MatrixBlock());
            }
            if (_partitionFormat == PDataPartitionFormat.COLUMN_BLOCK_WISE) {
                // offset of the requested column within its column block, then extract that single
                // column over the requested row range
                int cix = (int) ((pred.colStart - 1) % bclen);
                mb = mb.slice((int) (pred.rowStart - 1), (int) (pred.rowEnd - 1), cix, cix, new MatrixBlock());
            }
        }
    // NOTE: currently no special treatment of non-existing partitions necessary
    // because empty blocks are written anyway
    } catch (Exception ex) {
        // all failures (checked or unchecked) surface as DMLRuntimeException with the cause attached
        throw new DMLRuntimeException(ex);
    }
    if (DMLScript.STATISTICS) {
        // account the elapsed time of this read in the cache statistics
        long t1 = System.nanoTime();
        CacheStatistics.incrementAcquireRTime(t1 - t0);
    }
    return mb;
}
Also used : MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IOException(java.io.IOException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 25 with MetaDataFormat

use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.

the class MatrixObject method readBlobFromHDFS.

/**
 * Reads a matrix block from the given HDFS file, using the input format and block sizes
 * recorded in this object's metadata, and marks the HDFS file as existing.
 *
 * @param fname file name to read from
 * @param rlen number of rows passed to the reader
 * @param clen number of columns passed to the reader
 * @return the matrix block that was read (never {@code null})
 * @throws IOException if the reader returned {@code null}; also declared for the underlying read
 */
@Override
protected MatrixBlock readBlobFromHDFS(String fname, long rlen, long clen) throws IOException {
    final MetaDataFormat meta = (MetaDataFormat) _metaData;
    final MatrixCharacteristics dims = meta.getMatrixCharacteristics();

    long startMillis = 0;
    if (LOG.isTraceEnabled()) {
        LOG.trace("Reading matrix from HDFS...  " + hashCode() + "  Path: " + fname + ", dimensions: [" + dims.getRows() + ", " + dims.getCols() + ", " + dims.getNonZeros() + "]");
        startMillis = System.currentTimeMillis();
    }

    // sparsity estimate from the metadata; a negative nnz falls back to 1.0
    double sparsity = 1.0d;
    if (dims.getNonZeros() >= 0) {
        sparsity = ((double) dims.getNonZeros()) / (dims.getRows() * dims.getCols());
    }

    // read matrix and maintain meta data
    final MatrixBlock result = DataConverter.readMatrixFromHDFS(fname, meta.getInputInfo(), rlen, clen, dims.getRowsPerBlock(), dims.getColsPerBlock(), sparsity, getFileFormatProperties());
    setHDFSFileExists(true);

    // sanity check: the reader must have produced a block
    if (result == null) {
        throw new IOException("Unable to load matrix from file: " + fname);
    }

    if (LOG.isTraceEnabled()) {
        LOG.trace("Reading Completed: " + (System.currentTimeMillis() - startMillis) + " msec.");
    }
    return result;
}
Also used : MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) IOException(java.io.IOException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Aggregations

MetaDataFormat (org.apache.sysml.runtime.matrix.MetaDataFormat) 54, MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics) 47, DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException) 28, MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject) 26, OutputInfo (org.apache.sysml.runtime.matrix.data.OutputInfo) 17, IOException (java.io.IOException) 12, ValueType (org.apache.sysml.parser.Expression.ValueType) 10, InputInfo (org.apache.sysml.runtime.matrix.data.InputInfo) 10, RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject) 9, MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock) 9, FrameObject (org.apache.sysml.runtime.controlprogram.caching.FrameObject) 7, FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock) 5, Path (org.apache.hadoop.fs.Path) 4, LongWritable (org.apache.hadoop.io.LongWritable) 4, Text (org.apache.hadoop.io.Text) 4, Data (org.apache.sysml.runtime.instructions.cp.Data) 4, ScalarObject (org.apache.sysml.runtime.instructions.cp.ScalarObject) 4, ConvertStringToLongTextPair (org.apache.sysml.runtime.instructions.spark.functions.ConvertStringToLongTextPair) 4, CopyTextInputFunction (org.apache.sysml.runtime.instructions.spark.functions.CopyTextInputFunction) 4, DataOp (org.apache.sysml.hops.DataOp) 3