Search in sources :

Example 41 with MetaDataFormat

use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.

the class MatrixObject method readBlobFromRDD.

@Override
protected MatrixBlock readBlobFromRDD(RDDObject rdd, MutableBoolean writeStatus) throws IOException {
    // note: the read of a matrix block from an RDD might trigger
    // lazy evaluation of pending transformations.
    RDDObject lrdd = rdd;
    // prepare return status (by default only collect)
    writeStatus.setValue(false);
    MetaDataFormat iimd = (MetaDataFormat) _metaData;
    MatrixCharacteristics mc = iimd.getMatrixCharacteristics();
    InputInfo ii = iimd.getInputInfo();
    MatrixBlock mb = null;
    try {
        // prevent unnecessary collect through rdd checkpoint
        if (rdd.allowsShortCircuitCollect()) {
            lrdd = (RDDObject) rdd.getLineageChilds().get(0);
        }
        // obtain matrix block from RDD
        int rlen = (int) mc.getRows();
        int clen = (int) mc.getCols();
        int brlen = (int) mc.getRowsPerBlock();
        int bclen = (int) mc.getColsPerBlock();
        long nnz = mc.getNonZerosBound();
        // guarded rdd collect
        if (// guarded collect not for binary cell
        ii == InputInfo.BinaryBlockInputInfo && !OptimizerUtils.checkSparkCollectMemoryBudget(mc, getPinnedSize() + getBroadcastSize(), true)) {
            // note: lazy, partition-at-a-time collect (toLocalIterator) was significantly slower
            if (!MapReduceTool.existsFileOnHDFS(_hdfsFileName)) {
                // prevent overwrite existing file
                long newnnz = SparkExecutionContext.writeRDDtoHDFS(lrdd, _hdfsFileName, iimd.getOutputInfo());
                _metaData.getMatrixCharacteristics().setNonZeros(newnnz);
                // mark rdd as non-pending (for export)
                ((RDDObject) rdd).setPending(false);
                // mark rdd as hdfs file (for restore)
                ((RDDObject) rdd).setHDFSFile(true);
                // mark for no cache-write on read
                writeStatus.setValue(true);
            // note: the flag hdfsFile is actually not entirely correct because we still hold an rdd
            // reference to the input not to an rdd of the hdfs file but the resulting behavior is correct
            }
            mb = readBlobFromHDFS(_hdfsFileName);
        } else if (ii == InputInfo.BinaryCellInputInfo) {
            // collect matrix block from binary block RDD
            mb = SparkExecutionContext.toMatrixBlock(lrdd, rlen, clen, nnz);
        } else {
            // collect matrix block from binary cell RDD
            mb = SparkExecutionContext.toMatrixBlock(lrdd, rlen, clen, brlen, bclen, nnz);
        }
    } catch (DMLRuntimeException ex) {
        throw new IOException(ex);
    }
    // sanity check correct output
    if (mb == null)
        throw new IOException("Unable to load matrix from rdd.");
    return mb;
}
Also used : MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) RDDObject(org.apache.sysml.runtime.instructions.spark.data.RDDObject) IOException(java.io.IOException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 42 with MetaDataFormat

use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.

the class MatrixObject method writeBlobFromRDDtoHDFS.

@Override
protected void writeBlobFromRDDtoHDFS(RDDObject rdd, String fname, String outputFormat) throws IOException, DMLRuntimeException {
    // prepare output info
    MetaDataFormat iimd = (MetaDataFormat) _metaData;
    OutputInfo oinfo = (outputFormat != null ? OutputInfo.stringToOutputInfo(outputFormat) : InputInfo.getMatchingOutputInfo(iimd.getInputInfo()));
    // note: the write of an RDD to HDFS might trigger
    // lazy evaluation of pending transformations.
    long newnnz = SparkExecutionContext.writeRDDtoHDFS(rdd, fname, oinfo);
    _metaData.getMatrixCharacteristics().setNonZeros(newnnz);
}
Also used : OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat)

Example 43 with MetaDataFormat

use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.

the class MatrixObject method writeBlobToHDFS.

/**
 * Writes in-memory matrix to HDFS in a specified format.
 */
@Override
protected void writeBlobToHDFS(String fname, String ofmt, int rep, FileFormatProperties fprop) throws IOException, DMLRuntimeException {
    long begin = 0;
    if (LOG.isTraceEnabled()) {
        LOG.trace(" Writing matrix to HDFS...  " + hashCode() + "  Path: " + fname + ", Format: " + (ofmt != null ? ofmt : "inferred from metadata"));
        begin = System.currentTimeMillis();
    }
    MetaDataFormat iimd = (MetaDataFormat) _metaData;
    if (_data != null) {
        // Get the dimension information from the metadata stored within MatrixObject
        MatrixCharacteristics mc = iimd.getMatrixCharacteristics();
        // Write the matrix to HDFS in requested format
        OutputInfo oinfo = (ofmt != null ? OutputInfo.stringToOutputInfo(ofmt) : InputInfo.getMatchingOutputInfo(iimd.getInputInfo()));
        // note: this is only required if singlenode (due to binarycell default)
        if (oinfo == OutputInfo.BinaryBlockOutputInfo && DMLScript.rtplatform == RUNTIME_PLATFORM.SINGLE_NODE && (mc.getRowsPerBlock() != ConfigurationManager.getBlocksize() || mc.getColsPerBlock() != ConfigurationManager.getBlocksize())) {
            DataConverter.writeMatrixToHDFS(_data, fname, oinfo, new MatrixCharacteristics(mc.getRows(), mc.getCols(), ConfigurationManager.getBlocksize(), ConfigurationManager.getBlocksize(), mc.getNonZeros()), rep, fprop);
        } else {
            DataConverter.writeMatrixToHDFS(_data, fname, oinfo, mc, rep, fprop);
        }
        if (LOG.isTraceEnabled())
            LOG.trace("Writing matrix to HDFS (" + fname + ") - COMPLETED... " + (System.currentTimeMillis() - begin) + " msec.");
    } else if (LOG.isTraceEnabled()) {
        LOG.trace("Writing matrix to HDFS (" + fname + ") - NOTHING TO WRITE (_data == null).");
    }
    if (DMLScript.STATISTICS)
        CacheStatistics.incrementHDFSWrites();
}
Also used : OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 44 with MetaDataFormat

use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.

the class ProgramConverter method parseDataObject.

/**
 * NOTE: MRJobConfiguration cannot be used for the general case because program blocks and
 * related symbol tables can be hierarchically structured.
 *
 * @param in data object as string
 * @return array of objects
 */
public static Object[] parseDataObject(String in) {
    Object[] ret = new Object[2];
    StringTokenizer st = new StringTokenizer(in, DATA_FIELD_DELIM);
    String name = st.nextToken();
    DataType datatype = DataType.valueOf(st.nextToken());
    ValueType valuetype = ValueType.valueOf(st.nextToken());
    String valString = st.hasMoreTokens() ? st.nextToken() : "";
    Data dat = null;
    switch(datatype) {
        case SCALAR:
            {
                switch(valuetype) {
                    case INT:
                        dat = new IntObject(Long.parseLong(valString));
                        break;
                    case DOUBLE:
                        dat = new DoubleObject(Double.parseDouble(valString));
                        break;
                    case BOOLEAN:
                        dat = new BooleanObject(Boolean.parseBoolean(valString));
                        break;
                    case STRING:
                        dat = new StringObject(valString);
                        break;
                    default:
                        throw new DMLRuntimeException("Unable to parse valuetype " + valuetype);
                }
                break;
            }
        case MATRIX:
            {
                MatrixObject mo = new MatrixObject(valuetype, valString);
                long rows = Long.parseLong(st.nextToken());
                long cols = Long.parseLong(st.nextToken());
                int brows = Integer.parseInt(st.nextToken());
                int bcols = Integer.parseInt(st.nextToken());
                long nnz = Long.parseLong(st.nextToken());
                InputInfo iin = InputInfo.stringToInputInfo(st.nextToken());
                OutputInfo oin = OutputInfo.stringToOutputInfo(st.nextToken());
                PartitionFormat partFormat = PartitionFormat.valueOf(st.nextToken());
                UpdateType inplace = UpdateType.valueOf(st.nextToken());
                MatrixCharacteristics mc = new MatrixCharacteristics(rows, cols, brows, bcols, nnz);
                MetaDataFormat md = new MetaDataFormat(mc, oin, iin);
                mo.setMetaData(md);
                if (partFormat._dpf != PDataPartitionFormat.NONE)
                    mo.setPartitioned(partFormat._dpf, partFormat._N);
                mo.setUpdateType(inplace);
                dat = mo;
                break;
            }
        default:
            throw new DMLRuntimeException("Unable to parse datatype " + datatype);
    }
    ret[0] = name;
    ret[1] = dat;
    return ret;
}
Also used : MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) ValueType(org.apache.sysml.parser.Expression.ValueType) DoubleObject(org.apache.sysml.runtime.instructions.cp.DoubleObject) Data(org.apache.sysml.runtime.instructions.cp.Data) PartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PartitionFormat) PDataPartitionFormat(org.apache.sysml.runtime.controlprogram.ParForProgramBlock.PDataPartitionFormat) UpdateType(org.apache.sysml.runtime.controlprogram.caching.MatrixObject.UpdateType) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) StringTokenizer(java.util.StringTokenizer) IntObject(org.apache.sysml.runtime.instructions.cp.IntObject) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) StringObject(org.apache.sysml.runtime.instructions.cp.StringObject) DataType(org.apache.sysml.parser.Expression.DataType) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) ScalarObject(org.apache.sysml.runtime.instructions.cp.ScalarObject) DoubleObject(org.apache.sysml.runtime.instructions.cp.DoubleObject) BooleanObject(org.apache.sysml.runtime.instructions.cp.BooleanObject) IntObject(org.apache.sysml.runtime.instructions.cp.IntObject) StringObject(org.apache.sysml.runtime.instructions.cp.StringObject) BooleanObject(org.apache.sysml.runtime.instructions.cp.BooleanObject)

Example 45 with MetaDataFormat

use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.

the class ResultMergeLocalFile method createNewMatrixObject.

private MatrixObject createNewMatrixObject(MatrixObject output, ArrayList<MatrixObject> inMO) {
    MetaDataFormat metadata = (MetaDataFormat) _output.getMetaData();
    MatrixObject moNew = new MatrixObject(_output.getValueType(), _outputFName);
    // create deep copy of metadata obj
    MatrixCharacteristics mcOld = metadata.getMatrixCharacteristics();
    OutputInfo oiOld = metadata.getOutputInfo();
    InputInfo iiOld = metadata.getInputInfo();
    MatrixCharacteristics mc = new MatrixCharacteristics(mcOld);
    mc.setNonZeros(_isAccum ? -1 : computeNonZeros(output, inMO));
    MetaDataFormat meta = new MetaDataFormat(mc, oiOld, iiOld);
    moNew.setMetaData(meta);
    return moNew;
}
Also used : OutputInfo(org.apache.sysml.runtime.matrix.data.OutputInfo) MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Aggregations

MetaDataFormat (org.apache.sysml.runtime.matrix.MetaDataFormat)54 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)47 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)28 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)26 OutputInfo (org.apache.sysml.runtime.matrix.data.OutputInfo)17 IOException (java.io.IOException)12 ValueType (org.apache.sysml.parser.Expression.ValueType)10 InputInfo (org.apache.sysml.runtime.matrix.data.InputInfo)10 RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject)9 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)9 FrameObject (org.apache.sysml.runtime.controlprogram.caching.FrameObject)7 FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock)5 Path (org.apache.hadoop.fs.Path)4 LongWritable (org.apache.hadoop.io.LongWritable)4 Text (org.apache.hadoop.io.Text)4 Data (org.apache.sysml.runtime.instructions.cp.Data)4 ScalarObject (org.apache.sysml.runtime.instructions.cp.ScalarObject)4 ConvertStringToLongTextPair (org.apache.sysml.runtime.instructions.spark.functions.ConvertStringToLongTextPair)4 CopyTextInputFunction (org.apache.sysml.runtime.instructions.spark.functions.CopyTextInputFunction)4 DataOp (org.apache.sysml.hops.DataOp)3