Search in sources :

Example 66 with FrameBlock

use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.

the class FrameObject method readBlobFromRDD.

/**
 * Collects the frame block behind the given RDD handle.
 * <p>
 * Note: reading a frame block from an RDD might trigger lazy evaluation
 * of pending transformations.
 *
 * @param rdd    RDD handle to collect from
 * @param status output flag; always set to {@code false} here (collect only)
 * @return the collected frame block, never {@code null}
 * @throws IOException if the conversion fails with a {@link DMLRuntimeException}
 *                     or yields no frame block
 */
@Override
protected FrameBlock readBlobFromRDD(RDDObject rdd, MutableBoolean status) throws IOException {
    // return status: by default the frame is only collected
    status.setValue(false);

    // frame dimensions as recorded in the attached meta data
    MatrixCharacteristics dims = ((MetaDataFormat) _metaData).getMatrixCharacteristics();
    int numRows = (int) dims.getRows();
    int numCols = (int) dims.getCols();

    // without a schema, fall back to all-string columns (at least one column)
    ValueType[] schema = _schema;
    if (schema == null) {
        schema = UtilFunctions.nCopies(numCols >= 1 ? numCols : 1, ValueType.STRING);
    }

    FrameBlock result;
    try {
        // prevent unnecessary collect through rdd checkpoint by reading
        // from the first lineage child instead
        RDDObject source = rdd.allowsShortCircuitCollect()
            ? (RDDObject) rdd.getLineageChilds().get(0)
            : rdd;
        // collect frame block from binary block RDD
        result = SparkExecutionContext.toFrameBlock(source, schema, numRows, numCols);
    } catch (DMLRuntimeException ex) {
        throw new IOException(ex);
    }

    // sanity check correct output
    if (result == null) {
        throw new IOException("Unable to load frame from rdd.");
    }
    return result;
}
Also used : MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) ValueType(org.apache.sysml.parser.Expression.ValueType) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) RDDObject(org.apache.sysml.runtime.instructions.spark.data.RDDObject) IOException(java.io.IOException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 67 with FrameBlock

use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.

the class SparkExecutionContext method getBroadcastForFrameVariable.

/**
 * Returns a partitioned broadcast of the frame bound to the given variable.
 * <p>
 * A valid broadcast handle already attached to the frame object is reused;
 * otherwise the frame is split into blocks, broadcast in one or more
 * coarse-grained partitions, and the new handle is stored on the frame object.
 * Broadcast time and count are recorded when statistics are enabled.
 *
 * @param varname name of the frame variable
 * @return partitioned broadcast of the frame
 */
@SuppressWarnings("unchecked")
public PartitionedBroadcast<FrameBlock> getBroadcastForFrameVariable(String varname) {
    long startTime = 0;
    if (DMLScript.STATISTICS) {
        startTime = System.nanoTime();
    }
    FrameObject frameObj = getFrameObject(varname);

    // reuse existing broadcast handle
    PartitionedBroadcast<FrameBlock> result = null;
    if (frameObj.getBroadcastHandle() != null && frameObj.getBroadcastHandle().isValid()) {
        result = frameObj.getBroadcastHandle().getBroadcast();
    }

    // create new broadcast handle (never created, evicted)
    if (result == null) {
        // account for overwritten invalid broadcast (e.g., evicted)
        if (frameObj.getBroadcastHandle() != null) {
            CacheableData.addBroadcastSize(-frameObj.getBroadcastHandle().getSize());
        }

        // block dimensions: all columns of the frame, default number of rows
        int blockCols = (int) frameObj.getNumColumns();
        int blockRows = OptimizerUtils.getDefaultFrameSize();

        // create partitioned frame block and release memory consumed by input
        FrameBlock frame = frameObj.acquireRead();
        PartitionedBlock<FrameBlock> blocks = new PartitionedBlock<>(frame, blockRows, blockCols);
        frameObj.release();

        // determine coarse-grained partitioning
        int blocksPerPart = PartitionedBroadcast.computeBlocksPerPartition(
            frameObj.getNumRows(), frameObj.getNumColumns(), blockRows, blockCols);
        int partCount = (int) Math.ceil(
            (double) blocks.getNumRowBlocks() * blocks.getNumColumnBlocks() / blocksPerPart);
        Broadcast<PartitionedBlock<FrameBlock>>[] parts = new Broadcast[partCount];

        if (partCount <= 1) {
            // single partition: broadcast the partitioned block as a whole
            parts[0] = getSparkContext().broadcast(blocks);
            if (!isLocalMaster()) {
                blocks.clearBlocks();
            }
        } else {
            // create coarse-grained partitioned broadcasts
            for (int i = 0, offset = 0; i < partCount; i++, offset += blocksPerPart) {
                int remaining = blocks.getNumRowBlocks() * blocks.getNumColumnBlocks() - offset;
                int numBlks = Math.min(blocksPerPart, remaining);
                PartitionedBlock<FrameBlock> part = blocks.createPartition(offset, numBlks, new FrameBlock());
                parts[i] = getSparkContext().broadcast(part);
                if (!isLocalMaster()) {
                    part.clearBlocks();
                }
            }
        }

        // attach the new handle to the frame object and account for its size
        result = new PartitionedBroadcast<>(parts, frameObj.getMatrixCharacteristics());
        BroadcastObject<FrameBlock> handle = new BroadcastObject<>(result,
            OptimizerUtils.estimatePartitionedSizeExactSparsity(frameObj.getMatrixCharacteristics()));
        frameObj.setBroadcastHandle(handle);
        CacheableData.addBroadcastSize(handle.getSize());
    }

    if (DMLScript.STATISTICS) {
        Statistics.accSparkBroadCastTime(System.nanoTime() - startTime);
        Statistics.incSparkBroadcastCount(1);
    }
    return result;
}
Also used : PartitionedBlock(org.apache.sysml.runtime.instructions.spark.data.PartitionedBlock) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) PartitionedBroadcast(org.apache.sysml.runtime.instructions.spark.data.PartitionedBroadcast) Broadcast(org.apache.spark.broadcast.Broadcast) FrameObject(org.apache.sysml.runtime.controlprogram.caching.FrameObject) Checkpoint(org.apache.sysml.lops.Checkpoint) BroadcastObject(org.apache.sysml.runtime.instructions.spark.data.BroadcastObject)

Example 68 with FrameBlock

use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.

the class SparkExecutionContext method getRDDHandleForFrameObject.

/**
 * Returns an RDD handle for the given frame object, creating and registering
 * one on the frame object if necessary.
 * <p>
 * Three cases are distinguished:
 * <ol>
 *   <li>an RDD handle already exists and is either a checkpoint RDD or the frame
 *       is not cached: the existing RDD is returned as is;</li>
 *   <li>the frame is dirty or cached in memory: the in-memory block is parallelized,
 *       or, if the memory budget does not allow that, exported (when dirty) and
 *       read back from file;</li>
 *   <li>otherwise the RDD is created from the frame's file according to the
 *       requested input format.</li>
 * </ol>
 * A requested {@code BinaryBlockInputInfo} is mapped to {@code BinaryBlockFrameInputInfo}.
 * <p>
 * FIXME: currently this implementation assumes matrix representations but frame signature
 * in order to support the old transform implementation.
 *
 * @param fo frame object
 * @param inputInfo input info
 * @return JavaPairRDD handle for a frame object
 * @throws DMLRuntimeException if the frame has to be read from file and the input
 *         format is binary cell or otherwise unsupported
 */
@SuppressWarnings("unchecked")
public JavaPairRDD<?, ?> getRDDHandleForFrameObject(FrameObject fo, InputInfo inputInfo) {
    // NOTE: MB this logic should be integrated into FrameObject
    // However, for now we cannot assume that spark libraries are
    // always available and hence only store generic references in
    // matrix object while all the logic is in the SparkExecContext
    InputInfo inputInfo2 = (inputInfo == InputInfo.BinaryBlockInputInfo) ? InputInfo.BinaryBlockFrameInputInfo : inputInfo;
    JavaSparkContext sc = getSparkContext();
    JavaPairRDD<?, ?> rdd = null;
    if (fo.getRDDHandle() != null && (fo.getRDDHandle().isCheckpointRDD() || !fo.isCached(false))) {
        // CASE 1: rdd operations if already executed and cached
        // return existing rdd handling (w/o input format change)
        rdd = fo.getRDDHandle().getRDD();
    } else if (fo.isDirty() || fo.isCached(false)) {
        // CASE 2: dirty in memory data or cached result of rdd operations
        // get in-memory frame block and parallelize it
        // w/ guarded parallelize (fallback to export, rdd from file if too large)
        MatrixCharacteristics mc = fo.getMatrixCharacteristics();
        boolean fromFile = false;
        if (!OptimizerUtils.checkSparkCollectMemoryBudget(mc, 0) || !_parRDDs.reserve(OptimizerUtils.estimatePartitionedSizeExactSparsity(mc))) {
            if (fo.isDirty()) {
                // write only if necessary
                fo.exportData();
            }
            rdd = sc.hadoopFile(fo.getFileName(), inputInfo2.inputFormatClass, inputInfo2.inputKeyClass, inputInfo2.inputValueClass);
            // cp is workaround for read bug
            rdd = ((JavaPairRDD<LongWritable, FrameBlock>) rdd).mapToPair(new CopyFrameBlockPairFunction());
            fromFile = true;
        } else {
            // default case
            // pin frame in memory
            FrameBlock fb = fo.acquireRead();
            rdd = toFrameJavaPairRDD(sc, fb);
            // unpin frame
            fo.release();
            _parRDDs.registerRDD(rdd.id(), OptimizerUtils.estimatePartitionedSizeExactSparsity(mc), true);
        }
        // keep rdd handle for future operations on it
        RDDObject rddhandle = new RDDObject(rdd);
        rddhandle.setHDFSFile(fromFile);
        fo.setRDDHandle(rddhandle);
    } else {
        // CASE 3: non-dirty (file exists on HDFS)
        if (inputInfo2 == InputInfo.BinaryBlockFrameInputInfo) {
            // binary block frames are read as (LongWritable, FrameBlock) pairs
            rdd = sc.hadoopFile(fo.getFileName(), inputInfo2.inputFormatClass, inputInfo2.inputKeyClass, inputInfo2.inputValueClass);
            // note: this copy is still required in Spark 1.4 because spark hands out whatever the inputformat
            // recordreader returns; the javadoc explicitly recommend to copy all key/value pairs
            // cp is workaround for read bug
            rdd = ((JavaPairRDD<LongWritable, FrameBlock>) rdd).mapToPair(new CopyFrameBlockPairFunction());
        } else if (inputInfo2 == InputInfo.TextCellInputInfo || inputInfo2 == InputInfo.CSVInputInfo || inputInfo2 == InputInfo.MatrixMarketInputInfo) {
            // text formats are read as (LongWritable, Text) pairs
            rdd = sc.hadoopFile(fo.getFileName(), inputInfo2.inputFormatClass, inputInfo2.inputKeyClass, inputInfo2.inputValueClass);
            // cp is workaround for read bug
            rdd = ((JavaPairRDD<LongWritable, Text>) rdd).mapToPair(new CopyTextInputFunction());
        } else if (inputInfo2 == InputInfo.BinaryCellInputInfo) {
            throw new DMLRuntimeException("Binarycell not supported for frames.");
        } else {
            // report the actual method name (previously named getRDDHandleForVariable)
            throw new DMLRuntimeException("Incorrect input format in getRDDHandleForFrameObject");
        }
        // keep rdd handle for future operations on it
        RDDObject rddhandle = new RDDObject(rdd);
        rddhandle.setHDFSFile(true);
        fo.setRDDHandle(rddhandle);
    }
    return rdd;
}
Also used : CopyTextInputFunction(org.apache.sysml.runtime.instructions.spark.functions.CopyTextInputFunction) InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) RDDObject(org.apache.sysml.runtime.instructions.spark.data.RDDObject) Text(org.apache.hadoop.io.Text) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) LongWritable(org.apache.hadoop.io.LongWritable) CopyFrameBlockPairFunction(org.apache.sysml.runtime.instructions.spark.functions.CopyFrameBlockPairFunction) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 69 with FrameBlock

use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.

the class ByteBuffer method deserializeBlock.

/**
 * Returns the cache block held by this buffer.
 * <p>
 * A shallow buffer hands out the block it references directly; otherwise a new
 * matrix or frame block is created and populated from the serialized bytes.
 *
 * @return the referenced or newly deserialized cache block
 * @throws IOException if reading the serialized representation fails
 */
public CacheBlock deserializeBlock() throws IOException {
    // dense matrix/frame: shallow reference, nothing to deserialize
    if (_shallow) {
        return _cdata;
    }

    // sparse matrix / string frame: read the block back from the byte array
    DataInput input;
    CacheBlock block;
    if (_matrix) {
        input = new CacheDataInput(_bdata);
        block = new MatrixBlock();
    } else {
        input = new DataInputStream(new ByteArrayInputStream(_bdata));
        block = new FrameBlock();
    }
    block.readFields(input);
    return block;
}
Also used : DataInput(java.io.DataInput) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) ByteArrayInputStream(java.io.ByteArrayInputStream) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) DataInputStream(java.io.DataInputStream)

Example 70 with FrameBlock

use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.

the class ParameterizedBuiltinCPInstruction method processInstruction.

/**
 * Executes this parameterized builtin instruction, dispatching on its opcode
 * (compared case-insensitively).
 * <p>
 * Handled opcodes: {@code cdf}, {@code invcdf}, {@code groupedagg}, {@code rmempty},
 * {@code replace}, {@code rexpand}, {@code transformapply}, {@code transformdecode},
 * {@code transformcolmap}, {@code transformmeta} and {@code toString}. Inputs are
 * obtained from and released through the execution context, and the result is
 * bound to this instruction's output variable.
 *
 * @param ec execution context providing the inputs and receiving the output
 * @throws DMLRuntimeException on an unknown opcode, an unsupported {@code rmempty}
 *         margin, a failed transform meta data read, or a {@code toString} target
 *         that is neither a matrix nor a frame
 */
@Override
public void processInstruction(ExecutionContext ec) {
    String opcode = getOpcode();
    if (opcode.equalsIgnoreCase("cdf") || opcode.equalsIgnoreCase("invcdf")) {
        // both opcodes evaluate the configured operator function on the parameters
        SimpleOperator op = (SimpleOperator) _optr;
        double result = op.fn.execute(params);
        ec.setScalarOutput(output.getName(), new DoubleObject(result));
    } else if (opcode.equalsIgnoreCase("groupedagg")) {
        // acquire locks
        MatrixBlock target = ec.getMatrixInput(params.get(Statement.GAGG_TARGET), getExtendedOpcode());
        MatrixBlock groups = ec.getMatrixInput(params.get(Statement.GAGG_GROUPS), getExtendedOpcode());
        MatrixBlock weights = null;
        if (params.get(Statement.GAGG_WEIGHTS) != null)
            weights = ec.getMatrixInput(params.get(Statement.GAGG_WEIGHTS), getExtendedOpcode());
        // number of groups; -1 if not specified
        int ngroups = -1;
        if (params.get(Statement.GAGG_NUM_GROUPS) != null) {
            ngroups = (int) Double.parseDouble(params.get(Statement.GAGG_NUM_GROUPS));
        }
        // compute the result
        // num threads
        int k = Integer.parseInt(params.get("k"));
        MatrixBlock soresBlock = groups.groupedAggOperations(target, weights, new MatrixBlock(), ngroups, _optr, k);
        ec.setMatrixOutput(output.getName(), soresBlock, getExtendedOpcode());
        // release locks
        ec.releaseMatrixInput(params.get(Statement.GAGG_TARGET), getExtendedOpcode());
        ec.releaseMatrixInput(params.get(Statement.GAGG_GROUPS), getExtendedOpcode());
        if (params.get(Statement.GAGG_WEIGHTS) != null)
            ec.releaseMatrixInput(params.get(Statement.GAGG_WEIGHTS), getExtendedOpcode());
    } else if (opcode.equalsIgnoreCase("rmempty")) {
        String margin = params.get("margin");
        if (!(margin.equals("rows") || margin.equals("cols")))
            throw new DMLRuntimeException("Unsupported margin identifier '" + margin + "'.");
        // acquire locks
        MatrixBlock target = ec.getMatrixInput(params.get("target"), getExtendedOpcode());
        MatrixBlock select = params.containsKey("select") ? ec.getMatrixInput(params.get("select"), getExtendedOpcode()) : null;
        // compute the result
        boolean emptyReturn = Boolean.parseBoolean(params.get("empty.return").toLowerCase());
        MatrixBlock soresBlock = target.removeEmptyOperations(new MatrixBlock(), margin.equals("rows"), emptyReturn, select);
        // release locks
        ec.setMatrixOutput(output.getName(), soresBlock, getExtendedOpcode());
        ec.releaseMatrixInput(params.get("target"), getExtendedOpcode());
        if (params.containsKey("select"))
            ec.releaseMatrixInput(params.get("select"), getExtendedOpcode());
    } else if (opcode.equalsIgnoreCase("replace")) {
        // acquire locks
        MatrixBlock target = ec.getMatrixInput(params.get("target"), getExtendedOpcode());
        // compute the result
        double pattern = Double.parseDouble(params.get("pattern"));
        double replacement = Double.parseDouble(params.get("replacement"));
        MatrixBlock ret = (MatrixBlock) target.replaceOperations(new MatrixBlock(), pattern, replacement);
        // release locks
        ec.setMatrixOutput(output.getName(), ret, getExtendedOpcode());
        ec.releaseMatrixInput(params.get("target"), getExtendedOpcode());
    } else if (opcode.equalsIgnoreCase("rexpand")) {
        // acquire locks
        MatrixBlock target = ec.getMatrixInput(params.get("target"), getExtendedOpcode());
        // compute the result
        double maxVal = Double.parseDouble(params.get("max"));
        boolean dirVal = params.get("dir").equals("rows");
        boolean cast = Boolean.parseBoolean(params.get("cast"));
        boolean ignore = Boolean.parseBoolean(params.get("ignore"));
        int numThreads = Integer.parseInt(params.get("k"));
        MatrixBlock ret = (MatrixBlock) target.rexpandOperations(new MatrixBlock(), maxVal, dirVal, cast, ignore, numThreads);
        // release locks
        ec.setMatrixOutput(output.getName(), ret, getExtendedOpcode());
        ec.releaseMatrixInput(params.get("target"), getExtendedOpcode());
    } else if (opcode.equalsIgnoreCase("transformapply")) {
        // acquire locks
        FrameBlock data = ec.getFrameInput(params.get("target"));
        FrameBlock meta = ec.getFrameInput(params.get("meta"));
        String[] colNames = data.getColumnNames();
        // compute transformapply
        Encoder encoder = EncoderFactory.createEncoder(params.get("spec"), colNames, data.getNumColumns(), meta);
        MatrixBlock mbout = encoder.apply(data, new MatrixBlock(data.getNumRows(), data.getNumColumns(), false));
        // release locks
        ec.setMatrixOutput(output.getName(), mbout, getExtendedOpcode());
        ec.releaseFrameInput(params.get("target"));
        ec.releaseFrameInput(params.get("meta"));
    } else if (opcode.equalsIgnoreCase("transformdecode")) {
        // acquire locks
        MatrixBlock data = ec.getMatrixInput(params.get("target"), getExtendedOpcode());
        FrameBlock meta = ec.getFrameInput(params.get("meta"));
        String[] colnames = meta.getColumnNames();
        // compute transformdecode
        Decoder decoder = DecoderFactory.createDecoder(getParameterMap().get("spec"), colnames, null, meta);
        FrameBlock fbout = decoder.decode(data, new FrameBlock(decoder.getSchema()));
        // name the output columns after the leading meta data columns
        fbout.setColumnNames(Arrays.copyOfRange(colnames, 0, fbout.getNumColumns()));
        // release locks
        ec.setFrameOutput(output.getName(), fbout);
        ec.releaseMatrixInput(params.get("target"), getExtendedOpcode());
        ec.releaseFrameInput(params.get("meta"));
    } else if (opcode.equalsIgnoreCase("transformcolmap")) {
        // acquire locks
        FrameBlock meta = ec.getFrameInput(params.get("target"));
        String[] colNames = meta.getColumnNames();
        // compute transformcolmap
        Encoder encoder = EncoderFactory.createEncoder(params.get("spec"), colNames, meta.getNumColumns(), null);
        MatrixBlock mbout = encoder.getColMapping(meta, new MatrixBlock(meta.getNumColumns(), 3, false));
        // release locks
        ec.setMatrixOutput(output.getName(), mbout, getExtendedOpcode());
        ec.releaseFrameInput(params.get("target"));
    } else if (opcode.equalsIgnoreCase("transformmeta")) {
        // get input spec and path
        String spec = getParameterMap().get("spec");
        String path = getParameterMap().get(ParameterizedBuiltinFunctionExpression.TF_FN_PARAM_MTD);
        String delim = getParameterMap().containsKey("sep") ? getParameterMap().get("sep") : TfUtils.TXMTD_SEP;
        // execute transform meta data read
        FrameBlock meta = null;
        try {
            meta = TfMetaUtils.readTransformMetaDataFromFile(spec, path, delim);
        } catch (Exception ex) {
            throw new DMLRuntimeException(ex);
        }
        // set output (no inputs were acquired in this branch)
        ec.setFrameOutput(output.getName(), meta);
    } else if (opcode.equalsIgnoreCase("toString")) {
        // handle input parameters, falling back to the TOSTRING_* defaults
        int rows = (getParam("rows") != null) ? Integer.parseInt(getParam("rows")) : TOSTRING_MAXROWS;
        int cols = (getParam("cols") != null) ? Integer.parseInt(getParam("cols")) : TOSTRING_MAXCOLS;
        int decimal = (getParam("decimal") != null) ? Integer.parseInt(getParam("decimal")) : TOSTRING_DECIMAL;
        boolean sparse = (getParam("sparse") != null) ? Boolean.parseBoolean(getParam("sparse")) : TOSTRING_SPARSE;
        String separator = (getParam("sep") != null) ? getParam("sep") : TOSTRING_SEPARATOR;
        String lineseparator = (getParam("linesep") != null) ? getParam("linesep") : TOSTRING_LINESEPARATOR;
        // get input matrix/frame and convert to string
        CacheableData<?> data = ec.getCacheableData(getParam("target"));
        String out = null;
        if (data instanceof MatrixObject) {
            MatrixBlock matrix = (MatrixBlock) data.acquireRead();
            warnOnTrunction(matrix, rows, cols);
            out = DataConverter.toString(matrix, sparse, separator, lineseparator, rows, cols, decimal);
        } else if (data instanceof FrameObject) {
            FrameBlock frame = (FrameBlock) data.acquireRead();
            warnOnTrunction(frame, rows, cols);
            out = DataConverter.toString(frame, sparse, separator, lineseparator, rows, cols, decimal);
        } else {
            throw new DMLRuntimeException("toString only converts matrix or frames to string");
        }
        ec.releaseCacheableData(getParam("target"));
        ec.setScalarOutput(output.getName(), new StringObject(out));
    } else {
        throw new DMLRuntimeException("Unknown opcode : " + opcode);
    }
}
Also used : SimpleOperator(org.apache.sysml.runtime.matrix.operators.SimpleOperator) MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) FrameObject(org.apache.sysml.runtime.controlprogram.caching.FrameObject) Decoder(org.apache.sysml.runtime.transform.decode.Decoder) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) Encoder(org.apache.sysml.runtime.transform.encode.Encoder)

Aggregations

FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock)90 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)28 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)26 ValueType (org.apache.sysml.parser.Expression.ValueType)23 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)23 FrameReader (org.apache.sysml.runtime.io.FrameReader)18 IOException (java.io.IOException)16 RUNTIME_PLATFORM (org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM)16 FrameObject (org.apache.sysml.runtime.controlprogram.caching.FrameObject)15 LongWritable (org.apache.hadoop.io.LongWritable)12 JavaPairRDD (org.apache.spark.api.java.JavaPairRDD)11 CSVFileFormatProperties (org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties)11 FrameWriter (org.apache.sysml.runtime.io.FrameWriter)9 TestConfiguration (org.apache.sysml.test.integration.TestConfiguration)8 Text (org.apache.hadoop.io.Text)7 SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)7 RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject)7 ConvertStringToLongTextPair (org.apache.sysml.runtime.instructions.spark.functions.ConvertStringToLongTextPair)6 CopyTextInputFunction (org.apache.sysml.runtime.instructions.spark.functions.CopyTextInputFunction)5 MetaDataFormat (org.apache.sysml.runtime.matrix.MetaDataFormat)5