Search in sources :

Example 16 with FrameObject

use of org.apache.sysml.runtime.controlprogram.caching.FrameObject in project incubator-systemml by apache.

the class MLContextConversionUtil method javaRDDStringIJVToFrameObject.

/**
 * Convert a {@code JavaRDD<String>} in IJV format to a {@code FrameObject}.
 * Note that metadata is required for IJV format.
 * <p>
 * The strings are mapped to {@code (LongWritable, Text)} pairs, copied, and
 * then converted from text cells to binary blocks using an all-{@code STRING}
 * schema with one entry per column reported by the metadata. The returned
 * frame object itself carries the schema taken from the frame metadata.
 *
 * @param javaRDD
 *            the Java RDD of strings
 * @param frameMetadata
 *            frame metadata, must not be {@code null}
 * @return the {@code JavaRDD<String>} converted to a {@code FrameObject}, or
 *         {@code null} if the text-cell to binary-block conversion fails
 *         with a {@code DMLRuntimeException}
 * @throws NullPointerException
 *             if {@code frameMetadata} is {@code null}
 */
public static FrameObject javaRDDStringIJVToFrameObject(JavaRDD<String> javaRDD, FrameMetadata frameMetadata) {
    // The frame schema below is read from the metadata unconditionally, so a
    // missing metadata object can never work; fail up front with a clear
    // message instead of an anonymous NullPointerException further down.
    java.util.Objects.requireNonNull(frameMetadata, "frameMetadata is required to convert an IJV JavaRDD<String> to a FrameObject");
    MatrixCharacteristics mc = frameMetadata.asMatrixCharacteristics();
    JavaPairRDD<LongWritable, Text> javaPairRDD = javaRDD.mapToPair(new ConvertStringToLongTextPair());
    JavaPairRDD<LongWritable, Text> javaPairRDDText = javaPairRDD.mapToPair(new CopyTextInputFunction());
    FrameObject frameObject = new FrameObject(OptimizerUtils.getUniqueTempFileName(), new MetaDataFormat(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo), frameMetadata.getFrameSchema().getSchema().toArray(new ValueType[0]));
    JavaPairRDD<Long, FrameBlock> rdd;
    try {
        // the cell conversion always runs with a default all-STRING schema
        ValueType[] lschema = UtilFunctions.nCopies((int) mc.getCols(), ValueType.STRING);
        rdd = FrameRDDConverterUtils.textCellToBinaryBlock(jsc(), javaPairRDDText, mc, lschema);
    } catch (DMLRuntimeException e) {
        // best-effort contract kept for existing callers: report and return null
        e.printStackTrace();
        return null;
    }
    frameObject.setRDDHandle(new RDDObject(rdd));
    return frameObject;
}
Also used : MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) ValueType(org.apache.sysml.parser.Expression.ValueType) Text(org.apache.hadoop.io.Text) FrameObject(org.apache.sysml.runtime.controlprogram.caching.FrameObject) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) CopyTextInputFunction(org.apache.sysml.runtime.instructions.spark.functions.CopyTextInputFunction) ConvertStringToLongTextPair(org.apache.sysml.runtime.instructions.spark.functions.ConvertStringToLongTextPair) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) RDDObject(org.apache.sysml.runtime.instructions.spark.data.RDDObject) LongWritable(org.apache.hadoop.io.LongWritable)

Example 17 with FrameObject

use of org.apache.sysml.runtime.controlprogram.caching.FrameObject in project incubator-systemml by apache.

the class Recompiler method executeInMemoryFrameReblock.

/**
 * Performs an in-memory frame reblock by handing the frame block read from
 * the input variable over to the output variable.
 *
 * @param ec
 *            execution context used to look up both frame objects
 * @param varin
 *            name of the input frame variable
 * @param varout
 *            name of the output frame variable
 */
public static void executeInMemoryFrameReblock(ExecutionContext ec, String varin, String varout) {
    FrameObject source = ec.getFrameObject(varin);
    FrameObject target = ec.getFrameObject(varout);
    // Read the text input frame (through the buffer pool; the frame object
    // carries all relevant information including additional arguments for
    // csv reblock) and pass it on as the new content of the output, which
    // also updates the output's matrix characteristics.
    target.acquireModify(source.acquireRead());
    // release the output first, then the input
    target.release();
    source.release();
}
Also used : FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) FrameObject(org.apache.sysml.runtime.controlprogram.caching.FrameObject)

Example 18 with FrameObject

use of org.apache.sysml.runtime.controlprogram.caching.FrameObject in project incubator-systemml by apache.

the class Recompiler method rUpdateStatistics.

/**
 * Recursively updates the size information of the given hop and all of its
 * inputs, using the entries of the given symbol table where applicable.
 * <p>
 * Inputs are processed before the hop itself. Depending on the hop type,
 * dimensions are taken from the symbol table (data ops other than persistent
 * reads), from a metadata file (persistent reads with unknown dimensions),
 * or from evaluating the size expressions of data generators, reshape, and
 * indexing operations. If no size expression turned previously unknown
 * dimensions into known ones, the hop refreshes its size information itself.
 * Finally the hop is marked as visited; hops already marked are skipped.
 *
 * @param hop
 *            the hop to update
 * @param vars
 *            the symbol table providing the current variables
 * @throws DMLRuntimeException
 *             if a data generation hop has an unexpected generation method
 */
public static void rUpdateStatistics(Hop hop, LocalVariableMap vars) {
    if (hop.isVisited())
        return;
    // recursively process children
    if (hop.getInput() != null)
        for (Hop c : hop.getInput()) rUpdateStatistics(c, vars);
    boolean updatedSizeExpr = false;
    if (hop instanceof DataOp && ((DataOp) hop).getDataOpType() != DataOpTypes.PERSISTENTREAD) {
        // update statistics of data ops according to the symbol table entry of the same name
        // (with awareness not to override persistent reads to an existing name)
        DataOp d = (DataOp) hop;
        String varName = d.getName();
        if (vars.keySet().contains(varName)) {
            Data dat = vars.get(varName);
            if (dat instanceof MatrixObject) {
                MatrixObject mo = (MatrixObject) dat;
                d.setDim1(mo.getNumRows());
                d.setDim2(mo.getNumColumns());
                d.setNnz(mo.getNnz());
            } else if (dat instanceof FrameObject) {
                // frames only provide dimensions here, no number of non-zeros
                FrameObject fo = (FrameObject) dat;
                d.setDim1(fo.getNumRows());
                d.setDim2(fo.getNumColumns());
            }
        }
    } else if (hop instanceof DataOp && ((DataOp) hop).getDataOpType() == DataOpTypes.PERSISTENTREAD && !hop.dimsKnown() && ((DataOp) hop).getInputFormatType() != FileFormatTypes.CSV && !ConfigurationManager.getCompilerConfigFlag(ConfigType.IGNORE_READ_WRITE_METADATA)) {
        // special case for persistent reads with unknown size (read-after-write):
        // update hop with read meta data
        DataOp dop = (DataOp) hop;
        tryReadMetaDataFileMatrixCharacteristics(dop);
    } else if (hop instanceof DataGenOp) {
        // update size expression for rand/seq according to symbol table entries
        DataGenOp d = (DataGenOp) hop;
        HashMap<String, Integer> params = d.getParamIndexMap();
        if (d.getOp() == DataGenMethod.RAND || d.getOp() == DataGenMethod.SINIT || d.getOp() == DataGenMethod.SAMPLE) {
            boolean initUnknown = !d.dimsKnown();
            int ix1 = params.get(DataExpression.RAND_ROWS);
            int ix2 = params.get(DataExpression.RAND_COLS);
            // update rows/cols by evaluating simple expression of literals, nrow, ncol, scalars, binaryops
            HashMap<Long, Long> memo = new HashMap<>();
            d.refreshRowsParameterInformation(d.getInput().get(ix1), vars, memo);
            d.refreshColsParameterInformation(d.getInput().get(ix2), vars, memo);
            updatedSizeExpr = initUnknown & d.dimsKnown();
        } else if (d.getOp() == DataGenMethod.SEQ) {
            boolean initUnknown = !d.dimsKnown();
            int ix1 = params.get(Statement.SEQ_FROM);
            int ix2 = params.get(Statement.SEQ_TO);
            int ix3 = params.get(Statement.SEQ_INCR);
            HashMap<Long, Double> memo = new HashMap<>();
            double from = d.computeBoundsInformation(d.getInput().get(ix1), vars, memo);
            double to = d.computeBoundsInformation(d.getInput().get(ix2), vars, memo);
            double incr = d.computeBoundsInformation(d.getInput().get(ix3), vars, memo);
            // Double.MAX_VALUE marks a bound that could not be determined; sizes are
            // only derived when from, to, and the increment are all known.
            if (from != Double.MAX_VALUE && to != Double.MAX_VALUE && incr != Double.MAX_VALUE) {
                // special case increment: flip its sign if it points away from 'to'.
                // This must only happen for a known increment, otherwise the unknown
                // marker would become -Double.MAX_VALUE and be mistaken for a real value.
                incr *= ((from > to && incr > 0) || (from < to && incr < 0)) ? -1.0 : 1.0;
                d.setDim1(UtilFunctions.getSeqLength(from, to, incr));
                d.setDim2(1);
                d.setIncrementValue(incr);
            }
            updatedSizeExpr = initUnknown & d.dimsKnown();
        } else {
            throw new DMLRuntimeException("Unexpected data generation method: " + d.getOp());
        }
    } else if (hop instanceof ReorgOp && ((ReorgOp) (hop)).getOp() == Hop.ReOrgOp.RESHAPE) {
        // update size expression for reshape according to symbol table entries
        ReorgOp d = (ReorgOp) hop;
        boolean initUnknown = !d.dimsKnown();
        HashMap<Long, Long> memo = new HashMap<>();
        d.refreshRowsParameterInformation(d.getInput().get(1), vars, memo);
        d.refreshColsParameterInformation(d.getInput().get(2), vars, memo);
        updatedSizeExpr = initUnknown & d.dimsKnown();
    } else if (hop instanceof IndexingOp) {
        // update size expression for indexing according to symbol table entries
        IndexingOp iop = (IndexingOp) hop;
        // inpRowL
        Hop input2 = iop.getInput().get(1);
        // inpRowU
        Hop input3 = iop.getInput().get(2);
        // inpColL
        Hop input4 = iop.getInput().get(3);
        // inpColU
        Hop input5 = iop.getInput().get(4);
        boolean initUnknown = !iop.dimsKnown();
        HashMap<Long, Double> memo = new HashMap<>();
        double rl = iop.computeBoundsInformation(input2, vars, memo);
        double ru = iop.computeBoundsInformation(input3, vars, memo);
        double cl = iop.computeBoundsInformation(input4, vars, memo);
        double cu = iop.computeBoundsInformation(input5, vars, memo);
        // each dimension is set independently once both of its bounds are known
        if (rl != Double.MAX_VALUE && ru != Double.MAX_VALUE)
            iop.setDim1((long) (ru - rl + 1));
        if (cl != Double.MAX_VALUE && cu != Double.MAX_VALUE)
            iop.setDim2((long) (cu - cl + 1));
        updatedSizeExpr = initUnknown & iop.dimsKnown();
    }
    // let the hop refresh its own size information,
    // without overwriting inferred size expressions
    if (!updatedSizeExpr) {
        hop.refreshSizeInformation();
    }
    hop.setVisited();
}
Also used : MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) HashMap(java.util.HashMap) Hop(org.apache.sysml.hops.Hop) CacheableData(org.apache.sysml.runtime.controlprogram.caching.CacheableData) Data(org.apache.sysml.runtime.instructions.cp.Data) FrameObject(org.apache.sysml.runtime.controlprogram.caching.FrameObject) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) IndexingOp(org.apache.sysml.hops.IndexingOp) DataGenOp(org.apache.sysml.hops.DataGenOp) ReorgOp(org.apache.sysml.hops.ReorgOp) DataOp(org.apache.sysml.hops.DataOp)

Example 19 with FrameObject

use of org.apache.sysml.runtime.controlprogram.caching.FrameObject in project incubator-systemml by apache.

the class ExecutionContext method setFrameOutput.

/**
 * Stores the given frame block as the content of the frame variable with the
 * given name and re-registers that variable.
 *
 * @param varName
 *            name of the frame variable to write
 * @param outputData
 *            frame block that becomes the variable's content
 */
public void setFrameOutput(String varName, FrameBlock outputData) {
    final FrameObject target = getFrameObject(varName);
    // hand over the new content, then release the object again
    target.acquireModify(outputData);
    target.release();
    // put the updated frame object back under the same name
    setVariable(varName, target);
}
Also used : FrameObject(org.apache.sysml.runtime.controlprogram.caching.FrameObject)

Example 20 with FrameObject

use of org.apache.sysml.runtime.controlprogram.caching.FrameObject in project incubator-systemml by apache.

the class SparkExecutionContext method getBroadcastForFrameVariable.

/**
 * Returns the partitioned broadcast of the frame variable with the given
 * name. An existing valid broadcast handle is reused; otherwise the frame is
 * read, split into partitioned blocks, broadcast in one or more partitions,
 * and the new handle is attached to the frame object.
 *
 * @param varname
 *            name of the frame variable
 * @return the partitioned broadcast of the frame
 */
@SuppressWarnings("unchecked")
public PartitionedBroadcast<FrameBlock> getBroadcastForFrameVariable(String varname) {
    long startNanos = 0;
    if (DMLScript.STATISTICS) {
        startNanos = System.nanoTime();
    }
    FrameObject frame = getFrameObject(varname);

    // reuse existing broadcast handle
    PartitionedBroadcast<FrameBlock> result = null;
    if (frame.getBroadcastHandle() != null && frame.getBroadcastHandle().isValid()) {
        result = frame.getBroadcastHandle().getBroadcast();
    }

    // create new broadcast handle (never created, evicted)
    if (result == null) {
        // account for overwritten invalid broadcast (e.g., evicted)
        if (frame.getBroadcastHandle() != null) {
            CacheableData.addBroadcastSize(-frame.getBroadcastHandle().getSize());
        }

        // obtain meta data for frame: a block spans all columns and the default number of rows
        int colsPerBlock = (int) frame.getNumColumns();
        int rowsPerBlock = OptimizerUtils.getDefaultFrameSize();

        // create partitioned frame block and release memory consumed by input
        FrameBlock content = frame.acquireRead();
        PartitionedBlock<FrameBlock> partitioned = new PartitionedBlock<>(content, rowsPerBlock, colsPerBlock);
        frame.release();

        // determine coarse-grained partitioning
        int blocksPerPart = PartitionedBroadcast.computeBlocksPerPartition(frame.getNumRows(), frame.getNumColumns(), rowsPerBlock, colsPerBlock);
        int partCount = (int) Math.ceil((double) partitioned.getNumRowBlocks() * partitioned.getNumColumnBlocks() / blocksPerPart);
        Broadcast<PartitionedBlock<FrameBlock>>[] broadcasts = new Broadcast[partCount];

        if (partCount <= 1) {
            // single partition
            broadcasts[0] = getSparkContext().broadcast(partitioned);
            if (!isLocalMaster()) {
                partitioned.clearBlocks();
            }
        } else {
            // create coarse-grained partitioned broadcasts
            for (int part = 0; part < partCount; part++) {
                int firstBlock = part * blocksPerPart;
                int blockCount = Math.min(blocksPerPart, partitioned.getNumRowBlocks() * partitioned.getNumColumnBlocks() - firstBlock);
                PartitionedBlock<FrameBlock> slice = partitioned.createPartition(firstBlock, blockCount, new FrameBlock());
                broadcasts[part] = getSparkContext().broadcast(slice);
                if (!isLocalMaster()) {
                    slice.clearBlocks();
                }
            }
        }

        // wrap the broadcasts, register the handle, and account for its size
        result = new PartitionedBroadcast<>(broadcasts, frame.getMatrixCharacteristics());
        BroadcastObject<FrameBlock> newHandle = new BroadcastObject<>(result, OptimizerUtils.estimatePartitionedSizeExactSparsity(frame.getMatrixCharacteristics()));
        frame.setBroadcastHandle(newHandle);
        CacheableData.addBroadcastSize(newHandle.getSize());
    }

    if (DMLScript.STATISTICS) {
        Statistics.accSparkBroadCastTime(System.nanoTime() - startNanos);
        Statistics.incSparkBroadcastCount(1);
    }
    return result;
}
Also used : PartitionedBlock(org.apache.sysml.runtime.instructions.spark.data.PartitionedBlock) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) PartitionedBroadcast(org.apache.sysml.runtime.instructions.spark.data.PartitionedBroadcast) Broadcast(org.apache.spark.broadcast.Broadcast) FrameObject(org.apache.sysml.runtime.controlprogram.caching.FrameObject) Checkpoint(org.apache.sysml.lops.Checkpoint) BroadcastObject(org.apache.sysml.runtime.instructions.spark.data.BroadcastObject)

Aggregations

FrameObject (org.apache.sysml.runtime.controlprogram.caching.FrameObject)26 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)14 FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock)14 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)13 ValueType (org.apache.sysml.parser.Expression.ValueType)7 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)7 MetaDataFormat (org.apache.sysml.runtime.matrix.MetaDataFormat)7 RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject)6 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)6 LongWritable (org.apache.hadoop.io.LongWritable)5 Text (org.apache.hadoop.io.Text)5 Data (org.apache.sysml.runtime.instructions.cp.Data)4 ConvertStringToLongTextPair (org.apache.sysml.runtime.instructions.spark.functions.ConvertStringToLongTextPair)4 CopyTextInputFunction (org.apache.sysml.runtime.instructions.spark.functions.CopyTextInputFunction)4 JavaPairRDD (org.apache.spark.api.java.JavaPairRDD)3 DMLException (org.apache.sysml.api.DMLException)3 CacheableData (org.apache.sysml.runtime.controlprogram.caching.CacheableData)3 SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)3 MatrixFormatMetaData (org.apache.sysml.runtime.matrix.MatrixFormatMetaData)3 Encoder (org.apache.sysml.runtime.transform.encode.Encoder)3