Search in sources :

Example 31 with MetaDataFormat

use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.

the class MLContextConversionUtil method matrixBlockToMatrixObject.

/**
 * Wrap an in-memory {@code MatrixBlock} in a newly created
 * {@code MatrixObject}.
 *
 * The new object is backed by a unique temporary file name and is declared
 * with binary-block input/output formats.
 *
 * @param variableName
 *            name of the variable associated with the matrix (not read by
 *            this method)
 * @param matrixBlock
 *            matrix as a MatrixBlock
 * @param matrixMetadata
 *            the matrix metadata; if {@code null}, default
 *            {@code MatrixCharacteristics} are used
 * @return a {@code MatrixObject} holding the given {@code MatrixBlock}
 * @throws MLContextException
 *             if a {@code DMLRuntimeException} occurs during the conversion
 */
public static MatrixObject matrixBlockToMatrixObject(String variableName, MatrixBlock matrixBlock, MatrixMetadata matrixMetadata) {
    try {
        // fall back to default characteristics when no metadata was supplied
        MatrixCharacteristics characteristics;
        if (matrixMetadata == null) {
            characteristics = new MatrixCharacteristics();
        } else {
            characteristics = matrixMetadata.asMatrixCharacteristics();
        }

        String tempFileName = OptimizerUtils.getUniqueTempFileName();
        MetaDataFormat format = new MetaDataFormat(characteristics, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo);
        MatrixObject result = new MatrixObject(ValueType.DOUBLE, tempFileName, format);

        // hand the block over to the matrix object, then release it again
        result.acquireModify(matrixBlock);
        result.release();
        return result;
    } catch (DMLRuntimeException ex) {
        throw new MLContextException("Exception converting MatrixBlock to MatrixObject", ex);
    }
}
Also used : MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 32 with MetaDataFormat

use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.

the class MLContextConversionUtil method javaRDDStringIJVToFrameObject.

/**
 * Convert a {@code JavaRDD<String>} in IJV format to a {@code FrameObject}.
 * Note that metadata is required for IJV format.
 *
 * @param javaRDD
 *            the Java RDD of strings
 * @param frameMetadata
 *            frame metadata; must not be {@code null}, since both the
 *            frame characteristics and the frame schema are read from it
 * @return the {@code JavaRDD<String>} converted to a {@code FrameObject}
 * @throws MLContextException
 *             if {@code frameMetadata} is {@code null} or if a
 *             {@code DMLRuntimeException} occurs during the conversion
 */
public static FrameObject javaRDDStringIJVToFrameObject(JavaRDD<String> javaRDD, FrameMetadata frameMetadata) {
    // Fail fast with a clear message: the schema below is always taken from the
    // metadata, so a null value could never succeed (it used to surface as a bare NPE).
    if (frameMetadata == null) {
        throw new MLContextException("Frame metadata is required to convert a JavaRDD<String> in IJV format to a FrameObject");
    }
    MatrixCharacteristics mc = frameMetadata.asMatrixCharacteristics();

    JavaPairRDD<LongWritable, Text> javaPairRDD = javaRDD.mapToPair(new ConvertStringToLongTextPair());
    JavaPairRDD<LongWritable, Text> javaPairRDDText = javaPairRDD.mapToPair(new CopyTextInputFunction());

    FrameObject frameObject = new FrameObject(OptimizerUtils.getUniqueTempFileName(), new MetaDataFormat(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo), frameMetadata.getFrameSchema().getSchema().toArray(new ValueType[0]));

    JavaPairRDD<Long, FrameBlock> rdd;
    try {
        // The text cells are converted with an all-STRING schema (one entry per column),
        // independent of the schema attached to the frame object above.
        ValueType[] lschema = UtilFunctions.nCopies((int) mc.getCols(), ValueType.STRING);
        rdd = FrameRDDConverterUtils.textCellToBinaryBlock(jsc(), javaPairRDDText, mc, lschema);
    } catch (DMLRuntimeException e) {
        // Propagate instead of printing the stack trace and returning null, so callers
        // see the real cause rather than a later NullPointerException.
        throw new MLContextException("Exception converting JavaRDD<String> in IJV format to FrameObject", e);
    }
    frameObject.setRDDHandle(new RDDObject(rdd));
    return frameObject;
}
Also used : MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) ValueType(org.apache.sysml.parser.Expression.ValueType) Text(org.apache.hadoop.io.Text) FrameObject(org.apache.sysml.runtime.controlprogram.caching.FrameObject) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) CopyTextInputFunction(org.apache.sysml.runtime.instructions.spark.functions.CopyTextInputFunction) ConvertStringToLongTextPair(org.apache.sysml.runtime.instructions.spark.functions.ConvertStringToLongTextPair) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) RDDObject(org.apache.sysml.runtime.instructions.spark.data.RDDObject) LongWritable(org.apache.hadoop.io.LongWritable)

Example 33 with MetaDataFormat

use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.

the class RewriteRemovePersistentReadWrite method rule_RemovePersistentDataOp.

/**
 * Recursively walks the hop DAG rooted at {@code hop} and turns registered
 * persistent reads/writes into transient reads/writes.
 *
 * A persistent read is converted only if its name is contained in
 * {@code _inputs}; a persistent write only if its name is contained in
 * {@code _outputs}. Otherwise a warning is logged and the operator is left
 * unchanged. Each hop is marked as visited after it has been processed.
 *
 * @param hop
 *            root of the (sub-)DAG to process
 */
private void rule_RemovePersistentDataOp(Hop hop) {
    // skip hops that were already handled
    if (hop.isVisited()) {
        return;
    }

    // process all children first
    ArrayList<Hop> children = hop.getInput();
    for (int pos = 0; pos < children.size(); pos++) {
        rule_RemovePersistentDataOp(children.get(pos));
    }

    // rewrite persistent data operators of registered variables
    if (hop instanceof DataOp) {
        DataOp dop = (DataOp) hop;
        switch(dop.getDataOpType()) {
            case PERSISTENTREAD: {
                if (!_inputs.contains(dop.getName())) {
                    LOG.warn("Non-registered persistent read of variable '" + dop.getName() + "' (line " + dop.getBeginLine() + ").");
                    break;
                }
                dop.setDataOpType(DataOpTypes.TRANSIENTREAD);
                if (hop.getDataType() == DataType.SCALAR) {
                    dop.removeInput("iofilename");
                }
                // disable unnecessary reblock of binary block w/ equal block sizes
                if (dop.requiresReblock()) {
                    // a missing entry yields null, which fails the instanceof test below
                    Object registeredMeta = _inputsMeta.get(dop.getName());
                    if (registeredMeta instanceof MetaDataFormat) {
                        MetaDataFormat meta = (MetaDataFormat) registeredMeta;
                        MatrixCharacteristics mc = meta.getMatrixCharacteristics();
                        boolean matchingBlksz = mc.getRowsPerBlock() == dop.getRowsInBlock() && mc.getColsPerBlock() == dop.getColsInBlock();
                        // binary matrix w/ matching dims and frames do not require reblock
                        boolean binaryBlockInput = meta.getInputInfo() == InputInfo.BinaryBlockInputInfo;
                        if (binaryBlockInput && (matchingBlksz || dop.getDataType() == DataType.FRAME)) {
                            dop.setRequiresReblock(false);
                        }
                    }
                }
                break;
            }
            case PERSISTENTWRITE: {
                if (!_outputs.contains(dop.getName())) {
                    LOG.warn("Non-registered persistent write of variable '" + dop.getName() + "' (line " + dop.getBeginLine() + ").");
                    break;
                }
                dop.setDataOpType(DataOpTypes.TRANSIENTWRITE);
                // take over the block sizes of the first input
                dop.setRowsInBlock(dop.getInput().get(0).getRowsInBlock());
                dop.setColsInBlock(dop.getInput().get(0).getColsInBlock());
                if (hop.getDataType() == DataType.SCALAR) {
                    dop.removeInput("iofilename");
                }
                break;
            }
            default:
                // all other data operator types are left untouched
                break;
        }
    }

    // mark processed
    hop.setVisited();
}
Also used : MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) DataOpTypes(org.apache.sysml.hops.Hop.DataOpTypes) Hop(org.apache.sysml.hops.Hop) DataOp(org.apache.sysml.hops.DataOp) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 34 with MetaDataFormat

use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.

the class InterProceduralAnalysis method populateLocalVariableMapForFunctionCall.

/**
 * Fills {@code vars} with one entry per function input parameter, based on
 * the input hops of the given function call.
 *
 * Matrix inputs are registered as a {@code MatrixObject} carrying the
 * dimensions of the input hop, the configured block size, and the number of
 * non-zeros (or -1 if {@code fcallSizes} does not report the nnz as safe).
 * Scalar inputs are registered if they are literals, or - when scalar
 * propagation is enabled and the function is called exactly once - if the
 * input is a {@code DataOp} whose name maps to a {@code ScalarObject} in
 * {@code callvars}. All other inputs are not registered.
 *
 * @param fstmt
 *            function statement providing the input parameters
 * @param fop
 *            function call hop providing the input hops and function key
 * @param callvars
 *            variables of the calling context
 * @param vars
 *            variable map to populate
 * @param fcallSizes
 *            size information of function calls
 */
private static void populateLocalVariableMapForFunctionCall(FunctionStatement fstmt, FunctionOp fop, LocalVariableMap callvars, LocalVariableMap vars, FunctionCallSizeInfo fcallSizes) {
    ArrayList<DataIdentifier> params = fstmt.getInputParams();
    ArrayList<Hop> args = fop.getInput();
    String fkey = fop.getFunctionKey();

    for (int pos = 0; pos < params.size(); pos++) {
        // pair each formal parameter with the hop passed at the same position
        String paramName = params.get(pos).getName();
        Hop input = args.get(pos);

        if (input.getDataType() == DataType.MATRIX) {
            // propagate matrix characteristics
            MatrixObject mo = new MatrixObject(ValueType.DOUBLE, null);
            int blocksizeRows = ConfigurationManager.getBlocksize();
            int blocksizeCols = ConfigurationManager.getBlocksize();
            long nnz = fcallSizes.isSafeNnz(fkey, pos) ? input.getNnz() : -1;
            MatrixCharacteristics mc = new MatrixCharacteristics(input.getDim1(), input.getDim2(), blocksizeRows, blocksizeCols, nnz);
            mo.setMetaData(new MetaDataFormat(mc, null, null));
            vars.put(paramName, mo);
            continue;
        }

        if (input.getDataType() != DataType.SCALAR) {
            continue;
        }

        // (for multiple calls, literal equivalence already checked)
        if (input instanceof LiteralOp) {
            vars.put(paramName, ScalarObjectFactory.createScalarObject(input.getValueType(), (LiteralOp) input));
            continue;
        }

        // single call site and input scalar is an existing variable in the symbol table
        if (PROPAGATE_SCALAR_VARS_INTO_FUN && fcallSizes.getFunctionCallCount(fkey) == 1 && input instanceof DataOp) {
            Data scalar = callvars.get(input.getName());
            if (scalar instanceof ScalarObject) {
                vars.put(paramName, scalar);
            }
        }
    }
}
Also used : MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) DataIdentifier(org.apache.sysml.parser.DataIdentifier) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) Hop(org.apache.sysml.hops.Hop) Data(org.apache.sysml.runtime.instructions.cp.Data) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) ScalarObject(org.apache.sysml.runtime.instructions.cp.ScalarObject) LiteralOp(org.apache.sysml.hops.LiteralOp) DataOp(org.apache.sysml.hops.DataOp)

Example 35 with MetaDataFormat

use of org.apache.sysml.runtime.matrix.MetaDataFormat in project incubator-systemml by apache.

the class Recompiler method checkCPReblock.

/**
 * CP reblock check for spark instructions; in contrast to MR, we can not
 * rely on the input file sizes because inputs might be passed via rdds.
 *
 * The check is negative if dynamic recompilation or hybrid execution mode
 * is disabled, or if the variable has an RDD handle whose input format is
 * neither binary block nor binary cell. For unknown dimensions the decision
 * is based on the file size on HDFS; otherwise it is based on the estimated
 * in-memory size of the matrix.
 *
 * @param ec execution context
 * @param varin variable
 * @return true if the reblock should be executed in CP, false otherwise
 */
public static boolean checkCPReblock(ExecutionContext ec, String varin) {
    CacheableData<?> data = ec.getCacheableData(varin);
    MatrixCharacteristics mc = ec.getMatrixCharacteristics(varin);
    long numRows = mc.getRows();
    long numCols = mc.getCols();
    long numNonZeros = mc.getNonZeros();

    // check valid cp reblock recompilation hook
    boolean recompileHook = ConfigurationManager.isDynamicRecompilation() && OptimizerUtils.isHybridExecutionMode();
    if (!recompileHook) {
        return false;
    }

    // robustness for usage through mlcontext (key/values of input rdds are
    // not serializable for text; also bufferpool rdd read only supported for
    // binarycell and binaryblock)
    MetaDataFormat format = (MetaDataFormat) data.getMetaData();
    if (data.getRDDHandle() != null) {
        // the input info is only inspected when an rdd handle exists
        InputInfo inputInfo = format.getInputInfo();
        if (inputInfo != InputInfo.BinaryBlockInputInfo && inputInfo != InputInfo.BinaryCellInputInfo) {
            return false;
        }
    }

    // robustness unknown dimensions, e.g., for csv reblock
    if (numRows <= 0 || numCols <= 0) {
        try {
            long fileSize = MapReduceTool.getFilesizeOnHDFS(new Path(data.getFileName()));
            return (fileSize < OptimizerUtils.getLocalMemBudget() && fileSize < CP_CSV_REBLOCK_UNKNOWN_THRESHOLD_SIZE * OptimizerUtils.getParallelTextReadParallelism());
        } catch (IllegalArgumentException | IOException ex) {
            throw new DMLRuntimeException(ex);
        }
    }

    // check valid dimensions and memory requirements
    double sparsity = OptimizerUtils.getSparsity(numRows, numCols, numNonZeros);
    double memEstimate = MatrixBlock.estimateSizeInMemory(numRows, numCols, sparsity);
    if (!OptimizerUtils.isValidCPDimensions(numRows, numCols)) {
        return false;
    }
    if (!OptimizerUtils.isValidCPMatrixSize(numRows, numCols, sparsity)) {
        return false;
    }
    if (memEstimate >= OptimizerUtils.getLocalMemBudget()) {
        return false;
    }

    // check in-memory reblock size threshold (preference: distributed)
    // conservative estimate
    long estFilesize = (long) (3.5 * memEstimate);
    long cpThreshold = CP_REBLOCK_THRESHOLD_SIZE * OptimizerUtils.getParallelTextReadParallelism();
    return (estFilesize < cpThreshold);
}
Also used : Path(org.apache.hadoop.fs.Path) MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) IOException(java.io.IOException) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Aggregations

MetaDataFormat (org.apache.sysml.runtime.matrix.MetaDataFormat)54 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)47 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)28 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)26 OutputInfo (org.apache.sysml.runtime.matrix.data.OutputInfo)17 IOException (java.io.IOException)12 ValueType (org.apache.sysml.parser.Expression.ValueType)10 InputInfo (org.apache.sysml.runtime.matrix.data.InputInfo)10 RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject)9 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)9 FrameObject (org.apache.sysml.runtime.controlprogram.caching.FrameObject)7 FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock)5 Path (org.apache.hadoop.fs.Path)4 LongWritable (org.apache.hadoop.io.LongWritable)4 Text (org.apache.hadoop.io.Text)4 Data (org.apache.sysml.runtime.instructions.cp.Data)4 ScalarObject (org.apache.sysml.runtime.instructions.cp.ScalarObject)4 ConvertStringToLongTextPair (org.apache.sysml.runtime.instructions.spark.functions.ConvertStringToLongTextPair)4 CopyTextInputFunction (org.apache.sysml.runtime.instructions.spark.functions.CopyTextInputFunction)4 DataOp (org.apache.sysml.hops.DataOp)3