Search in sources :

Example 46 with FrameBlock

use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.

the class MLContextConversionUtil method dataFrameToFrameObject.

/**
 * Convert a {@code DataFrame} to a {@code FrameObject}.
 *
 * @param dataFrame
 *            the Spark {@code DataFrame}
 * @param frameMetadata
 *            the frame metadata
 * @return the {@code DataFrame} frame converted to a converted to a
 *         {@code FrameObject}
 */
public static FrameObject dataFrameToFrameObject(Dataset<Row> dataFrame, FrameMetadata frameMetadata) {
    try {
        // setup meta data and java spark context
        if (frameMetadata == null)
            frameMetadata = new FrameMetadata();
        determineFrameFormatIfNeeded(dataFrame, frameMetadata);
        boolean containsID = isDataFrameWithIDColumn(frameMetadata);
        MatrixCharacteristics mc = frameMetadata.asMatrixCharacteristics();
        if (mc == null)
            mc = new MatrixCharacteristics();
        // convert data frame and obtain column names / schema
        // TODO extend frame schema by column names (right now dropped)
        Pair<String[], ValueType[]> ret = new Pair<>();
        JavaPairRDD<Long, FrameBlock> binaryBlock = FrameRDDConverterUtils.dataFrameToBinaryBlock(jsc(), dataFrame, mc, containsID, ret);
        frameMetadata.setFrameSchema(new FrameSchema(Arrays.asList(ret.getValue())));
        // required due to meta
        frameMetadata.setMatrixCharacteristics(mc);
        return MLContextConversionUtil.binaryBlocksToFrameObject(binaryBlock, frameMetadata);
    } catch (DMLRuntimeException e) {
        throw new MLContextException("Exception converting DataFrame to FrameObject", e);
    }
}
Also used : FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) ConvertStringToLongTextPair(org.apache.sysml.runtime.instructions.spark.functions.ConvertStringToLongTextPair) Pair(org.apache.sysml.runtime.matrix.data.Pair) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 47 with FrameBlock

use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.

the class MLContextUtil method convertInputType.

/**
 * Convert input types to internal SystemML representations
 *
 * @param parameterName
 *            The name of the input parameter
 * @param parameterValue
 *            The value of the input parameter
 * @param metadata
 *            matrix/frame metadata
 * @return input in SystemML data representation
 */
public static Data convertInputType(String parameterName, Object parameterValue, Metadata metadata) {
    String name = parameterName;
    Object value = parameterValue;
    boolean hasMetadata = (metadata != null) ? true : false;
    boolean hasMatrixMetadata = hasMetadata && (metadata instanceof MatrixMetadata) ? true : false;
    boolean hasFrameMetadata = hasMetadata && (metadata instanceof FrameMetadata) ? true : false;
    if (name == null) {
        throw new MLContextException("Input parameter name is null");
    } else if (value == null) {
        throw new MLContextException("Input parameter value is null for: " + parameterName);
    } else if (value instanceof JavaRDD<?>) {
        @SuppressWarnings("unchecked") JavaRDD<String> javaRDD = (JavaRDD<String>) value;
        if (hasMatrixMetadata) {
            MatrixMetadata matrixMetadata = (MatrixMetadata) metadata;
            if (matrixMetadata.getMatrixFormat() == MatrixFormat.IJV) {
                return MLContextConversionUtil.javaRDDStringIJVToMatrixObject(javaRDD, matrixMetadata);
            } else {
                return MLContextConversionUtil.javaRDDStringCSVToMatrixObject(javaRDD, matrixMetadata);
            }
        } else if (hasFrameMetadata) {
            FrameMetadata frameMetadata = (FrameMetadata) metadata;
            if (frameMetadata.getFrameFormat() == FrameFormat.IJV) {
                return MLContextConversionUtil.javaRDDStringIJVToFrameObject(javaRDD, frameMetadata);
            } else {
                return MLContextConversionUtil.javaRDDStringCSVToFrameObject(javaRDD, frameMetadata);
            }
        } else if (!hasMetadata) {
            String firstLine = javaRDD.first();
            boolean isAllNumbers = isCSVLineAllNumbers(firstLine);
            if (isAllNumbers) {
                return MLContextConversionUtil.javaRDDStringCSVToMatrixObject(javaRDD);
            } else {
                return MLContextConversionUtil.javaRDDStringCSVToFrameObject(javaRDD);
            }
        }
    } else if (value instanceof RDD<?>) {
        @SuppressWarnings("unchecked") RDD<String> rdd = (RDD<String>) value;
        if (hasMatrixMetadata) {
            MatrixMetadata matrixMetadata = (MatrixMetadata) metadata;
            if (matrixMetadata.getMatrixFormat() == MatrixFormat.IJV) {
                return MLContextConversionUtil.rddStringIJVToMatrixObject(rdd, matrixMetadata);
            } else {
                return MLContextConversionUtil.rddStringCSVToMatrixObject(rdd, matrixMetadata);
            }
        } else if (hasFrameMetadata) {
            FrameMetadata frameMetadata = (FrameMetadata) metadata;
            if (frameMetadata.getFrameFormat() == FrameFormat.IJV) {
                return MLContextConversionUtil.rddStringIJVToFrameObject(rdd, frameMetadata);
            } else {
                return MLContextConversionUtil.rddStringCSVToFrameObject(rdd, frameMetadata);
            }
        } else if (!hasMetadata) {
            String firstLine = rdd.first();
            boolean isAllNumbers = isCSVLineAllNumbers(firstLine);
            if (isAllNumbers) {
                return MLContextConversionUtil.rddStringCSVToMatrixObject(rdd);
            } else {
                return MLContextConversionUtil.rddStringCSVToFrameObject(rdd);
            }
        }
    } else if (value instanceof MatrixBlock) {
        MatrixBlock matrixBlock = (MatrixBlock) value;
        return MLContextConversionUtil.matrixBlockToMatrixObject(name, matrixBlock, (MatrixMetadata) metadata);
    } else if (value instanceof FrameBlock) {
        FrameBlock frameBlock = (FrameBlock) value;
        return MLContextConversionUtil.frameBlockToFrameObject(name, frameBlock, (FrameMetadata) metadata);
    } else if (value instanceof Dataset<?>) {
        @SuppressWarnings("unchecked") Dataset<Row> dataFrame = (Dataset<Row>) value;
        dataFrame = MLUtils.convertVectorColumnsToML(dataFrame);
        if (hasMatrixMetadata) {
            return MLContextConversionUtil.dataFrameToMatrixObject(dataFrame, (MatrixMetadata) metadata);
        } else if (hasFrameMetadata) {
            return MLContextConversionUtil.dataFrameToFrameObject(dataFrame, (FrameMetadata) metadata);
        } else if (!hasMetadata) {
            boolean looksLikeMatrix = doesDataFrameLookLikeMatrix(dataFrame);
            if (looksLikeMatrix) {
                return MLContextConversionUtil.dataFrameToMatrixObject(dataFrame);
            } else {
                return MLContextConversionUtil.dataFrameToFrameObject(dataFrame);
            }
        }
    } else if (value instanceof Matrix) {
        Matrix matrix = (Matrix) value;
        if ((matrix.hasBinaryBlocks()) && (!matrix.hasMatrixObject())) {
            if (metadata == null) {
                metadata = matrix.getMatrixMetadata();
            }
            JavaPairRDD<MatrixIndexes, MatrixBlock> binaryBlocks = matrix.toBinaryBlocks();
            return MLContextConversionUtil.binaryBlocksToMatrixObject(binaryBlocks, (MatrixMetadata) metadata);
        } else {
            return matrix.toMatrixObject();
        }
    } else if (value instanceof Frame) {
        Frame frame = (Frame) value;
        if ((frame.hasBinaryBlocks()) && (!frame.hasFrameObject())) {
            if (metadata == null) {
                metadata = frame.getFrameMetadata();
            }
            JavaPairRDD<Long, FrameBlock> binaryBlocks = frame.toBinaryBlocks();
            return MLContextConversionUtil.binaryBlocksToFrameObject(binaryBlocks, (FrameMetadata) metadata);
        } else {
            return frame.toFrameObject();
        }
    } else if (value instanceof double[][]) {
        double[][] doubleMatrix = (double[][]) value;
        return MLContextConversionUtil.doubleMatrixToMatrixObject(name, doubleMatrix, (MatrixMetadata) metadata);
    } else if (value instanceof URL) {
        URL url = (URL) value;
        return MLContextConversionUtil.urlToMatrixObject(url, (MatrixMetadata) metadata);
    } else if (value instanceof Integer) {
        return new IntObject((Integer) value);
    } else if (value instanceof Double) {
        return new DoubleObject((Double) value);
    } else if (value instanceof String) {
        return new StringObject((String) value);
    } else if (value instanceof Boolean) {
        return new BooleanObject((Boolean) value);
    }
    return null;
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) DoubleObject(org.apache.sysml.runtime.instructions.cp.DoubleObject) URL(java.net.URL) RDD(org.apache.spark.rdd.RDD) JavaRDD(org.apache.spark.api.java.JavaRDD) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) IntObject(org.apache.sysml.runtime.instructions.cp.IntObject) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) StringObject(org.apache.sysml.runtime.instructions.cp.StringObject) Dataset(org.apache.spark.sql.Dataset) JavaRDD(org.apache.spark.api.java.JavaRDD) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) DoubleObject(org.apache.sysml.runtime.instructions.cp.DoubleObject) FrameObject(org.apache.sysml.runtime.controlprogram.caching.FrameObject) BooleanObject(org.apache.sysml.runtime.instructions.cp.BooleanObject) IntObject(org.apache.sysml.runtime.instructions.cp.IntObject) StringObject(org.apache.sysml.runtime.instructions.cp.StringObject) Row(org.apache.spark.sql.Row) BooleanObject(org.apache.sysml.runtime.instructions.cp.BooleanObject)

Example 48 with FrameBlock

use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.

the class Recompiler method executeInMemoryFrameReblock.

public static void executeInMemoryFrameReblock(ExecutionContext ec, String varin, String varout) {
    FrameObject in = ec.getFrameObject(varin);
    FrameObject out = ec.getFrameObject(varout);
    // read text input frame (through buffer pool, frame object carries all relevant
    // information including additional arguments for csv reblock)
    FrameBlock fb = in.acquireRead();
    // set output (incl update matrix characteristics)
    out.acquireModify(fb);
    out.release();
    in.release();
}
Also used : FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) FrameObject(org.apache.sysml.runtime.controlprogram.caching.FrameObject)

Example 49 with FrameBlock

use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.

the class DataFrameRowFrameConversionTest method testDataFrameConversion.

private void testDataFrameConversion(ValueType vt, boolean singleColBlock, boolean dense, boolean unknownDims) {
    boolean oldConfig = DMLScript.USE_LOCAL_SPARK_CONFIG;
    RUNTIME_PLATFORM oldPlatform = DMLScript.rtplatform;
    try {
        DMLScript.USE_LOCAL_SPARK_CONFIG = true;
        DMLScript.rtplatform = RUNTIME_PLATFORM.HYBRID_SPARK;
        // generate input data and setup metadata
        int cols = singleColBlock ? cols1 : cols2;
        double sparsity = dense ? sparsity1 : sparsity2;
        double[][] A = getRandomMatrix(rows1, cols, -10, 10, sparsity, 2373);
        A = (vt == ValueType.INT) ? TestUtils.round(A) : A;
        MatrixBlock mbA = DataConverter.convertToMatrixBlock(A);
        FrameBlock fbA = DataConverter.convertToFrameBlock(mbA, vt);
        int blksz = ConfigurationManager.getBlocksize();
        MatrixCharacteristics mc1 = new MatrixCharacteristics(rows1, cols, blksz, blksz, mbA.getNonZeros());
        MatrixCharacteristics mc2 = unknownDims ? new MatrixCharacteristics() : new MatrixCharacteristics(mc1);
        ValueType[] schema = UtilFunctions.nCopies(cols, vt);
        // get binary block input rdd
        JavaPairRDD<Long, FrameBlock> in = SparkExecutionContext.toFrameJavaPairRDD(sc, fbA);
        // frame - dataframe - frame conversion
        Dataset<Row> df = FrameRDDConverterUtils.binaryBlockToDataFrame(spark, in, mc1, schema);
        JavaPairRDD<Long, FrameBlock> out = FrameRDDConverterUtils.dataFrameToBinaryBlock(sc, df, mc2, true);
        // get output frame block
        FrameBlock fbB = SparkExecutionContext.toFrameBlock(out, schema, rows1, cols);
        // compare frame blocks
        MatrixBlock mbB = DataConverter.convertToMatrixBlock(fbB);
        double[][] B = DataConverter.convertToDoubleMatrix(mbB);
        TestUtils.compareMatrices(A, B, rows1, cols, eps);
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    } finally {
        DMLScript.USE_LOCAL_SPARK_CONFIG = oldConfig;
        DMLScript.rtplatform = oldPlatform;
    }
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) ValueType(org.apache.sysml.parser.Expression.ValueType) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) RUNTIME_PLATFORM(org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) Row(org.apache.spark.sql.Row)

Example 50 with FrameBlock

use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.

the class DataFrameVectorFrameConversionTest method testDataFrameConversion.

private void testDataFrameConversion(ValueType[] schema, boolean containsID, boolean dense, boolean unknownDims) {
    boolean oldConfig = DMLScript.USE_LOCAL_SPARK_CONFIG;
    RUNTIME_PLATFORM oldPlatform = DMLScript.rtplatform;
    try {
        DMLScript.USE_LOCAL_SPARK_CONFIG = true;
        DMLScript.rtplatform = RUNTIME_PLATFORM.HYBRID_SPARK;
        // generate input data and setup metadata
        int cols = schema.length + colsVector - 1;
        double sparsity = dense ? sparsity1 : sparsity2;
        double[][] A = TestUtils.round(getRandomMatrix(rows1, cols, -10, 1000, sparsity, 2373));
        MatrixBlock mbA = DataConverter.convertToMatrixBlock(A);
        int blksz = ConfigurationManager.getBlocksize();
        MatrixCharacteristics mc1 = new MatrixCharacteristics(rows1, cols, blksz, blksz, mbA.getNonZeros());
        MatrixCharacteristics mc2 = unknownDims ? new MatrixCharacteristics() : new MatrixCharacteristics(mc1);
        // create input data frame
        Dataset<Row> df = createDataFrame(spark, mbA, containsID, schema);
        // dataframe - frame conversion
        JavaPairRDD<Long, FrameBlock> out = FrameRDDConverterUtils.dataFrameToBinaryBlock(sc, df, mc2, containsID);
        // get output frame block
        FrameBlock fbB = SparkExecutionContext.toFrameBlock(out, UtilFunctions.nCopies(cols, ValueType.DOUBLE), rows1, cols);
        // compare frame blocks
        MatrixBlock mbB = DataConverter.convertToMatrixBlock(fbB);
        double[][] B = DataConverter.convertToDoubleMatrix(mbB);
        TestUtils.compareMatrices(A, B, rows1, cols, eps);
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    } finally {
        DMLScript.USE_LOCAL_SPARK_CONFIG = oldConfig;
        DMLScript.rtplatform = oldPlatform;
    }
}
Also used : MatrixBlock(org.apache.sysml.runtime.matrix.data.MatrixBlock) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) RUNTIME_PLATFORM(org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) Row(org.apache.spark.sql.Row)

Aggregations

FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock)90 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)28 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)26 ValueType (org.apache.sysml.parser.Expression.ValueType)23 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)23 FrameReader (org.apache.sysml.runtime.io.FrameReader)18 IOException (java.io.IOException)16 RUNTIME_PLATFORM (org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM)16 FrameObject (org.apache.sysml.runtime.controlprogram.caching.FrameObject)15 LongWritable (org.apache.hadoop.io.LongWritable)12 JavaPairRDD (org.apache.spark.api.java.JavaPairRDD)11 CSVFileFormatProperties (org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties)11 FrameWriter (org.apache.sysml.runtime.io.FrameWriter)9 TestConfiguration (org.apache.sysml.test.integration.TestConfiguration)8 Text (org.apache.hadoop.io.Text)7 SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)7 RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject)7 ConvertStringToLongTextPair (org.apache.sysml.runtime.instructions.spark.functions.ConvertStringToLongTextPair)6 CopyTextInputFunction (org.apache.sysml.runtime.instructions.spark.functions.CopyTextInputFunction)5 MetaDataFormat (org.apache.sysml.runtime.matrix.MetaDataFormat)5