Search in sources :

Example 41 with FrameBlock

use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.

the class Connection method convertToFrame.

/**
 * Reads a frame from an input stream and materializes it as a frame block.
 * <p>
 * Accepted format identifiers are csv, text and mm. A csv stream is read
 * with the csv input info; text and mm streams are both read with the
 * text-cell input info.
 *
 * @param input InputStream to a string frame in csv or textcell format
 * @param rows number of rows in the frame
 * @param cols number of columns in the frame
 * @param format input format of the given stream
 * @return frame as a frame block
 * @throws IOException if the format is not one of the accepted identifiers,
 *         if reading the stream fails, or if a DMLRuntimeException is
 *         raised while reading (it is wrapped as the cause)
 */
public FrameBlock convertToFrame(InputStream input, int rows, int cols, String format) throws IOException {
    // classify the requested format once; the result drives both validation and reader selection
    final boolean csv = DataExpression.FORMAT_TYPE_VALUE_CSV.equals(format);
    final boolean textCell = DataExpression.FORMAT_TYPE_VALUE_TEXT.equals(format)
        || DataExpression.FORMAT_TYPE_VALUE_MATRIXMARKET.equals(format);
    if (!csv && !textCell) {
        throw new IOException("Invalid input format (expected: csv, text or mm): " + format);
    }

    setLocalConfigs();

    // everything that is not csv is handled by the text-cell reader
    final InputInfo inputInfo = csv ? InputInfo.CSVInputInfo : InputInfo.TextCellInputInfo;
    try {
        final FrameReader frameReader = FrameReaderFactory.createFrameReader(inputInfo);
        return frameReader.readFrameFromInputStream(input, rows, cols);
    } catch (DMLRuntimeException e) {
        // surface runtime failures through the declared checked exception, keeping the cause
        throw new IOException(e);
    }
}
Also used : InputInfo(org.apache.sysml.runtime.matrix.data.InputInfo) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) IOException(java.io.IOException) FrameReader(org.apache.sysml.runtime.io.FrameReader) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 42 with FrameBlock

use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.

the class MLContextConversionUtil method frameObjectToDataFrame.

/**
 * Convert a {@code FrameObject} to a {@code DataFrame}.
 * <p>
 * The frame's binary-block RDD handle is obtained from the given Spark
 * execution context and handed, together with the frame's matrix
 * characteristics and schema, to the binary-block-to-DataFrame converter.
 *
 * @param frameObject
 *            the {@code FrameObject}
 * @param sparkExecutionContext
 *            the Spark execution context
 * @return the {@code FrameObject} converted to a {@code DataFrame}
 * @throws MLContextException
 *             if a {@code DMLRuntimeException} occurs during the conversion
 */
public static Dataset<Row> frameObjectToDataFrame(FrameObject frameObject, SparkExecutionContext sparkExecutionContext) {
    try {
        // the handle is returned untyped, hence the unchecked cast
        @SuppressWarnings("unchecked")
        final JavaPairRDD<Long, FrameBlock> blocks = (JavaPairRDD<Long, FrameBlock>) sparkExecutionContext
            .getRDDHandleForFrameObject(frameObject, InputInfo.BinaryBlockInputInfo);
        final MatrixCharacteristics characteristics = frameObject.getMatrixCharacteristics();
        return FrameRDDConverterUtils.binaryBlockToDataFrame(spark(), blocks, characteristics, frameObject.getSchema());
    } catch (DMLRuntimeException dmlEx) {
        throw new MLContextException("DMLRuntimeException while converting frame object to DataFrame", dmlEx);
    }
}
Also used : FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) JavaPairRDD(org.apache.spark.api.java.JavaPairRDD) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException)

Example 43 with FrameBlock

use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.

the class MLContextConversionUtil method javaRDDStringIJVToFrameObject.

/**
 * Convert a {@code JavaRDD<String>} in IJV format to a {@code FrameObject}.
 * Note that metadata is required for IJV format: the frame's
 * characteristics and schema are both taken from {@code frameMetadata}.
 * <p>
 * The text cells are always converted to binary blocks using an all-string
 * schema with one entry per column; the schema from the metadata is only
 * attached to the resulting {@code FrameObject}.
 *
 * @param javaRDD
 *            the Java RDD of strings
 * @param frameMetadata
 *            frame metadata; must not be {@code null}
 * @return the {@code JavaRDD<String>} converted to a {@code FrameObject},
 *         or {@code null} if the text-cell to binary-block conversion
 *         raises a {@code DMLRuntimeException} (its stack trace is printed)
 * @throws IllegalArgumentException
 *             if {@code frameMetadata} is {@code null}
 */
public static FrameObject javaRDDStringIJVToFrameObject(JavaRDD<String> javaRDD, FrameMetadata frameMetadata) {
    // Fail fast with a clear message: the schema below is read from the metadata
    // unconditionally, so a null value could only ever end in a NullPointerException.
    if (frameMetadata == null) {
        throw new IllegalArgumentException("Frame metadata is required to convert a JavaRDD<String> in IJV format to a FrameObject");
    }
    JavaPairRDD<LongWritable, Text> javaPairRDD = javaRDD.mapToPair(new ConvertStringToLongTextPair());
    MatrixCharacteristics mc = frameMetadata.asMatrixCharacteristics();
    JavaPairRDD<LongWritable, Text> javaPairRDDText = javaPairRDD.mapToPair(new CopyTextInputFunction());
    FrameObject frameObject = new FrameObject(OptimizerUtils.getUniqueTempFileName(), new MetaDataFormat(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo), frameMetadata.getFrameSchema().getSchema().toArray(new ValueType[0]));
    JavaPairRDD<Long, FrameBlock> rdd;
    try {
        // every column is read as a string during the text-cell conversion
        ValueType[] lschema = UtilFunctions.nCopies((int) mc.getCols(), ValueType.STRING);
        rdd = FrameRDDConverterUtils.textCellToBinaryBlock(jsc(), javaPairRDDText, mc, lschema);
    } catch (DMLRuntimeException e) {
        // NOTE(review): failure is reported via stack trace and a null return to keep the
        // existing caller-visible contract; consider raising an MLContextException instead.
        e.printStackTrace();
        return null;
    }
    frameObject.setRDDHandle(new RDDObject(rdd));
    return frameObject;
}
Also used : MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) ValueType(org.apache.sysml.parser.Expression.ValueType) Text(org.apache.hadoop.io.Text) FrameObject(org.apache.sysml.runtime.controlprogram.caching.FrameObject) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) CopyTextInputFunction(org.apache.sysml.runtime.instructions.spark.functions.CopyTextInputFunction) ConvertStringToLongTextPair(org.apache.sysml.runtime.instructions.spark.functions.ConvertStringToLongTextPair) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) RDDObject(org.apache.sysml.runtime.instructions.spark.data.RDDObject) LongWritable(org.apache.hadoop.io.LongWritable)

Example 44 with FrameBlock

use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.

the class MLContextConversionUtil method frameObjectTo2DStringArray.

/**
 * Convert a {@code FrameObject} to a two-dimensional string array.
 * <p>
 * The frame block is acquired for reading, converted, and the frame object
 * is released again before returning, including when the conversion throws.
 *
 * @param frameObject
 *            the {@code FrameObject}
 * @return the {@code FrameObject} converted to a {@code String[][]}
 */
public static String[][] frameObjectTo2DStringArray(FrameObject frameObject) {
    // acquire outside the try: if acquisition itself fails there is nothing to release
    FrameBlock fb = frameObject.acquireRead();
    try {
        return DataConverter.convertToStringFrame(fb);
    } finally {
        // always pair the acquireRead() above with a release(), even on failure
        frameObject.release();
    }
}
Also used : FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock)

Example 45 with FrameBlock

use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.

the class MLContextConversionUtil method frameObjectToListStringCSV.

/**
 * Convert a {@code FrameObject} to a {@code List<String>} in CSV format.
 * <p>
 * Each row of the frame becomes one string whose cell values are joined by
 * {@code delimiter}. A {@code null} cell contributes an empty field. The
 * frame object is released before returning, including when reading a cell
 * throws.
 *
 * @param frameObject
 *            the {@code FrameObject}
 * @param delimiter
 *            the delimiter
 * @return the {@code FrameObject} converted to a {@code List<String>}
 */
public static List<String> frameObjectToListStringCSV(FrameObject frameObject, String delimiter) {
    // acquire outside the try: if acquisition itself fails there is nothing to release
    FrameBlock fb = frameObject.acquireRead();
    try {
        int rows = fb.getNumRows();
        int cols = fb.getNumColumns();
        List<String> list = new ArrayList<>();
        for (int i = 0; i < rows; i++) {
            StringBuilder sb = new StringBuilder();
            for (int j = 0; j < cols; j++) {
                if (j > 0) {
                    sb.append(delimiter);
                }
                // read the cell once; null cells are emitted as empty fields
                Object cell = fb.get(i, j);
                if (cell != null) {
                    sb.append(cell);
                }
            }
            list.add(sb.toString());
        }
        return list;
    } finally {
        // always pair the acquireRead() above with a release(), even on failure
        frameObject.release();
    }
}
Also used : FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) ArrayList(java.util.ArrayList)

Aggregations

FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock)90 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)28 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)26 ValueType (org.apache.sysml.parser.Expression.ValueType)23 MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock)23 FrameReader (org.apache.sysml.runtime.io.FrameReader)18 IOException (java.io.IOException)16 RUNTIME_PLATFORM (org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM)16 FrameObject (org.apache.sysml.runtime.controlprogram.caching.FrameObject)15 LongWritable (org.apache.hadoop.io.LongWritable)12 JavaPairRDD (org.apache.spark.api.java.JavaPairRDD)11 CSVFileFormatProperties (org.apache.sysml.runtime.matrix.data.CSVFileFormatProperties)11 FrameWriter (org.apache.sysml.runtime.io.FrameWriter)9 TestConfiguration (org.apache.sysml.test.integration.TestConfiguration)8 Text (org.apache.hadoop.io.Text)7 SparkExecutionContext (org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext)7 RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject)7 ConvertStringToLongTextPair (org.apache.sysml.runtime.instructions.spark.functions.ConvertStringToLongTextPair)6 CopyTextInputFunction (org.apache.sysml.runtime.instructions.spark.functions.CopyTextInputFunction)5 MetaDataFormat (org.apache.sysml.runtime.matrix.MetaDataFormat)5