Search in sources :

Example 1 with ConvertStringToLongTextPair

use of org.apache.sysml.runtime.instructions.spark.functions.ConvertStringToLongTextPair in project incubator-systemml by apache.

the class MLContextConversionUtil method javaRDDStringCSVToFrameObject.

/**
	 * Convert a {@code JavaRDD<String>} in CSV format to a {@code FrameObject}
	 * 
	 * @param variableName
	 *            name of the variable associated with the frame
	 * @param javaRDD
	 *            the Java RDD of strings
	 * @param frameMetadata
	 *            frame metadata
	 * @return the {@code JavaRDD<String>} converted to a {@code FrameObject}
	 */
public static FrameObject javaRDDStringCSVToFrameObject(String variableName, JavaRDD<String> javaRDD, FrameMetadata frameMetadata) {
    JavaPairRDD<LongWritable, Text> javaPairRDD = javaRDD.mapToPair(new ConvertStringToLongTextPair());
    MatrixCharacteristics mc = (frameMetadata != null) ? frameMetadata.asMatrixCharacteristics() : new MatrixCharacteristics();
    JavaPairRDD<LongWritable, Text> javaPairRDDText = javaPairRDD.mapToPair(new CopyTextInputFunction());
    FrameObject frameObject = new FrameObject(OptimizerUtils.getUniqueTempFileName(), new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo), frameMetadata.getFrameSchema().getSchema().toArray(new ValueType[0]));
    JavaPairRDD<Long, FrameBlock> rdd;
    try {
        rdd = FrameRDDConverterUtils.csvToBinaryBlock(jsc(), javaPairRDDText, mc, frameObject.getSchema(), false, ",", false, -1);
    } catch (DMLRuntimeException e) {
        e.printStackTrace();
        return null;
    }
    frameObject.setRDDHandle(new RDDObject(rdd, variableName));
    return frameObject;
}
Also used : ValueType(org.apache.sysml.parser.Expression.ValueType) Text(org.apache.hadoop.io.Text) FrameObject(org.apache.sysml.runtime.controlprogram.caching.FrameObject) MatrixFormatMetaData(org.apache.sysml.runtime.matrix.MatrixFormatMetaData) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) CopyTextInputFunction(org.apache.sysml.runtime.instructions.spark.functions.CopyTextInputFunction) ConvertStringToLongTextPair(org.apache.sysml.runtime.instructions.spark.functions.ConvertStringToLongTextPair) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) RDDObject(org.apache.sysml.runtime.instructions.spark.data.RDDObject) LongWritable(org.apache.hadoop.io.LongWritable)

Example 2 with ConvertStringToLongTextPair

use of org.apache.sysml.runtime.instructions.spark.functions.ConvertStringToLongTextPair in project incubator-systemml by apache.

the class MLContextConversionUtil method javaRDDStringCSVToMatrixObject.

/**
 * Convert a {@code JavaRDD<String>} in CSV format to a {@code MatrixObject}
 *
 * @param javaRDD
 *            the Java RDD of strings
 * @param matrixMetadata
 *            matrix metadata
 * @return the {@code JavaRDD<String>} converted to a {@code MatrixObject}
 */
public static MatrixObject javaRDDStringCSVToMatrixObject(JavaRDD<String> javaRDD, MatrixMetadata matrixMetadata) {
    JavaPairRDD<LongWritable, Text> javaPairRDD = javaRDD.mapToPair(new ConvertStringToLongTextPair());
    MatrixCharacteristics mc = (matrixMetadata != null) ? matrixMetadata.asMatrixCharacteristics() : new MatrixCharacteristics();
    MatrixObject matrixObject = new MatrixObject(ValueType.DOUBLE, OptimizerUtils.getUniqueTempFileName(), new MetaDataFormat(mc, OutputInfo.CSVOutputInfo, InputInfo.CSVInputInfo));
    JavaPairRDD<LongWritable, Text> javaPairRDD2 = javaPairRDD.mapToPair(new CopyTextInputFunction());
    matrixObject.setRDDHandle(new RDDObject(javaPairRDD2));
    return matrixObject;
}
Also used : MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) CopyTextInputFunction(org.apache.sysml.runtime.instructions.spark.functions.CopyTextInputFunction) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) ConvertStringToLongTextPair(org.apache.sysml.runtime.instructions.spark.functions.ConvertStringToLongTextPair) RDDObject(org.apache.sysml.runtime.instructions.spark.data.RDDObject) Text(org.apache.hadoop.io.Text) LongWritable(org.apache.hadoop.io.LongWritable) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 3 with ConvertStringToLongTextPair

use of org.apache.sysml.runtime.instructions.spark.functions.ConvertStringToLongTextPair in project incubator-systemml by apache.

the class MLContextConversionUtil method javaRDDStringIJVToMatrixObject.

/**
 * Convert a {@code JavaRDD<String>} in IJV format to a {@code MatrixObject}
 * . Note that metadata is required for IJV format.
 *
 * @param javaRDD
 *            the Java RDD of strings
 * @param matrixMetadata
 *            matrix metadata
 * @return the {@code JavaRDD<String>} converted to a {@code MatrixObject}
 */
public static MatrixObject javaRDDStringIJVToMatrixObject(JavaRDD<String> javaRDD, MatrixMetadata matrixMetadata) {
    JavaPairRDD<LongWritable, Text> javaPairRDD = javaRDD.mapToPair(new ConvertStringToLongTextPair());
    MatrixCharacteristics mc = (matrixMetadata != null) ? matrixMetadata.asMatrixCharacteristics() : new MatrixCharacteristics();
    MatrixObject matrixObject = new MatrixObject(ValueType.DOUBLE, OptimizerUtils.getUniqueTempFileName(), new MetaDataFormat(mc, OutputInfo.TextCellOutputInfo, InputInfo.TextCellInputInfo));
    JavaPairRDD<LongWritable, Text> javaPairRDD2 = javaPairRDD.mapToPair(new CopyTextInputFunction());
    matrixObject.setRDDHandle(new RDDObject(javaPairRDD2));
    return matrixObject;
}
Also used : MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) CopyTextInputFunction(org.apache.sysml.runtime.instructions.spark.functions.CopyTextInputFunction) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) ConvertStringToLongTextPair(org.apache.sysml.runtime.instructions.spark.functions.ConvertStringToLongTextPair) RDDObject(org.apache.sysml.runtime.instructions.spark.data.RDDObject) Text(org.apache.hadoop.io.Text) LongWritable(org.apache.hadoop.io.LongWritable) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Example 4 with ConvertStringToLongTextPair

use of org.apache.sysml.runtime.instructions.spark.functions.ConvertStringToLongTextPair in project incubator-systemml by apache.

the class MLContextConversionUtil method javaRDDStringCSVToFrameObject.

/**
 * Convert a {@code JavaRDD<String>} in CSV format to a {@code FrameObject}
 *
 * @param javaRDD
 *            the Java RDD of strings
 * @param frameMetadata
 *            frame metadata
 * @return the {@code JavaRDD<String>} converted to a {@code FrameObject}
 */
public static FrameObject javaRDDStringCSVToFrameObject(JavaRDD<String> javaRDD, FrameMetadata frameMetadata) {
    JavaPairRDD<LongWritable, Text> javaPairRDD = javaRDD.mapToPair(new ConvertStringToLongTextPair());
    MatrixCharacteristics mc = (frameMetadata != null) ? frameMetadata.asMatrixCharacteristics() : new MatrixCharacteristics();
    JavaPairRDD<LongWritable, Text> javaPairRDDText = javaPairRDD.mapToPair(new CopyTextInputFunction());
    FrameObject frameObject = new FrameObject(OptimizerUtils.getUniqueTempFileName(), new MetaDataFormat(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo), frameMetadata.getFrameSchema().getSchema().toArray(new ValueType[0]));
    JavaPairRDD<Long, FrameBlock> rdd;
    try {
        rdd = FrameRDDConverterUtils.csvToBinaryBlock(jsc(), javaPairRDDText, mc, frameObject.getSchema(), false, ",", false, -1);
    } catch (DMLRuntimeException e) {
        e.printStackTrace();
        return null;
    }
    frameObject.setRDDHandle(new RDDObject(rdd));
    return frameObject;
}
Also used : MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) ValueType(org.apache.sysml.parser.Expression.ValueType) Text(org.apache.hadoop.io.Text) FrameObject(org.apache.sysml.runtime.controlprogram.caching.FrameObject) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics) DMLRuntimeException(org.apache.sysml.runtime.DMLRuntimeException) CopyTextInputFunction(org.apache.sysml.runtime.instructions.spark.functions.CopyTextInputFunction) ConvertStringToLongTextPair(org.apache.sysml.runtime.instructions.spark.functions.ConvertStringToLongTextPair) FrameBlock(org.apache.sysml.runtime.matrix.data.FrameBlock) RDDObject(org.apache.sysml.runtime.instructions.spark.data.RDDObject) LongWritable(org.apache.hadoop.io.LongWritable)

Example 5 with ConvertStringToLongTextPair

use of org.apache.sysml.runtime.instructions.spark.functions.ConvertStringToLongTextPair in project systemml by apache.

the class MLContextConversionUtil method javaRDDStringCSVToMatrixObject.

/**
 * Convert a {@code JavaRDD<String>} in CSV format to a {@code MatrixObject}
 *
 * @param javaRDD
 *            the Java RDD of strings
 * @param matrixMetadata
 *            matrix metadata
 * @return the {@code JavaRDD<String>} converted to a {@code MatrixObject}
 */
public static MatrixObject javaRDDStringCSVToMatrixObject(JavaRDD<String> javaRDD, MatrixMetadata matrixMetadata) {
    JavaPairRDD<LongWritable, Text> javaPairRDD = javaRDD.mapToPair(new ConvertStringToLongTextPair());
    MatrixCharacteristics mc = (matrixMetadata != null) ? matrixMetadata.asMatrixCharacteristics() : new MatrixCharacteristics();
    MatrixObject matrixObject = new MatrixObject(ValueType.DOUBLE, OptimizerUtils.getUniqueTempFileName(), new MetaDataFormat(mc, OutputInfo.CSVOutputInfo, InputInfo.CSVInputInfo));
    JavaPairRDD<LongWritable, Text> javaPairRDD2 = javaPairRDD.mapToPair(new CopyTextInputFunction());
    matrixObject.setRDDHandle(new RDDObject(javaPairRDD2));
    return matrixObject;
}
Also used : MetaDataFormat(org.apache.sysml.runtime.matrix.MetaDataFormat) CopyTextInputFunction(org.apache.sysml.runtime.instructions.spark.functions.CopyTextInputFunction) MatrixObject(org.apache.sysml.runtime.controlprogram.caching.MatrixObject) ConvertStringToLongTextPair(org.apache.sysml.runtime.instructions.spark.functions.ConvertStringToLongTextPair) RDDObject(org.apache.sysml.runtime.instructions.spark.data.RDDObject) Text(org.apache.hadoop.io.Text) LongWritable(org.apache.hadoop.io.LongWritable) MatrixCharacteristics(org.apache.sysml.runtime.matrix.MatrixCharacteristics)

Aggregations

LongWritable (org.apache.hadoop.io.LongWritable)12 Text (org.apache.hadoop.io.Text)12 RDDObject (org.apache.sysml.runtime.instructions.spark.data.RDDObject)12 ConvertStringToLongTextPair (org.apache.sysml.runtime.instructions.spark.functions.ConvertStringToLongTextPair)12 CopyTextInputFunction (org.apache.sysml.runtime.instructions.spark.functions.CopyTextInputFunction)12 MatrixCharacteristics (org.apache.sysml.runtime.matrix.MatrixCharacteristics)12 MetaDataFormat (org.apache.sysml.runtime.matrix.MetaDataFormat)8 ValueType (org.apache.sysml.parser.Expression.ValueType)6 DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException)6 FrameObject (org.apache.sysml.runtime.controlprogram.caching.FrameObject)6 MatrixObject (org.apache.sysml.runtime.controlprogram.caching.MatrixObject)6 FrameBlock (org.apache.sysml.runtime.matrix.data.FrameBlock)6 MatrixFormatMetaData (org.apache.sysml.runtime.matrix.MatrixFormatMetaData)4