use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.
the class Connection method convertToFrame.
/**
 * Reads a frame from an input stream and materializes it as a frame block.
 * The stream is parsed as csv when {@code format} is the csv format value;
 * the text and matrix market format values are both read with the text cell
 * input info.
 *
 * @param input InputStream to a string frame in csv or textcell format
 * @param rows number of rows in the frame
 * @param cols number of columns in the frame
 * @param format input format of the given stream
 * @return frame as a frame block
 * @throws IOException if the format is not one of csv, text or mm, or if
 *         reading the frame fails with a DMLRuntimeException (wrapped as cause)
 */
public FrameBlock convertToFrame(InputStream input, int rows, int cols, String format) throws IOException {
    // classify the requested format once; anything else is rejected up front
    final boolean isText = DataExpression.FORMAT_TYPE_VALUE_TEXT.equals(format);
    final boolean isMatrixMarket = DataExpression.FORMAT_TYPE_VALUE_MATRIXMARKET.equals(format);
    final boolean isCsv = DataExpression.FORMAT_TYPE_VALUE_CSV.equals(format);
    if (!isText && !isMatrixMarket && !isCsv) {
        throw new IOException("Invalid input format (expected: csv, text or mm): " + format);
    }

    setLocalConfigs();

    // csv has its own reader; every other accepted format is read as text cell
    final InputInfo inputInfo = isCsv ? InputInfo.CSVInputInfo : InputInfo.TextCellInputInfo;
    try {
        FrameReader frameReader = FrameReaderFactory.createFrameReader(inputInfo);
        return frameReader.readFrameFromInputStream(input, rows, cols);
    } catch (DMLRuntimeException rex) {
        // surface runtime failures through the declared checked exception
        throw new IOException(rex);
    }
}
use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.
the class MLContextConversionUtil method frameObjectToDataFrame.
/**
 * Produce a {@code DataFrame} from a {@code FrameObject}. The frame object's
 * binary-block RDD handle is obtained from the Spark execution context and
 * converted using the frame object's matrix characteristics and schema.
 *
 * @param frameObject
 *            the {@code FrameObject}
 * @param sparkExecutionContext
 *            the Spark execution context
 * @return the {@code FrameObject} converted to a {@code DataFrame}
 * @throws MLContextException
 *             if a {@code DMLRuntimeException} occurs during the conversion
 */
public static Dataset<Row> frameObjectToDataFrame(FrameObject frameObject, SparkExecutionContext sparkExecutionContext) {
    try {
        // the handle is requested in binary-block form and cast to the typed pair RDD
        @SuppressWarnings("unchecked")
        JavaPairRDD<Long, FrameBlock> frameBlocks = (JavaPairRDD<Long, FrameBlock>) sparkExecutionContext
                .getRDDHandleForFrameObject(frameObject, InputInfo.BinaryBlockInputInfo);
        MatrixCharacteristics characteristics = frameObject.getMatrixCharacteristics();
        return FrameRDDConverterUtils.binaryBlockToDataFrame(
                spark(), frameBlocks, characteristics, frameObject.getSchema());
    } catch (DMLRuntimeException e) {
        throw new MLContextException("DMLRuntimeException while converting frame object to DataFrame", e);
    }
}
use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.
the class MLContextConversionUtil method javaRDDStringIJVToFrameObject.
/**
 * Convert a {@code JavaRDD<String>} in IJV format to a {@code FrameObject}.
 * Note that metadata is required for IJV format: the frame dimensions and the
 * schema of the resulting {@code FrameObject} are both taken from it.
 *
 * @param javaRDD
 *            the Java RDD of strings
 * @param frameMetadata
 *            frame metadata (must not be {@code null})
 * @return the {@code JavaRDD<String>} converted to a {@code FrameObject}
 * @throws NullPointerException
 *             if {@code frameMetadata} is {@code null}
 * @throws MLContextException
 *             if a {@code DMLRuntimeException} occurs while converting the
 *             text cells to binary blocks
 */
public static FrameObject javaRDDStringIJVToFrameObject(JavaRDD<String> javaRDD, FrameMetadata frameMetadata) {
    // Fail fast with a descriptive message: the schema lookup below cannot work
    // without metadata, so a null argument is rejected before any RDD
    // transformations are set up.
    java.util.Objects.requireNonNull(frameMetadata,
            "Frame metadata is required to convert a JavaRDD<String> in IJV format to a FrameObject");

    JavaPairRDD<LongWritable, Text> javaPairRDD = javaRDD.mapToPair(new ConvertStringToLongTextPair());
    MatrixCharacteristics mc = frameMetadata.asMatrixCharacteristics();
    JavaPairRDD<LongWritable, Text> javaPairRDDText = javaPairRDD.mapToPair(new CopyTextInputFunction());

    FrameObject frameObject = new FrameObject(OptimizerUtils.getUniqueTempFileName(),
            new MetaDataFormat(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo),
            frameMetadata.getFrameSchema().getSchema().toArray(new ValueType[0]));

    JavaPairRDD<Long, FrameBlock> rdd;
    try {
        // NOTE(review): the text cells are converted with an all-STRING schema,
        // while the FrameObject above carries the schema from the metadata --
        // confirm this difference is intended.
        ValueType[] lschema = UtilFunctions.nCopies((int) mc.getCols(), ValueType.STRING);
        rdd = FrameRDDConverterUtils.textCellToBinaryBlock(jsc(), javaPairRDDText, mc, lschema);
    } catch (DMLRuntimeException e) {
        // propagate instead of printing the stack trace and returning null
        throw new MLContextException(
                "DMLRuntimeException while converting JavaRDD<String> in IJV format to FrameObject", e);
    }
    frameObject.setRDDHandle(new RDDObject(rdd));
    return frameObject;
}
use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.
the class MLContextConversionUtil method frameObjectTo2DStringArray.
/**
 * Convert a {@code FrameObject} to a two-dimensional string array.
 * The frame block is acquired for reading for the duration of the conversion
 * and is released again even if the conversion fails.
 *
 * @param frameObject
 *            the {@code FrameObject}
 * @return the {@code FrameObject} converted to a {@code String[][]}
 */
public static String[][] frameObjectTo2DStringArray(FrameObject frameObject) {
    FrameBlock fb = frameObject.acquireRead();
    try {
        return DataConverter.convertToStringFrame(fb);
    } finally {
        // always pair acquireRead() with release(), also on failure
        frameObject.release();
    }
}
use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.
the class MLContextConversionUtil method frameObjectToListStringCSV.
/**
 * Convert a {@code FrameObject} to a {@code List<String>} in CSV format.
 * Each row of the frame becomes one string whose cell values are joined by
 * the given delimiter; {@code null} cells are written as empty fields. The
 * frame block is acquired for reading while the list is built and is released
 * again even if building the list fails.
 *
 * @param frameObject
 *            the {@code FrameObject}
 * @param delimiter
 *            the delimiter
 * @return the {@code FrameObject} converted to a {@code List<String>}
 */
public static List<String> frameObjectToListStringCSV(FrameObject frameObject, String delimiter) {
    FrameBlock fb = frameObject.acquireRead();
    try {
        int rows = fb.getNumRows();
        int cols = fb.getNumColumns();
        List<String> list = new ArrayList<>(rows);
        for (int i = 0; i < rows; i++) {
            StringBuilder sb = new StringBuilder();
            for (int j = 0; j < cols; j++) {
                if (j > 0) {
                    sb.append(delimiter);
                }
                // look the cell up once; null cells contribute an empty field
                Object cell = fb.get(i, j);
                if (cell != null) {
                    sb.append(cell);
                }
            }
            list.add(sb.toString());
        }
        return list;
    } finally {
        // always pair acquireRead() with release(), also on failure
        frameObject.release();
    }
}
Aggregations