Use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.
The class MLContextConversionUtil, method dataFrameToFrameObject.
/**
* Convert a {@code DataFrame} to a {@code FrameObject}.
*
* @param dataFrame
* the Spark {@code DataFrame}
* @param frameMetadata
* the frame metadata
* @return the {@code DataFrame} converted to a {@code FrameObject}
*/
public static FrameObject dataFrameToFrameObject(Dataset<Row> dataFrame, FrameMetadata frameMetadata) {
try {
// setup meta data and java spark context
if (frameMetadata == null)
frameMetadata = new FrameMetadata();
determineFrameFormatIfNeeded(dataFrame, frameMetadata);
boolean containsID = isDataFrameWithIDColumn(frameMetadata);
MatrixCharacteristics mc = frameMetadata.asMatrixCharacteristics();
if (mc == null)
mc = new MatrixCharacteristics();
// convert data frame and obtain column names / schema
// TODO extend frame schema by column names (right now dropped)
Pair<String[], ValueType[]> ret = new Pair<>();
JavaPairRDD<Long, FrameBlock> binaryBlock = FrameRDDConverterUtils.dataFrameToBinaryBlock(jsc(), dataFrame, mc, containsID, ret);
frameMetadata.setFrameSchema(new FrameSchema(Arrays.asList(ret.getValue())));
// required to keep the frame metadata consistent with the updated matrix characteristics
frameMetadata.setMatrixCharacteristics(mc);
return MLContextConversionUtil.binaryBlocksToFrameObject(binaryBlock, frameMetadata);
} catch (DMLRuntimeException e) {
throw new MLContextException("Exception converting DataFrame to FrameObject", e);
}
}
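A minimal sketch of how this utility might be called from user code is shown below. The SparkSession setup, the class name DataFrameToFrameObjectSketch, the column names, and the sample rows are illustrative assumptions rather than project code; an MLContext is created first on the assumption that the conversion resolves its JavaSparkContext (the jsc() call above) from the active MLContext.

import java.util.Arrays;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;
import org.apache.sysml.api.mlcontext.FrameMetadata;
import org.apache.sysml.api.mlcontext.MLContext;
import org.apache.sysml.api.mlcontext.MLContextConversionUtil;
import org.apache.sysml.runtime.controlprogram.caching.FrameObject;

public class DataFrameToFrameObjectSketch {
    public static void main(String[] args) {
        // illustrative local Spark setup
        SparkSession spark = SparkSession.builder().appName("FrameObjectSketch").master("local[*]").getOrCreate();
        MLContext ml = new MLContext(spark);
        // small mixed-type DataFrame; frames, unlike matrices, may hold string columns
        StructType schema = new StructType().add("name", DataTypes.StringType).add("score", DataTypes.DoubleType);
        Dataset<Row> df = spark.createDataFrame(
                Arrays.asList(RowFactory.create("a", 1.0), RowFactory.create("b", 2.0)), schema);
        // empty metadata: format and dimensions are determined inside dataFrameToFrameObject
        FrameObject fo = MLContextConversionUtil.dataFrameToFrameObject(df, new FrameMetadata());
        System.out.println(fo);
        spark.stop();
    }
}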
Use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.
The class MLContextUtil, method convertInputType.
/**
* Convert input types to internal SystemML representations
*
* @param parameterName
* The name of the input parameter
* @param parameterValue
* The value of the input parameter
* @param metadata
* matrix/frame metadata
* @return input in SystemML data representation
*/
public static Data convertInputType(String parameterName, Object parameterValue, Metadata metadata) {
String name = parameterName;
Object value = parameterValue;
boolean hasMetadata = (metadata != null);
boolean hasMatrixMetadata = hasMetadata && (metadata instanceof MatrixMetadata);
boolean hasFrameMetadata = hasMetadata && (metadata instanceof FrameMetadata);
if (name == null) {
throw new MLContextException("Input parameter name is null");
} else if (value == null) {
throw new MLContextException("Input parameter value is null for: " + parameterName);
} else if (value instanceof JavaRDD<?>) {
@SuppressWarnings("unchecked") JavaRDD<String> javaRDD = (JavaRDD<String>) value;
if (hasMatrixMetadata) {
MatrixMetadata matrixMetadata = (MatrixMetadata) metadata;
if (matrixMetadata.getMatrixFormat() == MatrixFormat.IJV) {
return MLContextConversionUtil.javaRDDStringIJVToMatrixObject(javaRDD, matrixMetadata);
} else {
return MLContextConversionUtil.javaRDDStringCSVToMatrixObject(javaRDD, matrixMetadata);
}
} else if (hasFrameMetadata) {
FrameMetadata frameMetadata = (FrameMetadata) metadata;
if (frameMetadata.getFrameFormat() == FrameFormat.IJV) {
return MLContextConversionUtil.javaRDDStringIJVToFrameObject(javaRDD, frameMetadata);
} else {
return MLContextConversionUtil.javaRDDStringCSVToFrameObject(javaRDD, frameMetadata);
}
} else if (!hasMetadata) {
String firstLine = javaRDD.first();
boolean isAllNumbers = isCSVLineAllNumbers(firstLine);
if (isAllNumbers) {
return MLContextConversionUtil.javaRDDStringCSVToMatrixObject(javaRDD);
} else {
return MLContextConversionUtil.javaRDDStringCSVToFrameObject(javaRDD);
}
}
} else if (value instanceof RDD<?>) {
@SuppressWarnings("unchecked") RDD<String> rdd = (RDD<String>) value;
if (hasMatrixMetadata) {
MatrixMetadata matrixMetadata = (MatrixMetadata) metadata;
if (matrixMetadata.getMatrixFormat() == MatrixFormat.IJV) {
return MLContextConversionUtil.rddStringIJVToMatrixObject(rdd, matrixMetadata);
} else {
return MLContextConversionUtil.rddStringCSVToMatrixObject(rdd, matrixMetadata);
}
} else if (hasFrameMetadata) {
FrameMetadata frameMetadata = (FrameMetadata) metadata;
if (frameMetadata.getFrameFormat() == FrameFormat.IJV) {
return MLContextConversionUtil.rddStringIJVToFrameObject(rdd, frameMetadata);
} else {
return MLContextConversionUtil.rddStringCSVToFrameObject(rdd, frameMetadata);
}
} else if (!hasMetadata) {
String firstLine = rdd.first();
boolean isAllNumbers = isCSVLineAllNumbers(firstLine);
if (isAllNumbers) {
return MLContextConversionUtil.rddStringCSVToMatrixObject(rdd);
} else {
return MLContextConversionUtil.rddStringCSVToFrameObject(rdd);
}
}
} else if (value instanceof MatrixBlock) {
MatrixBlock matrixBlock = (MatrixBlock) value;
return MLContextConversionUtil.matrixBlockToMatrixObject(name, matrixBlock, (MatrixMetadata) metadata);
} else if (value instanceof FrameBlock) {
FrameBlock frameBlock = (FrameBlock) value;
return MLContextConversionUtil.frameBlockToFrameObject(name, frameBlock, (FrameMetadata) metadata);
} else if (value instanceof Dataset<?>) {
@SuppressWarnings("unchecked") Dataset<Row> dataFrame = (Dataset<Row>) value;
dataFrame = MLUtils.convertVectorColumnsToML(dataFrame);
if (hasMatrixMetadata) {
return MLContextConversionUtil.dataFrameToMatrixObject(dataFrame, (MatrixMetadata) metadata);
} else if (hasFrameMetadata) {
return MLContextConversionUtil.dataFrameToFrameObject(dataFrame, (FrameMetadata) metadata);
} else if (!hasMetadata) {
boolean looksLikeMatrix = doesDataFrameLookLikeMatrix(dataFrame);
if (looksLikeMatrix) {
return MLContextConversionUtil.dataFrameToMatrixObject(dataFrame);
} else {
return MLContextConversionUtil.dataFrameToFrameObject(dataFrame);
}
}
} else if (value instanceof Matrix) {
Matrix matrix = (Matrix) value;
if ((matrix.hasBinaryBlocks()) && (!matrix.hasMatrixObject())) {
if (metadata == null) {
metadata = matrix.getMatrixMetadata();
}
JavaPairRDD<MatrixIndexes, MatrixBlock> binaryBlocks = matrix.toBinaryBlocks();
return MLContextConversionUtil.binaryBlocksToMatrixObject(binaryBlocks, (MatrixMetadata) metadata);
} else {
return matrix.toMatrixObject();
}
} else if (value instanceof Frame) {
Frame frame = (Frame) value;
if ((frame.hasBinaryBlocks()) && (!frame.hasFrameObject())) {
if (metadata == null) {
metadata = frame.getFrameMetadata();
}
JavaPairRDD<Long, FrameBlock> binaryBlocks = frame.toBinaryBlocks();
return MLContextConversionUtil.binaryBlocksToFrameObject(binaryBlocks, (FrameMetadata) metadata);
} else {
return frame.toFrameObject();
}
} else if (value instanceof double[][]) {
double[][] doubleMatrix = (double[][]) value;
return MLContextConversionUtil.doubleMatrixToMatrixObject(name, doubleMatrix, (MatrixMetadata) metadata);
} else if (value instanceof URL) {
URL url = (URL) value;
return MLContextConversionUtil.urlToMatrixObject(url, (MatrixMetadata) metadata);
} else if (value instanceof Integer) {
return new IntObject((Integer) value);
} else if (value instanceof Double) {
return new DoubleObject((Double) value);
} else if (value instanceof String) {
return new StringObject((String) value);
} else if (value instanceof Boolean) {
return new BooleanObject((Boolean) value);
}
return null;
}
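A small sketch of the scalar branches at the end of this method, using hypothetical parameter names and values; unlike the RDD and DataFrame branches, these paths do not require a Spark context. In normal use the method is invoked internally when inputs are bound to a Script, so calling it directly as below is only for illustration.

import org.apache.sysml.api.mlcontext.MLContextUtil;
import org.apache.sysml.runtime.instructions.cp.Data;

public class ConvertInputTypeSketch {
    public static void main(String[] args) {
        // null metadata is fine for scalar inputs
        Data d = MLContextUtil.convertInputType("threshold", Double.valueOf(0.5), null); // DoubleObject
        Data i = MLContextUtil.convertInputType("maxIter", Integer.valueOf(100), null);  // IntObject
        Data s = MLContextUtil.convertInputType("mode", "exact", null);                  // StringObject
        Data b = MLContextUtil.convertInputType("verbose", Boolean.TRUE, null);          // BooleanObject
        System.out.println(d + ", " + i + ", " + s + ", " + b);
    }
}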
Use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.
The class Recompiler, method executeInMemoryFrameReblock.
public static void executeInMemoryFrameReblock(ExecutionContext ec, String varin, String varout) {
FrameObject in = ec.getFrameObject(varin);
FrameObject out = ec.getFrameObject(varout);
// read text input frame (through buffer pool, frame object carries all relevant
// information including additional arguments for csv reblock)
FrameBlock fb = in.acquireRead();
// set output (incl update matrix characteristics)
out.acquireModify(fb);
out.release();
in.release();
}
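The acquireRead / acquireModify / release calls follow SystemML's buffer-pool protocol for cacheable data objects. The fragment below is a minimal annotated sketch of the same pattern; the variable names and the "A"/"B" bindings are hypothetical, and an ExecutionContext ec is assumed to exist.

FrameObject src = ec.getFrameObject("A");  // hypothetical input variable
FrameObject dst = ec.getFrameObject("B");  // hypothetical output variable
FrameBlock fb = src.acquireRead();  // pin the input frame in memory, reading it through the buffer pool if needed
dst.acquireModify(fb);              // hand the in-memory block to the output and update its matrix characteristics
dst.release();                      // unpin the output so the buffer pool may evict or persist it
src.release();                      // unpin the input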
Use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.
The class DataFrameRowFrameConversionTest, method testDataFrameConversion.
private void testDataFrameConversion(ValueType vt, boolean singleColBlock, boolean dense, boolean unknownDims) {
boolean oldConfig = DMLScript.USE_LOCAL_SPARK_CONFIG;
RUNTIME_PLATFORM oldPlatform = DMLScript.rtplatform;
try {
DMLScript.USE_LOCAL_SPARK_CONFIG = true;
DMLScript.rtplatform = RUNTIME_PLATFORM.HYBRID_SPARK;
// generate input data and setup metadata
int cols = singleColBlock ? cols1 : cols2;
double sparsity = dense ? sparsity1 : sparsity2;
double[][] A = getRandomMatrix(rows1, cols, -10, 10, sparsity, 2373);
A = (vt == ValueType.INT) ? TestUtils.round(A) : A;
MatrixBlock mbA = DataConverter.convertToMatrixBlock(A);
FrameBlock fbA = DataConverter.convertToFrameBlock(mbA, vt);
int blksz = ConfigurationManager.getBlocksize();
MatrixCharacteristics mc1 = new MatrixCharacteristics(rows1, cols, blksz, blksz, mbA.getNonZeros());
MatrixCharacteristics mc2 = unknownDims ? new MatrixCharacteristics() : new MatrixCharacteristics(mc1);
ValueType[] schema = UtilFunctions.nCopies(cols, vt);
// get binary block input rdd
JavaPairRDD<Long, FrameBlock> in = SparkExecutionContext.toFrameJavaPairRDD(sc, fbA);
// frame - dataframe - frame conversion
Dataset<Row> df = FrameRDDConverterUtils.binaryBlockToDataFrame(spark, in, mc1, schema);
JavaPairRDD<Long, FrameBlock> out = FrameRDDConverterUtils.dataFrameToBinaryBlock(sc, df, mc2, true);
// get output frame block
FrameBlock fbB = SparkExecutionContext.toFrameBlock(out, schema, rows1, cols);
// compare frame blocks
MatrixBlock mbB = DataConverter.convertToMatrixBlock(fbB);
double[][] B = DataConverter.convertToDoubleMatrix(mbB);
TestUtils.compareMatrices(A, B, rows1, cols, eps);
} catch (Exception ex) {
throw new RuntimeException(ex);
} finally {
DMLScript.USE_LOCAL_SPARK_CONFIG = oldConfig;
DMLScript.rtplatform = oldPlatform;
}
}
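The same frame to DataFrame to frame round trip can be sketched outside the JUnit harness. The fragment below assumes an existing SparkSession spark and JavaSparkContext sc; the dimensions and generated data are illustrative, while the conversion calls mirror those used in the test above.

int rows = 100, cols = 10;
int blksz = ConfigurationManager.getBlocksize();
// illustrative dense input data
double[][] A = new double[rows][cols];
for (int i = 0; i < rows; i++)
    for (int j = 0; j < cols; j++)
        A[i][j] = i * cols + j;
MatrixBlock mb = DataConverter.convertToMatrixBlock(A);
FrameBlock fb = DataConverter.convertToFrameBlock(mb, ValueType.DOUBLE);
ValueType[] schema = UtilFunctions.nCopies(cols, ValueType.DOUBLE);
MatrixCharacteristics mc = new MatrixCharacteristics(rows, cols, blksz, blksz, mb.getNonZeros());
// frame -> binary-block RDD -> DataFrame -> binary-block RDD -> frame
JavaPairRDD<Long, FrameBlock> in = SparkExecutionContext.toFrameJavaPairRDD(sc, fb);
Dataset<Row> df = FrameRDDConverterUtils.binaryBlockToDataFrame(spark, in, mc, schema);
// containsID=true because the DataFrame produced above carries a row-index column (as in the test)
JavaPairRDD<Long, FrameBlock> out = FrameRDDConverterUtils.dataFrameToBinaryBlock(sc, df, mc, true);
FrameBlock roundTrip = SparkExecutionContext.toFrameBlock(out, schema, rows, cols);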
Use of org.apache.sysml.runtime.matrix.data.FrameBlock in project incubator-systemml by apache.
The class DataFrameVectorFrameConversionTest, method testDataFrameConversion.
private void testDataFrameConversion(ValueType[] schema, boolean containsID, boolean dense, boolean unknownDims) {
boolean oldConfig = DMLScript.USE_LOCAL_SPARK_CONFIG;
RUNTIME_PLATFORM oldPlatform = DMLScript.rtplatform;
try {
DMLScript.USE_LOCAL_SPARK_CONFIG = true;
DMLScript.rtplatform = RUNTIME_PLATFORM.HYBRID_SPARK;
// generate input data and setup metadata
int cols = schema.length + colsVector - 1;
double sparsity = dense ? sparsity1 : sparsity2;
double[][] A = TestUtils.round(getRandomMatrix(rows1, cols, -10, 1000, sparsity, 2373));
MatrixBlock mbA = DataConverter.convertToMatrixBlock(A);
int blksz = ConfigurationManager.getBlocksize();
MatrixCharacteristics mc1 = new MatrixCharacteristics(rows1, cols, blksz, blksz, mbA.getNonZeros());
MatrixCharacteristics mc2 = unknownDims ? new MatrixCharacteristics() : new MatrixCharacteristics(mc1);
// create input data frame
Dataset<Row> df = createDataFrame(spark, mbA, containsID, schema);
// dataframe - frame conversion
JavaPairRDD<Long, FrameBlock> out = FrameRDDConverterUtils.dataFrameToBinaryBlock(sc, df, mc2, containsID);
// get output frame block
FrameBlock fbB = SparkExecutionContext.toFrameBlock(out, UtilFunctions.nCopies(cols, ValueType.DOUBLE), rows1, cols);
// compare frame blocks
MatrixBlock mbB = DataConverter.convertToMatrixBlock(fbB);
double[][] B = DataConverter.convertToDoubleMatrix(mbB);
TestUtils.compareMatrices(A, B, rows1, cols, eps);
} catch (Exception ex) {
throw new RuntimeException(ex);
} finally {
DMLScript.USE_LOCAL_SPARK_CONFIG = oldConfig;
DMLScript.rtplatform = oldPlatform;
}
}
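For the vector path in particular, a DataFrame with an ML Vector column (org.apache.spark.ml.linalg) can be fed to dataFrameToBinaryBlock, which expands the vector into individual frame columns. The fragment below is an illustrative sketch (column names and data are assumptions) and presumes an existing SparkSession spark and JavaSparkContext sc.

// two double columns plus a 3-element vector column, i.e. 5 frame columns after conversion
StructType schema = new StructType()
    .add("c1", DataTypes.DoubleType)
    .add("c2", DataTypes.DoubleType)
    .add("vec", org.apache.spark.ml.linalg.SQLDataTypes.VectorType());
List<Row> rows = Arrays.asList(
    RowFactory.create(1.0, 2.0, Vectors.dense(3.0, 4.0, 5.0)),
    RowFactory.create(6.0, 7.0, Vectors.dense(8.0, 9.0, 10.0)));
Dataset<Row> df = spark.createDataFrame(rows, schema);
// containsID=false: no row-index column, so row indexes are generated during conversion;
// an empty MatrixCharacteristics mirrors the unknownDims case exercised by the test above
JavaPairRDD<Long, FrameBlock> out =
    FrameRDDConverterUtils.dataFrameToBinaryBlock(sc, df, new MatrixCharacteristics(), false);
FrameBlock fb = SparkExecutionContext.toFrameBlock(out, UtilFunctions.nCopies(5, ValueType.DOUBLE), 2, 5);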