Search in sources :

Example 1 with CommaSeparatedValueStringToDoubleArrayRow

Use of org.apache.sysml.test.integration.mlcontext.MLContextTest.CommaSeparatedValueStringToDoubleArrayRow in the incubator-systemml project by Apache.

Class MLContextFrameTest, method testInputFrameAndMatrixOutputMatrix.

/**
 * Passes a DataFrame-backed frame ("A"), a DataFrame-backed matrix ("B") and a
 * scalar ("s") into a DML script and checks the matrix output "M".
 *
 * <p>The script applies {@code transformencode} with a recode spec on columns 1
 * and 2 of A, multiplies the encoded result by B, and scales the product by s.
 * The test expects M to have the first-column values 6.0, 12.0 and 18.0.
 */
@Test
public void testInputFrameAndMatrixOutputMatrix() {
    System.out.println("MLContextFrameTest - input frame and matrix, output matrix");

    // Raw CSV lines for frame A: a string column followed by a double column.
    List<String> frameLines = new ArrayList<>();
    frameLines.add("Test1,4.0");
    frameLines.add("Test2,5.0");
    frameLines.add("Test3,6.0");
    JavaRDD<String> frameLineRdd = sc.parallelize(frameLines);
    ValueType[] frameValueTypes = { ValueType.STRING, ValueType.DOUBLE };

    // Raw lines for matrix B: one double value per row.
    List<String> matrixLines = new ArrayList<>();
    matrixLines.add("1.0");
    matrixLines.add("2.0");
    JavaRDD<String> matrixLineRdd = sc.parallelize(matrixLines);

    // Convert both text RDDs into Row RDDs.
    JavaRDD<Row> frameRows = FrameRDDConverterUtils.csvToRowRDD(sc, frameLineRdd, CSV_DELIM, frameValueTypes);
    JavaRDD<Row> matrixRows = matrixLineRdd.map(new CommaSeparatedValueStringToDoubleArrayRow());

    // DataFrame for A with columns "1" (string) and "2" (double).
    List<StructField> frameFields = new ArrayList<>();
    frameFields.add(DataTypes.createStructField("1", DataTypes.StringType, true));
    frameFields.add(DataTypes.createStructField("2", DataTypes.DoubleType, true));
    StructType frameStruct = DataTypes.createStructType(frameFields);
    Dataset<Row> frameDf = spark.createDataFrame(frameRows, frameStruct);

    // DataFrame for B with a single double column "1".
    List<StructField> matrixFields = new ArrayList<>();
    matrixFields.add(DataTypes.createStructField("1", DataTypes.DoubleType, true));
    StructType matrixStruct = DataTypes.createStructType(matrixFields);
    Dataset<Row> matrixDf = spark.createDataFrame(matrixRows, matrixStruct);

    String dmlSource = "[tA, tAM] = transformencode (target = A, spec = \"{ids: true ,recode: [ 1, 2 ]}\");\n"
            + "C = tA %*% B;\n"
            + "M = s * C;";

    // Dimensions are taken from the DataFrames themselves (row count, column count).
    FrameMetadata frameMeta =
            new FrameMetadata(FrameFormat.CSV, frameDf.count(), (long) frameDf.columns().length);
    MatrixMetadata matrixMeta =
            new MatrixMetadata(MatrixFormat.CSV, matrixDf.count(), (long) matrixDf.columns().length);

    Script script = dml(dmlSource)
            .in("A", frameDf, frameMeta)
            .in("B", matrixDf, matrixMeta)
            .in("s", 2)
            .out("M");
    MLResults results = ml.execute(script);

    // Compare the first column of M against the expected values, row by row.
    double[][] actual = results.getMatrixAs2DDoubleArray("M");
    double[] expectedFirstColumn = { 6.0, 12.0, 18.0 };
    for (int row = 0; row < expectedFirstColumn.length; row++) {
        Assert.assertEquals(expectedFirstColumn[row], actual[row][0], 0.0);
    }
}
Also used : Script(org.apache.sysml.api.mlcontext.Script) StructType(org.apache.spark.sql.types.StructType) ValueType(org.apache.sysml.parser.Expression.ValueType) MLResults(org.apache.sysml.api.mlcontext.MLResults) ArrayList(java.util.ArrayList) CommaSeparatedValueStringToDoubleArrayRow(org.apache.sysml.test.integration.mlcontext.MLContextTest.CommaSeparatedValueStringToDoubleArrayRow) StructField(org.apache.spark.sql.types.StructField) Row(org.apache.spark.sql.Row) CommaSeparatedValueStringToDoubleArrayRow(org.apache.sysml.test.integration.mlcontext.MLContextTest.CommaSeparatedValueStringToDoubleArrayRow) MatrixMetadata(org.apache.sysml.api.mlcontext.MatrixMetadata) FrameMetadata(org.apache.sysml.api.mlcontext.FrameMetadata) Test(org.junit.Test)

Aggregations

ArrayList (java.util.ArrayList): 1, Row (org.apache.spark.sql.Row): 1, StructField (org.apache.spark.sql.types.StructField): 1, StructType (org.apache.spark.sql.types.StructType): 1, FrameMetadata (org.apache.sysml.api.mlcontext.FrameMetadata): 1, MLResults (org.apache.sysml.api.mlcontext.MLResults): 1, MatrixMetadata (org.apache.sysml.api.mlcontext.MatrixMetadata): 1, Script (org.apache.sysml.api.mlcontext.Script): 1, ValueType (org.apache.sysml.parser.Expression.ValueType): 1, CommaSeparatedValueStringToDoubleArrayRow (org.apache.sysml.test.integration.mlcontext.MLContextTest.CommaSeparatedValueStringToDoubleArrayRow): 1, Test (org.junit.Test): 1