
Example 16 with StructType

Use of org.apache.spark.sql.types.StructType in the project incubator-systemml by apache.

The class MLContextTest, method testInputBinaryBlockMatrixPYDML.

@Test
public void testInputBinaryBlockMatrixPYDML() {
    System.out.println("MLContextTest - input BinaryBlockMatrix PYDML");
    List<String> list = new ArrayList<String>();
    list.add("10,20,30");
    list.add("40,50,60");
    list.add("70,80,90");
    JavaRDD<String> javaRddString = sc.parallelize(list);
    JavaRDD<Row> javaRddRow = javaRddString.map(new CommaSeparatedValueStringToRow());
    List<StructField> fields = new ArrayList<StructField>();
    fields.add(DataTypes.createStructField("C1", DataTypes.StringType, true));
    fields.add(DataTypes.createStructField("C2", DataTypes.StringType, true));
    fields.add(DataTypes.createStructField("C3", DataTypes.StringType, true));
    StructType schema = DataTypes.createStructType(fields);
    Dataset<Row> dataFrame = spark.createDataFrame(javaRddRow, schema);
    BinaryBlockMatrix binaryBlockMatrix = new BinaryBlockMatrix(dataFrame);
    Script script = pydml("avg = avg(M)").in("M", binaryBlockMatrix).out("avg");
    double avg = ml.execute(script).getDouble("avg");
    Assert.assertEquals(50.0, avg, 0.0);
}
Also used: Script (org.apache.sysml.api.mlcontext.Script), StructType (org.apache.spark.sql.types.StructType), ArrayList (java.util.ArrayList), BinaryBlockMatrix (org.apache.sysml.api.mlcontext.BinaryBlockMatrix), StructField (org.apache.spark.sql.types.StructField), Row (org.apache.spark.sql.Row), Test (org.junit.Test)
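The mapper CommaSeparatedValueStringToRow used above is a helper defined inside MLContextTest and is not reproduced on this page. A minimal sketch of what an equivalent Spark Function could look like, assuming it simply splits each comma-separated line into string fields:

// Hypothetical sketch only; the real helper lives in MLContextTest.
import org.apache.spark.api.java.function.Function;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;

public class CommaSeparatedValueStringToRow implements Function<String, Row> {
    private static final long serialVersionUID = 1L;

    @Override
    public Row call(String line) throws Exception {
        // "10,20,30" -> Row("10", "20", "30")
        return RowFactory.create((Object[]) line.split(","));
    }
}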

Example 17 with StructType

Use of org.apache.spark.sql.types.StructType in the project incubator-systemml by apache.

The class MLContextTest, method testDataFrameSumDMLDoublesWithIDColumn.

@Test
public void testDataFrameSumDMLDoublesWithIDColumn() {
    System.out.println("MLContextTest - DataFrame sum DML, doubles with ID column");
    List<String> list = new ArrayList<String>();
    list.add("1,1,2,3");
    list.add("2,4,5,6");
    list.add("3,7,8,9");
    JavaRDD<String> javaRddString = sc.parallelize(list);
    JavaRDD<Row> javaRddRow = javaRddString.map(new CommaSeparatedValueStringToDoubleArrayRow());
    List<StructField> fields = new ArrayList<StructField>();
    fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, DataTypes.DoubleType, true));
    fields.add(DataTypes.createStructField("C1", DataTypes.DoubleType, true));
    fields.add(DataTypes.createStructField("C2", DataTypes.DoubleType, true));
    fields.add(DataTypes.createStructField("C3", DataTypes.DoubleType, true));
    StructType schema = DataTypes.createStructType(fields);
    Dataset<Row> dataFrame = spark.createDataFrame(javaRddRow, schema);
    MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_DOUBLES_WITH_INDEX);
    Script script = dml("print('sum: ' + sum(M));").in("M", dataFrame, mm);
    setExpectedStdOut("sum: 45.0");
    ml.execute(script);
}
Also used: Script (org.apache.sysml.api.mlcontext.Script), StructType (org.apache.spark.sql.types.StructType), ArrayList (java.util.ArrayList), StructField (org.apache.spark.sql.types.StructField), Row (org.apache.spark.sql.Row), MatrixMetadata (org.apache.sysml.api.mlcontext.MatrixMetadata), Test (org.junit.Test)
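Examples 17 through 20 rely on a second helper, CommaSeparatedValueStringToDoubleArrayRow, also defined in MLContextTest and not shown here. A minimal sketch, assuming it parses every comma-separated field as a double:

// Hypothetical sketch only; the real helper lives in MLContextTest.
import org.apache.spark.api.java.function.Function;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;

public class CommaSeparatedValueStringToDoubleArrayRow implements Function<String, Row> {
    private static final long serialVersionUID = 1L;

    @Override
    public Row call(String line) throws Exception {
        String[] fields = line.split(",");
        Object[] values = new Object[fields.length];
        for (int i = 0; i < fields.length; i++) {
            // "1,1,2,3" -> Row(1.0, 1.0, 2.0, 3.0)
            values[i] = Double.parseDouble(fields[i]);
        }
        return RowFactory.create(values);
    }
}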

Example 18 with StructType

Use of org.apache.spark.sql.types.StructType in the project incubator-systemml by apache.

The class MLContextTest, method testDataFrameGoodMetadataDML.

@Test
public void testDataFrameGoodMetadataDML() {
    System.out.println("MLContextTest - DataFrame good metadata DML");
    List<String> list = new ArrayList<String>();
    list.add("10,20,30");
    list.add("40,50,60");
    list.add("70,80,90");
    JavaRDD<String> javaRddString = sc.parallelize(list);
    JavaRDD<Row> javaRddRow = javaRddString.map(new CommaSeparatedValueStringToDoubleArrayRow());
    List<StructField> fields = new ArrayList<StructField>();
    fields.add(DataTypes.createStructField("C1", DataTypes.DoubleType, true));
    fields.add(DataTypes.createStructField("C2", DataTypes.DoubleType, true));
    fields.add(DataTypes.createStructField("C3", DataTypes.DoubleType, true));
    StructType schema = DataTypes.createStructType(fields);
    Dataset<Row> dataFrame = spark.createDataFrame(javaRddRow, schema);
    MatrixMetadata mm = new MatrixMetadata(3, 3, 9);
    Script script = dml("print('sum: ' + sum(M));").in("M", dataFrame, mm);
    setExpectedStdOut("sum: 450.0");
    ml.execute(script);
}
Also used: Script (org.apache.sysml.api.mlcontext.Script), StructType (org.apache.spark.sql.types.StructType), ArrayList (java.util.ArrayList), StructField (org.apache.spark.sql.types.StructField), Row (org.apache.spark.sql.Row), MatrixMetadata (org.apache.sysml.api.mlcontext.MatrixMetadata), Test (org.junit.Test)

Example 19 with StructType

Use of org.apache.spark.sql.types.StructType in the project incubator-systemml by apache.

The class MLContextTest, method testDataFrameSumDMLDoublesWithIDColumnSortCheck.

@Test
public void testDataFrameSumDMLDoublesWithIDColumnSortCheck() {
    System.out.println("MLContextTest - DataFrame sum DML, doubles with ID column sort check");
    List<String> list = new ArrayList<String>();
    list.add("3,7,8,9");
    list.add("1,1,2,3");
    list.add("2,4,5,6");
    JavaRDD<String> javaRddString = sc.parallelize(list);
    JavaRDD<Row> javaRddRow = javaRddString.map(new CommaSeparatedValueStringToDoubleArrayRow());
    List<StructField> fields = new ArrayList<StructField>();
    fields.add(DataTypes.createStructField(RDDConverterUtils.DF_ID_COLUMN, DataTypes.DoubleType, true));
    fields.add(DataTypes.createStructField("C1", DataTypes.DoubleType, true));
    fields.add(DataTypes.createStructField("C2", DataTypes.DoubleType, true));
    fields.add(DataTypes.createStructField("C3", DataTypes.DoubleType, true));
    StructType schema = DataTypes.createStructType(fields);
    Dataset<Row> dataFrame = spark.createDataFrame(javaRddRow, schema);
    MatrixMetadata mm = new MatrixMetadata(MatrixFormat.DF_DOUBLES_WITH_INDEX);
    Script script = dml("print('M[1,1]: ' + as.scalar(M[1,1]));").in("M", dataFrame, mm);
    setExpectedStdOut("M[1,1]: 1.0");
    ml.execute(script);
}
Also used: Script (org.apache.sysml.api.mlcontext.Script), StructType (org.apache.spark.sql.types.StructType), ArrayList (java.util.ArrayList), StructField (org.apache.spark.sql.types.StructField), Row (org.apache.spark.sql.Row), MatrixMetadata (org.apache.sysml.api.mlcontext.MatrixMetadata), Test (org.junit.Test)

Example 20 with StructType

Use of org.apache.spark.sql.types.StructType in the project incubator-systemml by apache.

The class MLContextTest, method testDataFrameSumPYDMLDoublesWithNoIDColumnNoFormatSpecified.

@Test
public void testDataFrameSumPYDMLDoublesWithNoIDColumnNoFormatSpecified() {
    System.out.println("MLContextTest - DataFrame sum PYDML, doubles with no ID column, no format specified");
    List<String> list = new ArrayList<String>();
    list.add("2,2,2");
    list.add("3,3,3");
    list.add("4,4,4");
    JavaRDD<String> javaRddString = sc.parallelize(list);
    JavaRDD<Row> javaRddRow = javaRddString.map(new CommaSeparatedValueStringToDoubleArrayRow());
    List<StructField> fields = new ArrayList<StructField>();
    fields.add(DataTypes.createStructField("C1", DataTypes.DoubleType, true));
    fields.add(DataTypes.createStructField("C2", DataTypes.DoubleType, true));
    fields.add(DataTypes.createStructField("C3", DataTypes.DoubleType, true));
    StructType schema = DataTypes.createStructType(fields);
    Dataset<Row> dataFrame = spark.createDataFrame(javaRddRow, schema);
    Script script = pydml("print('sum: ' + sum(M))").in("M", dataFrame);
    setExpectedStdOut("sum: 27.0");
    ml.execute(script);
}
Also used: Script (org.apache.sysml.api.mlcontext.Script), StructField (org.apache.spark.sql.types.StructField), StructType (org.apache.spark.sql.types.StructType), ArrayList (java.util.ArrayList), Row (org.apache.spark.sql.Row), Test (org.junit.Test)
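Example 20 prints its result to stdout; as Example 16 shows, the same value can instead be returned to the caller through an output variable. A minimal follow-up sketch, reusing the dataFrame and ml objects from the test above:

// Bind the DataFrame as input and read the sum back from the results of execute().
Script sumScript = dml("s = sum(M)").in("M", dataFrame).out("s");
double s = ml.execute(sumScript).getDouble("s");
System.out.println("sum: " + s); // 27.0 for the data in Example 20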

Aggregations

StructType (org.apache.spark.sql.types.StructType): 44
StructField (org.apache.spark.sql.types.StructField): 40
Row (org.apache.spark.sql.Row): 38
ArrayList (java.util.ArrayList): 37
Test (org.junit.Test): 32
Script (org.apache.sysml.api.mlcontext.Script): 31
MatrixMetadata (org.apache.sysml.api.mlcontext.MatrixMetadata): 17
VectorUDT (org.apache.spark.ml.linalg.VectorUDT): 15
Vector (org.apache.spark.ml.linalg.Vector): 12
Tuple2 (scala.Tuple2): 6
FrameMetadata (org.apache.sysml.api.mlcontext.FrameMetadata): 5
MLResults (org.apache.sysml.api.mlcontext.MLResults): 5
DMLRuntimeException (org.apache.sysml.runtime.DMLRuntimeException): 5
SparkSession (org.apache.spark.sql.SparkSession): 4
DataType (org.apache.spark.sql.types.DataType): 4
ValueType (org.apache.sysml.parser.Expression.ValueType): 4
JavaSparkContext (org.apache.spark.api.java.JavaSparkContext): 3
MatrixBlock (org.apache.sysml.runtime.matrix.data.MatrixBlock): 3
CommaSeparatedValueStringToDoubleArrayRow (org.apache.sysml.test.integration.mlcontext.MLContextTest.CommaSeparatedValueStringToDoubleArrayRow): 3
HashMap (java.util.HashMap): 2
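Per the counts above, many of the remaining StructType usages pair it with VectorUDT and Vector from org.apache.spark.ml.linalg to feed vector-typed DataFrames into MLContext. A minimal sketch of such a schema, assuming the same spark and ml objects as the tests above and additional imports for org.apache.spark.ml.linalg.Vectors, VectorUDT, and org.apache.spark.sql.RowFactory:

// Sketch only: a single vector-typed column declared with VectorUDT (assumed setup, not taken from this page).
List<Row> rows = new ArrayList<Row>();
rows.add(RowFactory.create(Vectors.dense(1.0, 2.0, 3.0)));
rows.add(RowFactory.create(Vectors.dense(4.0, 5.0, 6.0)));
List<StructField> fields = new ArrayList<StructField>();
fields.add(DataTypes.createStructField("C1", new VectorUDT(), true));
StructType schema = DataTypes.createStructType(fields);
Dataset<Row> vectorDataFrame = spark.createDataFrame(rows, schema);
// Assumption: MLContext converts the vector column to a 2x3 matrix, so the sum should be 21.0.
Script script = dml("print('sum: ' + sum(M));").in("M", vectorDataFrame);
ml.execute(script);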