Search in sources :

Example 16 with Column

use of org.apache.spark.sql.Column in project ignite by apache.

From the class JavaEmbeddedIgniteRDDSelfTest, the method testQueryFieldsFromIgnite:

/**
 * Verifies SQL field queries against an embedded Ignite-backed RDD: first with
 * positional bind parameters, then with an equivalent {@link Column} filter
 * expression, asserting both return the same single matching row.
 *
 * @throws Exception If failed.
 */
@Test
public void testQueryFieldsFromIgnite() throws Exception {
    JavaSparkContext sc = createContext();
    JavaIgniteContext<String, Entity> ic = null;
    try {
        ic = new JavaIgniteContext<>(sc, new IgniteConfigProvider(), false);
        JavaIgniteRDD<String, Entity> cache = ic.fromCache(PARTITIONED_CACHE_NAME);
        // Populate the cache with 1001 entities (ids 0..1000).
        cache.savePairs(sc.parallelize(F.range(0, 1001), GRID_CNT).mapToPair(INT_TO_ENTITY_F), true, false);
        // Query using positional bind parameters.
        Dataset<Row> df = cache.sql("select id, name, salary from Entity where name = ? and salary = ?", "name50", 5000);
        df.printSchema();
        Row[] res = (Row[]) df.collect();
        assertEquals("Invalid result length", 1, res.length);
        assertEquals("Invalid result", 50, res[0].get(0));
        assertEquals("Invalid result", "name50", res[0].get(1));
        assertEquals("Invalid result", 5000, res[0].get(2));
        // Same predicate expressed as a Column filter expression.
        Column exp = new Column("NAME").equalTo("name50").and(new Column("SALARY").equalTo(5000));
        Dataset<Row> df0 = cache.sql("select id, name, salary from Entity").where(exp);
        // Fix: print the schema of the filtered dataset (was df.printSchema(), a copy-paste slip).
        df0.printSchema();
        Row[] res0 = (Row[]) df0.collect();
        assertEquals("Invalid result length", 1, res0.length);
        assertEquals("Invalid result", 50, res0[0].get(0));
        assertEquals("Invalid result", "name50", res0[0].get(1));
        assertEquals("Invalid result", 5000, res0[0].get(2));
        // Range predicate: ids 501..1000 -> 500 rows.
        assertEquals("Invalid count", 500, cache.sql("select id from Entity where id > 500").count());
    } finally {
        if (ic != null)
            ic.close(true);
        sc.stop();
    }
}
Also used : Column(org.apache.spark.sql.Column) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Row(org.apache.spark.sql.Row) GridCommonAbstractTest(org.apache.ignite.testframework.junits.common.GridCommonAbstractTest) Test(org.junit.Test)

Example 17 with Column

use of org.apache.spark.sql.Column in project ignite by apache.

From the class JavaStandaloneIgniteRDDSelfTest, the method testQueryFieldsFromIgnite:

/**
 * Verifies SQL field queries against a standalone Ignite-backed RDD: first with
 * positional bind parameters, then with an equivalent {@link Column} filter
 * expression, asserting both return the same single matching row.
 *
 * @throws Exception If failed.
 */
@Test
public void testQueryFieldsFromIgnite() throws Exception {
    JavaSparkContext sc = new JavaSparkContext("local[*]", "test");
    // Hoisted out of the try so it can be closed in finally (the original leaked
    // the JavaIgniteContext; the embedded-mode sibling test closes it).
    JavaIgniteContext<String, Entity> ic = null;
    try {
        ic = new JavaIgniteContext<>(sc, new IgniteConfigProvider());
        JavaIgniteRDD<String, Entity> cache = ic.fromCache(ENTITY_CACHE_NAME);
        // Populate the cache with 1001 entities (ids 0..1000).
        cache.savePairs(sc.parallelize(F.range(0, 1001), 2).mapToPair(INT_TO_ENTITY_F));
        // Query using positional bind parameters.
        Dataset<Row> df = cache.sql("select id, name, salary from Entity where name = ? and salary = ?", "name50", 5000);
        df.printSchema();
        Row[] res = (Row[]) df.collect();
        assertEquals("Invalid result length", 1, res.length);
        assertEquals("Invalid result", 50, res[0].get(0));
        assertEquals("Invalid result", "name50", res[0].get(1));
        assertEquals("Invalid result", 5000, res[0].get(2));
        // Same predicate expressed as a Column filter expression.
        Column exp = new Column("NAME").equalTo("name50").and(new Column("SALARY").equalTo(5000));
        Dataset<Row> df0 = cache.sql("select id, name, salary from Entity").where(exp);
        // Fix: print the schema of the filtered dataset (was df.printSchema(), a copy-paste slip).
        df0.printSchema();
        Row[] res0 = (Row[]) df0.collect();
        assertEquals("Invalid result length", 1, res0.length);
        assertEquals("Invalid result", 50, res0[0].get(0));
        assertEquals("Invalid result", "name50", res0[0].get(1));
        assertEquals("Invalid result", 5000, res0[0].get(2));
        // Range predicate: ids 501..1000 -> 500 rows.
        assertEquals("Invalid count", 500, cache.sql("select id from Entity where id > 500").count());
    } finally {
        if (ic != null)
            ic.close(true);
        sc.stop();
    }
}
Also used : Column(org.apache.spark.sql.Column) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Row(org.apache.spark.sql.Row) GridCommonAbstractTest(org.apache.ignite.testframework.junits.common.GridCommonAbstractTest) Test(org.junit.Test)

Example 18 with Column

use of org.apache.spark.sql.Column in project net.jgp.labs.spark by jgperrin.

From the class DataframeUtils, the method addMetadata:

/**
 * Returns a copy of {@code df} in which the column {@code colName} carries its
 * existing metadata plus the given string entry. The column's values are unchanged;
 * only its metadata is augmented.
 */
public static Dataset<Row> addMetadata(Dataset<Row> df, String colName, String key, String value) {
    // Merge the column's current metadata with the new key/value pair.
    MetadataBuilder builder = new MetadataBuilder()
        .withMetadata(ColumnUtils.getMetadata(df, colName))
        .putString(key, value);
    // Re-attach the same column under the same name, now with the merged metadata.
    return df.withColumn(colName, col(colName), builder.build());
}
Also used : MetadataBuilder(org.apache.spark.sql.types.MetadataBuilder) Column(org.apache.spark.sql.Column) Metadata(org.apache.spark.sql.types.Metadata)

Aggregations

Column (org.apache.spark.sql.Column)18 Test (org.junit.Test)8 DataFrame (org.apache.spark.sql.DataFrame)6 Nonnull (javax.annotation.Nonnull)5 StructField (org.apache.spark.sql.types.StructField)4 KyloCatalogClient (com.thinkbiganalytics.kylo.catalog.api.KyloCatalogClient)3 Row (org.apache.spark.sql.Row)3 ScalaUDF (org.apache.spark.sql.catalyst.expressions.ScalaUDF)3 DataType (org.apache.spark.sql.types.DataType)3 JdbcHighWaterMark (com.thinkbiganalytics.kylo.catalog.spark.sources.jdbc.JdbcHighWaterMark)2 DataSet (com.thinkbiganalytics.spark.DataSet)2 GridCommonAbstractTest (org.apache.ignite.testframework.junits.common.GridCommonAbstractTest)2 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)2 UserDefinedFunction (org.apache.spark.sql.UserDefinedFunction)2 Metadata (org.apache.spark.sql.types.Metadata)2 MetadataBuilder (org.apache.spark.sql.types.MetadataBuilder)2 StructType (org.apache.spark.sql.types.StructType)2 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 DefaultQueryResultColumn (com.thinkbiganalytics.discovery.model.DefaultQueryResultColumn)1 DataSetOptions (com.thinkbiganalytics.kylo.catalog.spi.DataSetOptions)1