
Example 26 with SparkSqlDialect

Use of org.apache.calcite.sql.dialect.SparkSqlDialect in project hopsworks by logicalclocks.

The class TestConstructorController, method testTreeWayHudiJoinSQLNode.

@Test
public void testTreeWayHudiJoinSQLNode() throws Exception {
    List<Feature> availableFirst = new ArrayList<>();
    availableFirst.add(new Feature("ft1", "fg0", "Float", null, null));
    List<Feature> availableSecond = new ArrayList<>();
    availableSecond.add(new Feature("ft2", "fg1", "Float", null, null));
    List<Feature> availableThird = new ArrayList<>();
    availableThird.add(new Feature("ft1", "fg2", "Float", null, null));
    Mockito.when(cachedFeaturegroupController.dropHudiSpecFeatures(Mockito.any())).thenReturn(availableSecond, availableThird, Stream.of(availableFirst, availableSecond, availableThird).flatMap(Collection::stream).collect(Collectors.toList()));
    fg1.getCachedFeaturegroup().setTimeTravelFormat(TimeTravelFormat.HUDI);
    fg2.getCachedFeaturegroup().setTimeTravelFormat(TimeTravelFormat.HUDI);
    fg3.getCachedFeaturegroup().setTimeTravelFormat(TimeTravelFormat.HUDI);
    Query leftQuery = new Query("fs1", "project_fs1", fg1, "fg0", availableFirst, availableFirst);
    Query secondQuery = new Query("fs1", "project_fs1", fg2, "fg1", availableSecond, availableSecond);
    Query thirdQuery = new Query("fs1", "project_fs1", fg3, "fg2", availableThird, availableThird);
    Join join = new Join(leftQuery, secondQuery, availableFirst, availableSecond, JoinType.INNER, null, singleEqualsJoinOperator);
    Join secondJoin = new Join(leftQuery, thirdQuery, availableFirst, availableFirst, JoinType.INNER, null, singleEqualsJoinOperator);
    leftQuery.setJoins(Arrays.asList(join, secondJoin));
    String query = target.generateSQL(leftQuery, false).toSqlString(new SparkSqlDialect(SqlDialect.EMPTY_CONTEXT)).getSql().replace("\n", " ");
    Assert.assertEquals("SELECT `fg0`.`ft1`, `fg1`.`ft2`, `fg2`.`ft1` " + "FROM `fg0` `fg0` " + "INNER JOIN `fg1` `fg1` ON `fg0`.`ft1` = `fg1`.`ft2` " + "INNER JOIN `fg2` `fg2` ON `fg0`.`ft1` = `fg2`.`ft1`", query);
}
Also used: SparkSqlDialect(org.apache.calcite.sql.dialect.SparkSqlDialect) ArrayList(java.util.ArrayList) Collection(java.util.Collection) Join(io.hops.hopsworks.common.featurestore.query.join.Join) Test(org.junit.Test)
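
Each of these examples renders a Calcite SqlNode tree by calling toSqlString with a SparkSqlDialect built from SqlDialect.EMPTY_CONTEXT and then reading the text with getSql() (Example 26 additionally flattens newlines with replace("\n", " ") before asserting). A minimal standalone sketch of that rendering pattern, using a hypothetical query string that is not taken from the tests, could look like this:

import org.apache.calcite.sql.SqlDialect;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.dialect.SparkSqlDialect;
import org.apache.calcite.sql.parser.SqlParseException;
import org.apache.calcite.sql.parser.SqlParser;

public class SparkSqlDialectSketch {
    public static void main(String[] args) throws SqlParseException {
        // Parse an arbitrary query into a Calcite AST (illustrative input only).
        SqlNode node = SqlParser.create("SELECT ft1 FROM fg0 WHERE ft1 > 1").parseQuery();
        // Re-emit the AST in Spark's dialect, which quotes identifiers with backticks.
        SqlDialect sparkDialect = new SparkSqlDialect(SqlDialect.EMPTY_CONTEXT);
        System.out.println(node.toSqlString(sparkDialect).getSql());
    }
}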

Example 27 with SparkSqlDialect

Use of org.apache.calcite.sql.dialect.SparkSqlDialect in project hopsworks by logicalclocks.

The class TestPitJoinController, method testGenerateSqlWithDefault.

@Test
public void testGenerateSqlWithDefault() {
    List<Feature> leftFeatures = new ArrayList<>();
    leftFeatures.add(new Feature("pk1", "fg0", fgLeft, true));
    leftFeatures.add(new Feature("pk2", "fg0", fgLeft));
    leftFeatures.add(new Feature("ts", "fg0", fgLeft));
    leftFeatures.add(new Feature("label", "fg0", fgLeft));
    List<Feature> rightFeatures = new ArrayList<>();
    rightFeatures.add(new Feature("pk1", "fg1", fgRight));
    rightFeatures.add(new Feature("pk2", "fg1", fgRight));
    rightFeatures.add(new Feature("ts", "fg1", fgRight));
    rightFeatures.add(new Feature("ft1", "fg1", fgRight, "string", "abc"));
    List<Feature> rightFeatures1 = new ArrayList<>();
    rightFeatures1.add(new Feature("pk1", "fg2", fgRight1));
    rightFeatures1.add(new Feature("ts", "fg2", fgRight1));
    rightFeatures1.add(new Feature("ft1", "fg2", fgRight1));
    List<Feature> leftOn = Arrays.asList(new Feature("pk1", "fg0", fgLeft), new Feature("pk2", "fg0", fgLeft));
    List<Feature> rightOn = Arrays.asList(new Feature("pk1", "fg1", fgRight), new Feature("pk2", "fg1", fgRight));
    // join on different pks
    List<Feature> leftOn1 = Collections.singletonList(new Feature("pk1", "fg0", fgLeft));
    List<Feature> rightOn1 = Collections.singletonList(new Feature("pk1", "fg2", fgRight1));
    List<SqlCondition> joinOperator = Arrays.asList(SqlCondition.EQUALS, SqlCondition.EQUALS);
    List<SqlCondition> joinOperator1 = Collections.singletonList(SqlCondition.EQUALS);
    Query query = new Query("fs", "project", fgLeft, "fg0", leftFeatures, leftFeatures, false, null);
    Query right = new Query("fs", "project", fgRight, "fg1", rightFeatures, rightFeatures, false, null);
    Query right1 = new Query("fs", "project", fgRight, "fg2", rightFeatures1, rightFeatures1, false, null);
    Join join = new Join(query, right, leftOn, rightOn, JoinType.INNER, null, joinOperator);
    Join join1 = new Join(query, right1, leftOn1, rightOn1, JoinType.INNER, null, joinOperator1);
    query.setJoins(Arrays.asList(join, join1));
    String result = pitJoinController.generateSQL(query, false).toSqlString(new SparkSqlDialect(SqlDialect.EMPTY_CONTEXT)).getSql();
    String expected = "WITH right_fg0 AS (SELECT *\n" + "FROM (SELECT `fg0`.`pk1`, `fg0`.`pk2`, `fg0`.`ts`, `fg0`.`label`, `fg0`.`pk1` `join_pk_pk1`, `fg0`.`ts` `join_evt_ts`, `fg1`.`pk1`, `fg1`.`pk2`, `fg1`.`ts`, " + "CASE WHEN `fg1`.`ft1` IS NULL THEN 'abc' ELSE `fg1`.`ft1` END `ft1`, " + "RANK() OVER (PARTITION BY `fg0`.`pk1`, `fg0`.`pk2`, `fg0`.`ts` ORDER BY `fg1`.`ts` DESC) pit_rank_hopsworks\n" + "FROM `fs`.`fg0_1` `fg0`\n" + "INNER JOIN `fs`.`fg1_1` `fg1` ON `fg0`.`pk1` = `fg1`.`pk1` AND `fg0`.`pk2` = `fg1`.`pk2` AND `fg0`.`ts` >= `fg1`.`ts`) NA\n" + "WHERE `pit_rank_hopsworks` = 1), " + "right_fg1 AS (SELECT *\n" + "FROM (SELECT `fg0`.`pk1`, `fg0`.`pk2`, `fg0`.`ts`, `fg0`.`label`, `fg0`.`pk1` `join_pk_pk1`, `fg0`.`ts` `join_evt_ts`, `fg2`.`pk1`, `fg2`.`ts`, `fg2`.`ft1`, " + "RANK() OVER (PARTITION BY `fg0`.`pk1`, `fg0`.`ts` ORDER BY `fg2`.`ts` DESC) pit_rank_hopsworks\n" + "FROM `fs`.`fg0_1` `fg0`\n" + "INNER JOIN `fs`.`fg1_1` `fg2` ON `fg0`.`pk1` = `fg2`.`pk1` AND `fg0`.`ts` >= `fg2`.`ts`) NA\n" + "WHERE `pit_rank_hopsworks` = 1) (" + "SELECT `right_fg0`.`pk1`, `right_fg0`.`pk2`, `right_fg0`.`ts`, `right_fg0`.`label`, `right_fg0`.`pk1`, `right_fg0`.`pk2`, `right_fg0`.`ts`, `right_fg0`.`ft1`, `right_fg1`.`pk1`, `right_fg1`.`ts`, `right_fg1`.`ft1`\n" + "FROM right_fg0\n" + "INNER JOIN right_fg1 ON `right_fg0`.`join_pk_pk1` = `right_fg1`.`join_pk_pk1` AND `right_fg0`.`join_evt_ts` = `right_fg1`.`join_evt_ts`)";
    Assert.assertEquals(expected, result);
}
Also used: SparkSqlDialect(org.apache.calcite.sql.dialect.SparkSqlDialect) Query(io.hops.hopsworks.common.featurestore.query.Query) ArrayList(java.util.ArrayList) Join(io.hops.hopsworks.common.featurestore.query.join.Join) Feature(io.hops.hopsworks.common.featurestore.query.Feature) SqlCondition(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlCondition) Test(org.junit.Test)
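
The 'abc' default in the expected SQL above is emitted as a searched CASE expression: CASE WHEN `fg1`.`ft1` IS NULL THEN 'abc' ELSE `fg1`.`ft1` END. A rough sketch of assembling such a node with Calcite's operator table follows; it only illustrates the Calcite API and is not the Hopsworks query-construction code, so the class and variable names are made up:

import java.util.Arrays;
import org.apache.calcite.sql.SqlDialect;
import org.apache.calcite.sql.SqlIdentifier;
import org.apache.calcite.sql.SqlLiteral;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.SqlNodeList;
import org.apache.calcite.sql.dialect.SparkSqlDialect;
import org.apache.calcite.sql.fun.SqlCase;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.sql.parser.SqlParserPos;

public class DefaultValueCaseSketch {
    public static void main(String[] args) {
        SqlIdentifier ft1 = new SqlIdentifier(Arrays.asList("fg1", "ft1"), SqlParserPos.ZERO);
        // WHEN `fg1`.`ft1` IS NULL
        SqlNodeList whenList = new SqlNodeList(
            Arrays.asList((SqlNode) SqlStdOperatorTable.IS_NULL.createCall(SqlParserPos.ZERO, ft1)),
            SqlParserPos.ZERO);
        // THEN 'abc'
        SqlNodeList thenList = new SqlNodeList(
            Arrays.asList((SqlNode) SqlLiteral.createCharString("abc", SqlParserPos.ZERO)),
            SqlParserPos.ZERO);
        // Passing null as the value argument yields a searched CASE; ft1 is the ELSE branch.
        SqlNode caseNode = SqlCase.createSwitched(SqlParserPos.ZERO, null, whenList, thenList, ft1);
        System.out.println(caseNode.toSqlString(new SparkSqlDialect(SqlDialect.EMPTY_CONTEXT)).getSql());
    }
}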

Example 28 with SparkSqlDialect

Use of org.apache.calcite.sql.dialect.SparkSqlDialect in project hopsworks by logicalclocks.

The class TestPitJoinController, method testGenerateSqlNoPKSelected.

@Test
public void testGenerateSqlNoPKSelected() {
    List<Feature> leftFeaturesSelected = new ArrayList<>();
    leftFeaturesSelected.add(new Feature("label", "fg0", fgLeft));
    List<Feature> leftFeaturesAvailable = new ArrayList<>();
    leftFeaturesAvailable.add(new Feature("pk1", "fg0", fgLeft, true));
    leftFeaturesAvailable.add(new Feature("pk2", "fg0", fgLeft));
    leftFeaturesAvailable.add(new Feature("ts", "fg0", fgLeft));
    leftFeaturesAvailable.add(new Feature("label", "fg0", fgLeft));
    List<Feature> rightFeatures = new ArrayList<>();
    rightFeatures.add(new Feature("pk1", "fg1", fgRight));
    rightFeatures.add(new Feature("pk2", "fg1", fgRight));
    rightFeatures.add(new Feature("ts", "fg1", fgRight));
    rightFeatures.add(new Feature("ft1", "fg1", fgRight));
    List<Feature> rightFeatures1 = new ArrayList<>();
    rightFeatures1.add(new Feature("pk1", "fg2", fgRight1));
    rightFeatures1.add(new Feature("ts", "fg2", fgRight1));
    rightFeatures1.add(new Feature("ft1", "fg2", fgRight1));
    List<Feature> leftOn = Arrays.asList(new Feature("pk1", "fg0", fgLeft), new Feature("pk2", "fg0", fgLeft));
    List<Feature> rightOn = Arrays.asList(new Feature("pk1", "fg1", fgRight), new Feature("pk2", "fg1", fgRight));
    // join on different pks
    List<Feature> leftOn1 = Collections.singletonList(new Feature("pk1", "fg0", fgLeft));
    List<Feature> rightOn1 = Collections.singletonList(new Feature("pk1", "fg2", fgRight1));
    List<SqlCondition> joinOperator = Arrays.asList(SqlCondition.EQUALS, SqlCondition.EQUALS);
    List<SqlCondition> joinOperator1 = Collections.singletonList(SqlCondition.EQUALS);
    Query query = new Query("fs", "project", fgLeft, "fg0", leftFeaturesSelected, leftFeaturesAvailable, false, null);
    Query right = new Query("fs", "project", fgRight, "fg1", rightFeatures, rightFeatures, false, null);
    Query right1 = new Query("fs", "project", fgRight1, "fg2", rightFeatures1, rightFeatures1, false, null);
    Join join = new Join(query, right, leftOn, rightOn, JoinType.INNER, null, joinOperator);
    Join join1 = new Join(query, right1, leftOn1, rightOn1, JoinType.INNER, null, joinOperator1);
    query.setJoins(Arrays.asList(join, join1));
    String result = pitJoinController.generateSQL(query, false).toSqlString(new SparkSqlDialect(SqlDialect.EMPTY_CONTEXT)).getSql();
    String expected = "WITH right_fg0 AS (SELECT *\n" + "FROM (SELECT `fg0`.`label`, `fg0`.`pk1` `join_pk_pk1`, `fg0`.`ts` `join_evt_ts`, `fg1`.`pk1`, `fg1`.`pk2`, `fg1`.`ts`, `fg1`.`ft1`, " + "RANK() OVER (PARTITION BY `fg0`.`pk1`, `fg0`.`pk2`, `fg0`.`ts` ORDER BY `fg1`.`ts` DESC) pit_rank_hopsworks\n" + "FROM `fs`.`fg0_1` `fg0`\n" + "INNER JOIN `fs`.`fg1_1` `fg1` ON `fg0`.`pk1` = `fg1`.`pk1` AND `fg0`.`pk2` = `fg1`.`pk2` AND `fg0`.`ts` >= `fg1`.`ts`) NA\n" + "WHERE `pit_rank_hopsworks` = 1), " + "right_fg1 AS (SELECT *\n" + "FROM (SELECT `fg0`.`label`, `fg0`.`pk1` `join_pk_pk1`, `fg0`.`ts` `join_evt_ts`, `fg2`.`pk1`, `fg2`.`ts`, `fg2`.`ft1`, " + "RANK() OVER (PARTITION BY `fg0`.`pk1`, `fg0`.`ts` ORDER BY `fg2`.`ts` DESC) pit_rank_hopsworks\n" + "FROM `fs`.`fg0_1` `fg0`\n" + "INNER JOIN `fs`.`fg2_1` `fg2` ON `fg0`.`pk1` = `fg2`.`pk1` AND `fg0`.`ts` >= `fg2`.`ts`) NA\n" + "WHERE `pit_rank_hopsworks` = 1) (" + "SELECT `right_fg0`.`label`, `right_fg0`.`pk1`, `right_fg0`.`pk2`, `right_fg0`.`ts`, `right_fg0`.`ft1`, `right_fg1`.`pk1`, `right_fg1`.`ts`, `right_fg1`.`ft1`\n" + "FROM right_fg0\n" + "INNER JOIN right_fg1 ON `right_fg0`.`join_pk_pk1` = `right_fg1`.`join_pk_pk1` AND `right_fg0`.`join_evt_ts` = `right_fg1`.`join_evt_ts`)";
    Assert.assertEquals(expected, result);
}
Also used: SparkSqlDialect(org.apache.calcite.sql.dialect.SparkSqlDialect) Query(io.hops.hopsworks.common.featurestore.query.Query) ArrayList(java.util.ArrayList) Join(io.hops.hopsworks.common.featurestore.query.join.Join) Feature(io.hops.hopsworks.common.featurestore.query.Feature) SqlCondition(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlCondition) Test(org.junit.Test)

Example 29 with SparkSqlDialect

Use of org.apache.calcite.sql.dialect.SparkSqlDialect in project hopsworks by logicalclocks.

The class TestPitJoinController, method testGenerateSqlTrainingDataset.

@Test
public void testGenerateSqlTrainingDataset() {
    List<Feature> leftFeatures = new ArrayList<>();
    leftFeatures.add(new Feature("pk1", "fg0", fgLeft, true, 1));
    leftFeatures.add(new Feature("pk2", "fg0", fgLeft, false, 2));
    leftFeatures.add(new Feature("ts", "fg0", fgLeft, false, 3));
    leftFeatures.add(new Feature("label", "fg0", fgLeft, false, 4));
    // add all features to left query as we do for training datasets
    leftFeatures.add(new Feature("ft1", "fg1", fgRight, false, 5));
    leftFeatures.add(new Feature("ft2", "fg2", fgRight1, false, 6));
    List<Feature> leftOn = Arrays.asList(new Feature("pk1", "fg0", fgLeft), new Feature("pk2", "fg0", fgLeft));
    List<Feature> rightOn = Arrays.asList(new Feature("pk1", "fg1", fgRight), new Feature("pk2", "fg1", fgRight));
    // join on different pks
    List<Feature> leftOn1 = Collections.singletonList(new Feature("pk1", "fg0", fgLeft));
    List<Feature> rightOn1 = Collections.singletonList(new Feature("pk1", "fg2", fgRight1));
    List<SqlCondition> joinOperator = Arrays.asList(SqlCondition.EQUALS, SqlCondition.EQUALS);
    List<SqlCondition> joinOperator1 = Collections.singletonList(SqlCondition.EQUALS);
    Query query = new Query("fs", "project", fgLeft, "fg0", leftFeatures, leftFeatures, false, null);
    Query right = new Query("fs", "project", fgRight, "fg1", new ArrayList<>(), new ArrayList<>(), false, null);
    Query right1 = new Query("fs", "project", fgRight1, "fg2", new ArrayList<>(), new ArrayList<>(), false, null);
    Join join = new Join(query, right, leftOn, rightOn, JoinType.INNER, null, joinOperator);
    Join join1 = new Join(query, right1, leftOn1, rightOn1, JoinType.INNER, null, joinOperator1);
    query.setJoins(Arrays.asList(join, join1));
    String result = pitJoinController.generateSQL(query, true).toSqlString(new SparkSqlDialect(SqlDialect.EMPTY_CONTEXT)).getSql();
    String expected = "WITH right_fg0 AS (SELECT *\n" + "FROM (SELECT `fg0`.`pk1`, `fg0`.`pk2`, `fg0`.`ts`, `fg0`.`label`, `fg1`.`ft1`, `fg0`.`pk1` `join_pk_pk1`, `fg0`.`ts` `join_evt_ts`, " + "RANK() OVER (PARTITION BY `fg0`.`pk1`, `fg0`.`pk2`, `fg0`.`ts` ORDER BY `fg1`.`ts` DESC) pit_rank_hopsworks\n" + "FROM `fs`.`fg0_1` `fg0`\n" + "INNER JOIN `fs`.`fg1_1` `fg1` ON `fg0`.`pk1` = `fg1`.`pk1` AND `fg0`.`pk2` = `fg1`.`pk2` AND `fg0`.`ts` >= `fg1`.`ts`) NA\n" + "WHERE `pit_rank_hopsworks` = 1), " + "right_fg1 AS (SELECT *\n" + "FROM (SELECT `fg0`.`pk1`, `fg0`.`pk2`, `fg0`.`ts`, `fg0`.`label`, `fg2`.`ft2`, `fg0`.`pk1` `join_pk_pk1`, `fg0`.`ts` `join_evt_ts`, " + "RANK() OVER (PARTITION BY `fg0`.`pk1`, `fg0`.`ts` ORDER BY `fg2`.`ts` DESC) pit_rank_hopsworks\n" + "FROM `fs`.`fg0_1` `fg0`\n" + "INNER JOIN `fs`.`fg2_1` `fg2` ON `fg0`.`pk1` = `fg2`.`pk1` AND `fg0`.`ts` >= `fg2`.`ts`) NA\n" + "WHERE `pit_rank_hopsworks` = 1) (" + "SELECT `right_fg0`.`pk1`, `right_fg0`.`pk2`, `right_fg0`.`ts`, `right_fg0`.`label`, `right_fg0`.`ft1`, `right_fg1`.`ft2`\n" + "FROM right_fg0\n" + "INNER JOIN right_fg1 ON `right_fg0`.`join_pk_pk1` = `right_fg1`.`join_pk_pk1` AND `right_fg0`.`join_evt_ts` = `right_fg1`.`join_evt_ts`)";
    Assert.assertEquals(expected, result);
}
Also used: SparkSqlDialect(org.apache.calcite.sql.dialect.SparkSqlDialect) Query(io.hops.hopsworks.common.featurestore.query.Query) ArrayList(java.util.ArrayList) Join(io.hops.hopsworks.common.featurestore.query.join.Join) Feature(io.hops.hopsworks.common.featurestore.query.Feature) SqlCondition(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlCondition) Test(org.junit.Test)

Example 30 with SparkSqlDialect

Use of org.apache.calcite.sql.dialect.SparkSqlDialect in project hopsworks by logicalclocks.

The class TestPitJoinController, method testRankOverAs.

@Test
public void testRankOverAs() {
    List<Feature> partitionFeatures = new ArrayList<>();
    partitionFeatures.add(new Feature("pk1", "fg1"));
    partitionFeatures.add(new Feature("pk2", "fg1"));
    partitionFeatures.add(new Feature("ts", "fg1"));
    Feature orderByFeature = new Feature("ts", "fg2");
    SqlNode rank = pitJoinController.rankOverAs(partitionFeatures, orderByFeature);
    String expected = "RANK() OVER (PARTITION BY `fg1`.`pk1`, `fg1`.`pk2`, `fg1`.`ts` ORDER BY `fg2`.`ts` DESC) pit_rank_hopsworks";
    Assert.assertEquals(expected, rank.toSqlString(new SparkSqlDialect(SqlDialect.EMPTY_CONTEXT)).getSql());
}
Also used: SparkSqlDialect(org.apache.calcite.sql.dialect.SparkSqlDialect) ArrayList(java.util.ArrayList) Feature(io.hops.hopsworks.common.featurestore.query.Feature) SqlNode(org.apache.calcite.sql.SqlNode) Test(org.junit.Test)
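
rankOverAs produces the RANK() OVER (...) window that the point-in-time join CTEs filter on with pit_rank_hopsworks = 1. A comparable window expression can be assembled with Calcite's SqlWindow and the OVER operator from the standard operator table. The sketch below is only an illustration of that API under stated assumptions, not the controller's implementation, and it omits the pit_rank_hopsworks alias, so its exact output may differ from the expected string above:

import java.util.Arrays;
import java.util.stream.Collectors;
import org.apache.calcite.sql.SqlDialect;
import org.apache.calcite.sql.SqlIdentifier;
import org.apache.calcite.sql.SqlLiteral;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.SqlNodeList;
import org.apache.calcite.sql.SqlWindow;
import org.apache.calcite.sql.dialect.SparkSqlDialect;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.sql.parser.SqlParserPos;

public class RankOverSketch {
    public static void main(String[] args) {
        // PARTITION BY `fg1`.`pk1`, `fg1`.`pk2`, `fg1`.`ts`
        SqlNodeList partitionBy = new SqlNodeList(
            Arrays.asList("pk1", "pk2", "ts").stream()
                .map(f -> (SqlNode) new SqlIdentifier(Arrays.asList("fg1", f), SqlParserPos.ZERO))
                .collect(Collectors.toList()),
            SqlParserPos.ZERO);
        // ORDER BY `fg2`.`ts` DESC
        SqlNode orderKey = SqlStdOperatorTable.DESC.createCall(SqlParserPos.ZERO,
            new SqlIdentifier(Arrays.asList("fg2", "ts"), SqlParserPos.ZERO));
        SqlNodeList orderBy = new SqlNodeList(Arrays.asList(orderKey), SqlParserPos.ZERO);
        // An unnamed window with no frame bounds; isRows = false keeps RANGE semantics.
        SqlWindow window = SqlWindow.create(null, null, partitionBy, orderBy,
            SqlLiteral.createBoolean(false, SqlParserPos.ZERO), null, null, null, SqlParserPos.ZERO);
        // RANK() OVER (PARTITION BY ... ORDER BY ... DESC)
        SqlNode rankOver = SqlStdOperatorTable.OVER.createCall(SqlParserPos.ZERO,
            SqlStdOperatorTable.RANK.createCall(SqlParserPos.ZERO), window);
        System.out.println(rankOver.toSqlString(new SparkSqlDialect(SqlDialect.EMPTY_CONTEXT)).getSql());
    }
}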

Aggregations

SparkSqlDialect (org.apache.calcite.sql.dialect.SparkSqlDialect): 43
Test (org.junit.Test): 41
ArrayList (java.util.ArrayList): 27
Join (io.hops.hopsworks.common.featurestore.query.join.Join): 21
Query (io.hops.hopsworks.common.featurestore.query.Query): 14
Feature (io.hops.hopsworks.common.featurestore.query.Feature): 13
SqlCondition (io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlCondition): 10
SqlFilterLogic (io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlFilterLogic): 10
SqlNode (org.apache.calcite.sql.SqlNode): 7
HiveSqlDialect (org.apache.calcite.sql.dialect.HiveSqlDialect): 5
Collection (java.util.Collection): 3
Filter (io.hops.hopsworks.common.featurestore.query.filter.Filter): 2
FilterLogic (io.hops.hopsworks.common.featurestore.query.filter.FilterLogic): 2
SQLException (java.sql.SQLException): 1
SqlCall (org.apache.calcite.sql.SqlCall): 1
SqlSelect (org.apache.calcite.sql.SqlSelect): 1
AccessSqlDialect (org.apache.calcite.sql.dialect.AccessSqlDialect): 1
AnsiSqlDialect (org.apache.calcite.sql.dialect.AnsiSqlDialect): 1
BigQuerySqlDialect (org.apache.calcite.sql.dialect.BigQuerySqlDialect): 1
CalciteSqlDialect (org.apache.calcite.sql.dialect.CalciteSqlDialect): 1