
Example 11 with Join

Use of io.hops.hopsworks.common.featurestore.query.join.Join in project hopsworks by logicalclocks.

From the class TestPitJoinController, the method testGenerateSqlTrainingDatasetWrongFeatureOrder verifies that features supplied out of index order are re-ordered in the final training-dataset projection:

@Test
public void testGenerateSqlTrainingDatasetWrongFeatureOrder() {
    List<Feature> leftFeatures = new ArrayList<>();
    leftFeatures.add(new Feature("pk1", "fg0", fgLeft, true, 1));
    leftFeatures.add(new Feature("pk2", "fg0", fgLeft, false, 2));
    leftFeatures.add(new Feature("ts", "fg0", fgLeft, false, 3));
    leftFeatures.add(new Feature("label", "fg0", fgLeft, false, 4));
    // note wrong order
    leftFeatures.add(new Feature("ft1", "fg1", fgRight, false, 6));
    leftFeatures.add(new Feature("ft2", "fg2", fgRight1, false, 5));
    List<Feature> leftOn = Arrays.asList(new Feature("pk1", "fg0", fgLeft), new Feature("pk2", "fg0", fgLeft));
    List<Feature> rightOn = Arrays.asList(new Feature("pk1", "fg1", fgRight), new Feature("pk2", "fg1", fgRight));
    // join on different pks
    List<Feature> leftOn1 = Collections.singletonList(new Feature("pk1", "fg0", fgLeft));
    List<Feature> rightOn1 = Collections.singletonList(new Feature("pk1", "fg2", fgRight1));
    List<SqlCondition> joinOperator = Arrays.asList(SqlCondition.EQUALS, SqlCondition.EQUALS);
    List<SqlCondition> joinOperator1 = Collections.singletonList(SqlCondition.EQUALS);
    Query query = new Query("fs", "project", fgLeft, "fg0", leftFeatures, leftFeatures, false, null);
    Query right = new Query("fs", "project", fgRight, "fg1", new ArrayList<>(), new ArrayList<>(), false, null);
    Query right1 = new Query("fs", "project", fgRight1, "fg2", new ArrayList<>(), new ArrayList<>(), false, null);
    Join join = new Join(query, right, leftOn, rightOn, JoinType.INNER, null, joinOperator);
    Join join1 = new Join(query, right1, leftOn1, rightOn1, JoinType.INNER, null, joinOperator1);
    query.setJoins(Arrays.asList(join, join1));
    String result = pitJoinController.generateSQL(query, true).toSqlString(new SparkSqlDialect(SqlDialect.EMPTY_CONTEXT)).getSql();
    String expected = "WITH right_fg0 AS (SELECT *\n" + "FROM (SELECT `fg0`.`pk1`, `fg0`.`pk2`, `fg0`.`ts`, `fg0`.`label`, `fg1`.`ft1`, `fg0`.`pk1` `join_pk_pk1`, `fg0`.`ts` `join_evt_ts`, " + "RANK() OVER (PARTITION BY `fg0`.`pk1`, `fg0`.`pk2`, `fg0`.`ts` ORDER BY `fg1`.`ts` DESC) pit_rank_hopsworks\n" + "FROM `fs`.`fg0_1` `fg0`\n" + "INNER JOIN `fs`.`fg1_1` `fg1` ON `fg0`.`pk1` = `fg1`.`pk1` AND `fg0`.`pk2` = `fg1`.`pk2` AND `fg0`.`ts` >= `fg1`.`ts`) NA\n" + "WHERE `pit_rank_hopsworks` = 1), " + "right_fg1 AS (SELECT *\n" + "FROM (SELECT `fg0`.`pk1`, `fg0`.`pk2`, `fg0`.`ts`, `fg0`.`label`, `fg2`.`ft2`, `fg0`.`pk1` `join_pk_pk1`, `fg0`.`ts` `join_evt_ts`, " + "RANK() OVER (PARTITION BY `fg0`.`pk1`, `fg0`.`ts` ORDER BY `fg2`.`ts` DESC) pit_rank_hopsworks\n" + "FROM `fs`.`fg0_1` `fg0`\n" + "INNER JOIN `fs`.`fg2_1` `fg2` ON `fg0`.`pk1` = `fg2`.`pk1` AND `fg0`.`ts` >= `fg2`.`ts`) NA\n" + "WHERE `pit_rank_hopsworks` = 1) (" + "SELECT `right_fg0`.`pk1`, `right_fg0`.`pk2`, `right_fg0`.`ts`, `right_fg0`.`label`, `right_fg1`.`ft2`, `right_fg0`.`ft1`\n" + "FROM right_fg0\n" + "INNER JOIN right_fg1 ON `right_fg0`.`join_pk_pk1` = `right_fg1`.`join_pk_pk1` AND `right_fg0`.`join_evt_ts` = `right_fg1`.`join_evt_ts`)";
    Assert.assertEquals(expected, result);
}
Also used : SparkSqlDialect(org.apache.calcite.sql.dialect.SparkSqlDialect) Query(io.hops.hopsworks.common.featurestore.query.Query) ArrayList(java.util.ArrayList) Join(io.hops.hopsworks.common.featurestore.query.join.Join) Feature(io.hops.hopsworks.common.featurestore.query.Feature) SqlCondition(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlCondition) Test(org.junit.Test)
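The re-ordering above is driven by the per-feature index passed as the last constructor argument (5 for ft2, 6 for ft1). As a minimal, self-contained sketch of that idea (the IndexedFeature type and the sort are hypothetical illustrations, not the actual Hopsworks implementation), restoring training-dataset order amounts to sorting on that index:

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

public class FeatureOrderSketch {
    // Hypothetical stand-in for a feature that records its position in the training dataset.
    record IndexedFeature(String name, int idx) {}

    // Sort features by their declared index so "ft2" (idx 5) precedes "ft1" (idx 6),
    // matching the column order asserted in the expected SQL above.
    static List<IndexedFeature> orderForTrainingDataset(List<IndexedFeature> features) {
        List<IndexedFeature> ordered = new ArrayList<>(features);
        ordered.sort(Comparator.comparingInt(IndexedFeature::idx));
        return ordered;
    }
}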

Example 12 with Join

Use of io.hops.hopsworks.common.featurestore.query.join.Join in project hopsworks by logicalclocks.

From the class TestPitJoinController, the method testGenerateSubQueriesSingle builds a single point-in-time join and checks the generated sub-query:

@Test
public void testGenerateSubQueriesSingle() {
    List<Feature> leftFeatures = new ArrayList<>();
    leftFeatures.add(new Feature("pk1", "fg0", fgLeft, true));
    leftFeatures.add(new Feature("pk2", "fg0", fgLeft));
    leftFeatures.add(new Feature("ts", "fg0", fgLeft));
    leftFeatures.add(new Feature("label", "fg0", fgLeft));
    List<Feature> rightFeatures = new ArrayList<>();
    rightFeatures.add(new Feature("pk1", "fg1", fgRight));
    rightFeatures.add(new Feature("pk2", "fg1", fgRight));
    rightFeatures.add(new Feature("ts", "fg1", fgRight));
    rightFeatures.add(new Feature("ft1", "fg1", fgRight));
    List<Feature> leftOn = Arrays.asList(new Feature("pk1", "fg0", fgLeft), new Feature("pk2", "fg0", fgLeft));
    List<Feature> rightOn = Arrays.asList(new Feature("pk1", "fg1", fgRight), new Feature("pk2", "fg1", fgRight));
    List<SqlCondition> joinOperator = Arrays.asList(SqlCondition.EQUALS, SqlCondition.EQUALS);
    Query query = new Query("fs", "project", fgLeft, "fg0", leftFeatures, leftFeatures, false, null);
    Query right = new Query("fs", "project", fgRight, "fg1", rightFeatures, rightFeatures, false, null);
    Join join = new Join(query, right, leftOn, rightOn, JoinType.INNER, null, joinOperator);
    query.setJoins(Collections.singletonList(join));
    Query baseQuery = new Query(query.getFeatureStore(), query.getProject(), query.getFeaturegroup(), query.getAs(), query.getFeatures(), query.getAvailableFeatures(), query.getHiveEngine(), query.getFilter());
    List<SqlCall> result = pitJoinController.generateSubQueries(baseQuery, query, false);
    String expected = "(SELECT `fg0`.`pk1`, `fg0`.`pk2`, `fg0`.`ts`, `fg0`.`label`, `fg0`.`pk1` `join_pk_pk1`, `fg0`.`ts` `join_evt_ts`, `fg1`.`pk1`, `fg1`.`pk2`, `fg1`.`ts`, `fg1`.`ft1`, " + "RANK() OVER (PARTITION BY `fg0`.`pk1`, `fg0`.`pk2`, `fg0`.`ts` ORDER BY `fg1`.`ts` DESC) pit_rank_hopsworks\n" + "FROM `fs`.`fg0_1` `fg0`\n" + "INNER JOIN `fs`.`fg1_1` `fg1` ON `fg0`.`pk1` = `fg1`.`pk1` AND `fg0`.`pk2` = `fg1`.`pk2` AND `fg0`.`ts` >= `fg1`.`ts`) NA";
    Assert.assertEquals(1, result.size());
    Assert.assertEquals(expected, result.get(0).toSqlString(new SparkSqlDialect(SqlDialect.EMPTY_CONTEXT)).getSql());
}
Also used : SparkSqlDialect(org.apache.calcite.sql.dialect.SparkSqlDialect) Query(io.hops.hopsworks.common.featurestore.query.Query) SqlCall(org.apache.calcite.sql.SqlCall) ArrayList(java.util.ArrayList) Join(io.hops.hopsworks.common.featurestore.query.join.Join) Feature(io.hops.hopsworks.common.featurestore.query.Feature) SqlCondition(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlCondition) Test(org.junit.Test)
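The SqlCall returned by generateSubQueries is a regular Calcite AST node, so it is not tied to the Spark dialect used in the assertion. As a usage sketch (assuming Calcite's built-in AnsiSqlDialect is available on the classpath), the same node returned above could be rendered for another dialect:

// Render the first generated sub-query with a different Calcite dialect.
String ansiSql = result.get(0)
    .toSqlString(org.apache.calcite.sql.dialect.AnsiSqlDialect.DEFAULT)
    .getSql();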

Example 13 with Join

Use of io.hops.hopsworks.common.featurestore.query.join.Join in project hopsworks by logicalclocks.

From the class TestPitJoinController, the method testGenerateSqlWithFilter generates the full point-in-time join SQL with a row-level filter on the left feature group and an "R_" prefix on the second joined feature group:

@Test
public void testGenerateSqlWithFilter() {
    List<Feature> leftFeatures = new ArrayList<>();
    leftFeatures.add(new Feature("pk1", "fg0", fgLeft, true));
    leftFeatures.add(new Feature("pk2", "fg0", fgLeft));
    leftFeatures.add(new Feature("ts", "fg0", fgLeft));
    leftFeatures.add(new Feature("label", "fg0", fgLeft));
    List<Feature> rightFeatures = new ArrayList<>();
    rightFeatures.add(new Feature("pk1", "fg1", fgRight));
    rightFeatures.add(new Feature("pk2", "fg1", fgRight));
    rightFeatures.add(new Feature("ts", "fg1", fgRight));
    rightFeatures.add(new Feature("ft1", "fg1", fgRight));
    List<Feature> rightFeatures1 = new ArrayList<>();
    rightFeatures1.add(new Feature("pk1", "fg2", fgRight1));
    rightFeatures1.add(new Feature("ts", "fg2", fgRight1));
    rightFeatures1.add(new Feature("ft1", "fg2", fgRight1));
    List<Feature> leftOn = Arrays.asList(new Feature("pk1", "fg0", fgLeft), new Feature("pk2", "fg0", fgLeft));
    List<Feature> rightOn = Arrays.asList(new Feature("pk1", "fg1", fgRight), new Feature("pk2", "fg1", fgRight));
    // join on different pks
    List<Feature> leftOn1 = Collections.singletonList(new Feature("pk1", "fg0", fgLeft));
    List<Feature> rightOn1 = Collections.singletonList(new Feature("pk1", "fg2", fgRight1));
    List<SqlCondition> joinOperator = Arrays.asList(SqlCondition.EQUALS, SqlCondition.EQUALS);
    List<SqlCondition> joinOperator1 = Collections.singletonList(SqlCondition.EQUALS);
    FilterLogic filter = new FilterLogic(new Filter(Arrays.asList(new Feature("label", "fg0", "int", null, null)), SqlCondition.EQUALS, "1"));
    Query query = new Query("fs", "project", fgLeft, "fg0", leftFeatures, leftFeatures, false, filter);
    Query right = new Query("fs", "project", fgRight, "fg1", rightFeatures, rightFeatures, false, null);
    Query right1 = new Query("fs", "project", fgRight, "fg2", rightFeatures1, rightFeatures1, false, null);
    Join join = new Join(query, right, leftOn, rightOn, JoinType.INNER, null, joinOperator);
    Join join1 = new Join(query, right1, leftOn1, rightOn1, JoinType.INNER, "R_", joinOperator1);
    query.setJoins(Arrays.asList(join, join1));
    String result = pitJoinController.generateSQL(query, false).toSqlString(new SparkSqlDialect(SqlDialect.EMPTY_CONTEXT)).getSql();
    String expected = "WITH right_fg0 AS (SELECT *\n" + "FROM (SELECT `fg0`.`pk1`, `fg0`.`pk2`, `fg0`.`ts`, `fg0`.`label`, `fg0`.`pk1` `join_pk_pk1`, `fg0`.`ts` `join_evt_ts`, `fg1`.`pk1`, `fg1`.`pk2`, `fg1`.`ts`, `fg1`.`ft1`, " + "RANK() OVER (PARTITION BY `fg0`.`pk1`, `fg0`.`pk2`, `fg0`.`ts` ORDER BY `fg1`.`ts` DESC) pit_rank_hopsworks\n" + "FROM `fs`.`fg0_1` `fg0`\n" + "INNER JOIN `fs`.`fg1_1` `fg1` ON `fg0`.`pk1` = `fg1`.`pk1` AND `fg0`.`pk2` = `fg1`.`pk2` AND `fg0`.`ts` >= `fg1`.`ts`\n" + "WHERE `fg0`.`label` = 1) NA\n" + "WHERE `pit_rank_hopsworks` = 1), " + "right_fg1 AS (SELECT *\n" + "FROM (SELECT `fg0`.`pk1`, `fg0`.`pk2`, `fg0`.`ts`, `fg0`.`label`, `fg0`.`pk1` `join_pk_pk1`, `fg0`.`ts` `join_evt_ts`, `fg2`.`pk1` `R_pk1`, `fg2`.`ts` `R_ts`, `fg2`.`ft1` `R_ft1`, " + "RANK() OVER (PARTITION BY `fg0`.`pk1`, `fg0`.`ts` ORDER BY `fg2`.`ts` DESC) pit_rank_hopsworks\n" + "FROM `fs`.`fg0_1` `fg0`\n" + "INNER JOIN `fs`.`fg1_1` `fg2` ON `fg0`.`pk1` = `fg2`.`pk1` AND `fg0`.`ts` >= `fg2`.`ts`\n" + "WHERE `fg0`.`label` = 1) NA\n" + "WHERE `pit_rank_hopsworks` = 1) (" + "SELECT `right_fg0`.`pk1`, `right_fg0`.`pk2`, `right_fg0`.`ts`, `right_fg0`.`label`, `right_fg0`.`pk1`, `right_fg0`.`pk2`, `right_fg0`.`ts`, `right_fg0`.`ft1`, `right_fg1`.`R_pk1`, `right_fg1`.`R_ts`, `right_fg1`.`R_ft1`\n" + "FROM right_fg0\n" + "INNER JOIN right_fg1 ON `right_fg0`.`join_pk_pk1` = `right_fg1`.`join_pk_pk1` AND `right_fg0`.`join_evt_ts` = `right_fg1`.`join_evt_ts`)";
    Assert.assertEquals(expected, result);
}
Also used : SparkSqlDialect(org.apache.calcite.sql.dialect.SparkSqlDialect) Query(io.hops.hopsworks.common.featurestore.query.Query) Filter(io.hops.hopsworks.common.featurestore.query.filter.Filter) FilterLogic(io.hops.hopsworks.common.featurestore.query.filter.FilterLogic) ArrayList(java.util.ArrayList) Join(io.hops.hopsworks.common.featurestore.query.join.Join) Feature(io.hops.hopsworks.common.featurestore.query.Feature) SqlCondition(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlCondition) Test(org.junit.Test)
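Note that the only difference between join and join1 above is the prefix argument ("R_"), which is why the second feature group's columns surface as `R_pk1`, `R_ts`, `R_ft1` in the expected SQL. A minimal variation reusing only constructors already shown in this example (a sketch with a made-up prefix, not an additional Hopsworks test) would be:

// Same join as join1 above, but with a different (hypothetical) prefix for the right-hand features.
Join prefixedJoin = new Join(query, right1, leftOn1, rightOn1, JoinType.INNER, "second_", joinOperator1);
query.setJoins(Arrays.asList(join, prefixedJoin));
String prefixedSql = pitJoinController.generateSQL(query, false)
    .toSqlString(new SparkSqlDialect(SqlDialect.EMPTY_CONTEXT))
    .getSql();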

Example 14 with Join

Use of io.hops.hopsworks.common.featurestore.query.join.Join in project hopsworks by logicalclocks.

From the class TestPitJoinController, the method testIsPitEnabledTrueQuery checks that a query is detected as point-in-time enabled when every joined feature group defines an event time:

@Test
public void testIsPitEnabledTrueQuery() {
    Featuregroup fg1 = new Featuregroup(1);
    fg1.setEventTime("ts");
    Featuregroup fg2 = new Featuregroup(2);
    fg2.setEventTime("ts");
    Query right1 = new Query("fs", fg1);
    Query right2 = new Query("fs", fg2);
    Query leftQuery = new Query("fs", fg1);
    Join join1 = new Join(leftQuery, right1, null, null, null, null, null);
    Join join2 = new Join(leftQuery, right2, null, null, null, null, null);
    List<Join> joins = Arrays.asList(join1, join2);
    leftQuery.setJoins(joins);
    Assert.assertEquals(true, pitJoinController.isPitEnabled(leftQuery));
}
Also used : Query(io.hops.hopsworks.common.featurestore.query.Query) Featuregroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup) CachedFeaturegroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeaturegroup) Join(io.hops.hopsworks.common.featurestore.query.join.Join) Test(org.junit.Test)
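The assertion above passes because both feature groups declare an event-time column. As a conceptual sketch of that check (inferred from this test and built on hypothetical minimal types, not the actual Hopsworks isPitEnabled implementation), point-in-time joins are enabled only when the left feature group and every joined feature group define an event time:

import java.util.List;

public class PitCheckSketch {
    // Hypothetical, minimal stand-ins for the real Featuregroup and Join entities.
    record Fg(String eventTime) {}
    record Joined(Fg rightFg) {}

    static boolean isPitEnabled(Fg leftFg, List<Joined> joins) {
        // All participating feature groups must declare an event-time column.
        return leftFg.eventTime() != null
            && joins.stream().allMatch(j -> j.rightFg().eventTime() != null);
    }
}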

Example 15 with Join

Use of io.hops.hopsworks.common.featurestore.query.join.Join in project hopsworks by logicalclocks.

From the class TestFilterController, the method testBuildFilterNodeTripleJoin builds nested AND/OR filter logic across a triple join and checks the rendered filter expression:

@Test
public void testBuildFilterNodeTripleJoin() throws Exception {
    Query leftQuery = new Query("fs1", "project_fs1", fg1, "fg0", fg1Features, fg1Features);
    Query secondQuery = new Query("fs1", "project_fs1", fg2, "fg1", fg2Features, fg2Features);
    Query thirdQuery = new Query("fs1", "project_fs1", fg3, "fg2", fg3Features, fg3Features);
    FilterLogic firstFilter = new FilterLogic(SqlFilterLogic.AND);
    firstFilter.setLeftFilter(new Filter(Arrays.asList(fg1Features.get(1)), SqlCondition.EQUALS, "10"));
    FilterLogic rightLogic = new FilterLogic(SqlFilterLogic.OR);
    rightLogic.setLeftFilter(new Filter(Arrays.asList(fg3Features.get(1)), SqlCondition.EQUALS, "10"));
    rightLogic.setRightFilter(new Filter(Arrays.asList(fg3Features.get(2)), SqlCondition.EQUALS, "10"));
    firstFilter.setRightLogic(rightLogic);
    leftQuery.setFilter(firstFilter);
    FilterLogic secondFilter = new FilterLogic(SqlFilterLogic.SINGLE);
    secondFilter.setLeftFilter(new Filter(Arrays.asList(fg2Features.get(1)), SqlCondition.NOT_EQUALS, "10"));
    secondQuery.setFilter(secondFilter);
    Join join = new Join(leftQuery, secondQuery, joinFeatures, joinFeatures, JoinType.INNER, null, Arrays.asList(SqlCondition.EQUALS));
    Join secondJoin = new Join(leftQuery, thirdQuery, leftOn, rightOn, JoinType.INNER, null, Arrays.asList(SqlCondition.EQUALS, SqlCondition.EQUALS));
    leftQuery.setJoins(Arrays.asList(join, secondJoin));
    String result = filterController.buildFilterNode(leftQuery, leftQuery, leftQuery.getJoins().size() - 1, false).toSqlString(new SparkSqlDialect(SqlDialect.EMPTY_CONTEXT)).getSql();
    String expected = "`fg1`.`fg1_ft` = 10 " + "AND (CASE WHEN `fg3`.`fg3_ft` IS NULL THEN 'default' ELSE `fg3`.`fg3_ft` END = '10' OR `fg3`.`join` = '10') " + "AND CASE WHEN `fg2`.`fg2_ft` IS NULL THEN 10.0 ELSE `fg2`.`fg2_ft` END <> 10";
    Assert.assertEquals(expected, result);
}
Also used : SparkSqlDialect(org.apache.calcite.sql.dialect.SparkSqlDialect) Query(io.hops.hopsworks.common.featurestore.query.Query) SqlFilterLogic(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlFilterLogic) Join(io.hops.hopsworks.common.featurestore.query.join.Join) Test(org.junit.Test)

Aggregations

Join (io.hops.hopsworks.common.featurestore.query.join.Join) 45
ArrayList (java.util.ArrayList) 39
Test (org.junit.Test) 31
Query (io.hops.hopsworks.common.featurestore.query.Query) 21
SparkSqlDialect (org.apache.calcite.sql.dialect.SparkSqlDialect) 21
SqlCondition (io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlCondition) 19
Feature (io.hops.hopsworks.common.featurestore.query.Feature) 17
Featuregroup (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup) 12
List (java.util.List) 11
Collectors (java.util.stream.Collectors) 11
EJB (javax.ejb.EJB) 11
Stateless (javax.ejb.Stateless) 11
TransactionAttribute (javax.ejb.TransactionAttribute) 11
TransactionAttributeType (javax.ejb.TransactionAttributeType) 11
FeaturestoreException (io.hops.hopsworks.exceptions.FeaturestoreException) 9
RESTCodes (io.hops.hopsworks.restutils.RESTCodes) 9
Level (java.util.logging.Level) 9
JoinType (org.apache.calcite.sql.JoinType) 9
FeaturegroupController (io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupController) 8
OnlineFeaturestoreController (io.hops.hopsworks.common.featurestore.online.OnlineFeaturestoreController) 8