Search in sources :

Example 6 with SqlCondition

use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlCondition in project hopsworks by logicalclocks.

the class TestPitJoinController method testGenerateSqlWithFilter.

/**
 * Renders the point-in-time join SQL for a filtered base query (alias "fg0") joined with
 * two right-hand queries (aliases "fg1" and "fg2", the second one using prefix "R_") and
 * compares the Spark SQL text against the expected statement character by character.
 */
@Test
public void testGenerateSqlWithFilter() {
    // Selected features per query. The lists stay mutable ArrayLists because each one is
    // handed to its Query both as the selected and as the available features.
    List<Feature> baseFeatures = new ArrayList<>(Arrays.asList(
        new Feature("pk1", "fg0", fgLeft, true),
        new Feature("pk2", "fg0", fgLeft),
        new Feature("ts", "fg0", fgLeft),
        new Feature("label", "fg0", fgLeft)));
    List<Feature> firstRightFeatures = new ArrayList<>(Arrays.asList(
        new Feature("pk1", "fg1", fgRight),
        new Feature("pk2", "fg1", fgRight),
        new Feature("ts", "fg1", fgRight),
        new Feature("ft1", "fg1", fgRight)));
    List<Feature> secondRightFeatures = new ArrayList<>(Arrays.asList(
        new Feature("pk1", "fg2", fgRight1),
        new Feature("ts", "fg2", fgRight1),
        new Feature("ft1", "fg2", fgRight1)));

    // First join: two key columns, each compared with EQUALS.
    List<Feature> firstLeftKeys = Arrays.asList(new Feature("pk1", "fg0", fgLeft), new Feature("pk2", "fg0", fgLeft));
    List<Feature> firstRightKeys = Arrays.asList(new Feature("pk1", "fg1", fgRight), new Feature("pk2", "fg1", fgRight));
    List<SqlCondition> firstOperators = Arrays.asList(SqlCondition.EQUALS, SqlCondition.EQUALS);

    // Second join: a different (single) key column.
    List<Feature> secondLeftKeys = Collections.singletonList(new Feature("pk1", "fg0", fgLeft));
    List<Feature> secondRightKeys = Collections.singletonList(new Feature("pk1", "fg2", fgRight1));
    List<SqlCondition> secondOperators = Collections.singletonList(SqlCondition.EQUALS);

    // Filter on the base query: label = 1.
    Feature labelFeature = new Feature("label", "fg0", "int", null, null);
    FilterLogic labelIsOne = new FilterLogic(new Filter(Arrays.asList(labelFeature), SqlCondition.EQUALS, "1"));

    Query query = new Query("fs", "project", fgLeft, "fg0", baseFeatures, baseFeatures, false, labelIsOne);
    Query firstRight = new Query("fs", "project", fgRight, "fg1", firstRightFeatures, firstRightFeatures, false, null);
    // NOTE(review): this query is built on fgRight although its features reference fgRight1;
    // the expected SQL below reads `fs`.`fg1_1` for alias `fg2`, so it is kept as-is.
    // Confirm whether that is intended.
    Query secondRight = new Query("fs", "project", fgRight, "fg2", secondRightFeatures, secondRightFeatures, false, null);

    Join firstJoin = new Join(query, firstRight, firstLeftKeys, firstRightKeys, JoinType.INNER, null, firstOperators);
    Join secondJoin = new Join(query, secondRight, secondLeftKeys, secondRightKeys, JoinType.INNER, "R_", secondOperators);
    query.setJoins(Arrays.asList(firstJoin, secondJoin));

    SparkSqlDialect dialect = new SparkSqlDialect(SqlDialect.EMPTY_CONTEXT);
    String result = pitJoinController.generateSQL(query, false).toSqlString(dialect).getSql();

    // One WITH entry per join (right_fg0, right_fg1), followed by the final select joining them.
    String expected = "WITH right_fg0 AS (SELECT *\n"
        + "FROM (SELECT `fg0`.`pk1`, `fg0`.`pk2`, `fg0`.`ts`, `fg0`.`label`, `fg0`.`pk1` `join_pk_pk1`, `fg0`.`ts` `join_evt_ts`, `fg1`.`pk1`, `fg1`.`pk2`, `fg1`.`ts`, `fg1`.`ft1`, "
        + "RANK() OVER (PARTITION BY `fg0`.`pk1`, `fg0`.`pk2`, `fg0`.`ts` ORDER BY `fg1`.`ts` DESC) pit_rank_hopsworks\n"
        + "FROM `fs`.`fg0_1` `fg0`\n"
        + "INNER JOIN `fs`.`fg1_1` `fg1` ON `fg0`.`pk1` = `fg1`.`pk1` AND `fg0`.`pk2` = `fg1`.`pk2` AND `fg0`.`ts` >= `fg1`.`ts`\n"
        + "WHERE `fg0`.`label` = 1) NA\n"
        + "WHERE `pit_rank_hopsworks` = 1), "
        + "right_fg1 AS (SELECT *\n"
        + "FROM (SELECT `fg0`.`pk1`, `fg0`.`pk2`, `fg0`.`ts`, `fg0`.`label`, `fg0`.`pk1` `join_pk_pk1`, `fg0`.`ts` `join_evt_ts`, `fg2`.`pk1` `R_pk1`, `fg2`.`ts` `R_ts`, `fg2`.`ft1` `R_ft1`, "
        + "RANK() OVER (PARTITION BY `fg0`.`pk1`, `fg0`.`ts` ORDER BY `fg2`.`ts` DESC) pit_rank_hopsworks\n"
        + "FROM `fs`.`fg0_1` `fg0`\n"
        + "INNER JOIN `fs`.`fg1_1` `fg2` ON `fg0`.`pk1` = `fg2`.`pk1` AND `fg0`.`ts` >= `fg2`.`ts`\n"
        + "WHERE `fg0`.`label` = 1) NA\n"
        + "WHERE `pit_rank_hopsworks` = 1) ("
        + "SELECT `right_fg0`.`pk1`, `right_fg0`.`pk2`, `right_fg0`.`ts`, `right_fg0`.`label`, `right_fg0`.`pk1`, `right_fg0`.`pk2`, `right_fg0`.`ts`, `right_fg0`.`ft1`, `right_fg1`.`R_pk1`, `right_fg1`.`R_ts`, `right_fg1`.`R_ft1`\n"
        + "FROM right_fg0\n"
        + "INNER JOIN right_fg1 ON `right_fg0`.`join_pk_pk1` = `right_fg1`.`join_pk_pk1` AND `right_fg0`.`join_evt_ts` = `right_fg1`.`join_evt_ts`)";
    Assert.assertEquals(expected, result);
}
Also used : SparkSqlDialect(org.apache.calcite.sql.dialect.SparkSqlDialect) Query(io.hops.hopsworks.common.featurestore.query.Query) Filter(io.hops.hopsworks.common.featurestore.query.filter.Filter) FilterLogic(io.hops.hopsworks.common.featurestore.query.filter.FilterLogic) ArrayList(java.util.ArrayList) Join(io.hops.hopsworks.common.featurestore.query.join.Join) Feature(io.hops.hopsworks.common.featurestore.query.Feature) SqlCondition(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlCondition) Test(org.junit.Test)

Example 7 with SqlCondition

use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlCondition in project hopsworks by logicalclocks.

the class TestConstructorController method testNoJoiningKeySingle.

/**
 * Calls {@code queryController.extractPrimaryKeysJoin} on two queries that each expose a
 * single feature named "ft1" (created with the boolean flag set to {@code true},
 * presumably marking it as a primary key) and checks that the resulting join carries
 * one left key, one right key and a single EQUALS operator.
 *
 * @throws Exception if extracting the join fails
 */
@Test
public void testNoJoiningKeySingle() throws Exception {
    List<Feature> availableLeft = new ArrayList<>();
    availableLeft.add(new Feature("ft1", true));
    List<Feature> availableRight = new ArrayList<>();
    availableRight.add(new Feature("ft1", true));
    Query leftQuery = new Query("fs1", "project_fs1", fg1, "fg1", availableLeft, availableLeft);
    Query rightQuery = new Query("fs1", "project_fs1", fg2, "fg1", availableRight, availableRight);
    List<SqlCondition> expectedConditionList = Arrays.asList(SqlCondition.EQUALS);
    // No explicit join keys are supplied; the controller has to derive them from the queries.
    Join join = queryController.extractPrimaryKeysJoin(leftQuery, rightQuery, JoinType.INNER, null);
    Assert.assertEquals(1, join.getLeftOn().size());
    Assert.assertEquals(1, join.getRightOn().size());
    Assert.assertEquals(expectedConditionList, join.getJoinOperator());
}
Also used : ArrayList(java.util.ArrayList) Join(io.hops.hopsworks.common.featurestore.query.join.Join) SqlCondition(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlCondition) Test(org.junit.Test)

Example 8 with SqlCondition

use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlCondition in project hopsworks by logicalclocks.

the class TestConstructorController method testNoJoiningKeyMultipleDifferentSizes.

/**
 * Calls {@code queryController.extractPrimaryKeysJoin} on two queries whose feature sets
 * only partly overlap by name (left: ft1, ft2, ft4; right: ft1, ft2, ft3) and checks that
 * the resulting join carries two keys on each side and two EQUALS operators.
 * Presumably only the features shared by name (ft1, ft2) become join keys; verify
 * against the controller.
 *
 * @throws Exception if extracting the join fails
 */
@Test
public void testNoJoiningKeyMultipleDifferentSizes() throws Exception {
    List<Feature> availableLeft = new ArrayList<>();
    availableLeft.add(new Feature("ft1", true));
    availableLeft.add(new Feature("ft2", true));
    availableLeft.add(new Feature("ft4", true));
    List<Feature> availableRight = new ArrayList<>();
    availableRight.add(new Feature("ft1", true));
    availableRight.add(new Feature("ft2", true));
    availableRight.add(new Feature("ft3", true));
    Query leftQuery = new Query("fs1", "project_fs1", fg1, "fg1", availableLeft, availableLeft);
    Query rightQuery = new Query("fs1", "project_fs1", fg2, "fg1", availableRight, availableRight);
    List<SqlCondition> expectedConditionList = Arrays.asList(SqlCondition.EQUALS, SqlCondition.EQUALS);
    // No explicit join keys are supplied; the controller has to derive them from the queries.
    Join join = queryController.extractPrimaryKeysJoin(leftQuery, rightQuery, JoinType.INNER, null);
    Assert.assertEquals(2, join.getLeftOn().size());
    Assert.assertEquals(2, join.getRightOn().size());
    Assert.assertEquals(expectedConditionList, join.getJoinOperator());
}
Also used : ArrayList(java.util.ArrayList) Join(io.hops.hopsworks.common.featurestore.query.join.Join) SqlCondition(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlCondition) Test(org.junit.Test)

Example 9 with SqlCondition

use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlCondition in project hopsworks by logicalclocks.

the class PitJoinController method generateSubQueries.

/**
 * Builds one aliased sub-query per join of {@code query}. Each sub-query joins
 * {@code baseQuery} with a single right-hand query, adds a
 * {@code GREATER_THAN_OR_EQUAL} condition on the event-time columns, and appends a
 * rank-over-window column to the select list.
 *
 * <p>{@code baseQuery} is mutated while iterating: its joins are replaced on every pass
 * and its features are reset to a copy of {@code query.getFeatures()} at the end of
 * each pass.
 *
 * @param baseQuery query used as the left side of every generated sub-query
 * @param query query whose joins, primary-key features and event time drive the generation
 * @param isTrainingDataset when {@code true}, the features of {@code baseQuery} are replaced
 *     by the result of {@code dropIrrelevantSubqueryFeatures} before generating each sub-query
 * @return one {@code AS} call per join, in the order of {@code query.getJoins()}
 */
public List<SqlCall> generateSubQueries(Query baseQuery, Query query, boolean isTrainingDataset) {
    // Re-select every primary-key column of the label group (tagged with PK_JOIN_PREFIX)
    // so the sub-queries can be joined back together at the end.
    List<Feature> joinKeyFeatures = new ArrayList<>();
    for (Feature available : query.getAvailableFeatures()) {
        if (available.isPrimary()) {
            joinKeyFeatures.add(new Feature(available.getName(), available.getFgAlias(), available.getType(),
                available.isPrimary(), available.getDefaultValue(), PK_JOIN_PREFIX));
        }
    }
    // The event-time column of the label group is re-selected as well, tagged with EVT_JOIN_PREFIX.
    joinKeyFeatures.add(new Feature(query.getFeaturegroup().getEventTime(), query.getAs(), (String) null, null, EVT_JOIN_PREFIX));
    for (Feature keyFeature : joinKeyFeatures) {
        keyFeature.setFeatureGroup(query.getFeaturegroup());
    }

    List<SqlCall> subQueries = new ArrayList<>();
    for (Join join : query.getJoins()) {
        Query rightQuery = join.getRightQuery();

        // Extend the join keys and operators with the event-time inequality.
        List<Feature> leftOnWithTime = addEventTimeOn(join.getLeftOn(), baseQuery.getFeaturegroup(), baseQuery.getAs());
        List<Feature> rightOnWithTime = addEventTimeOn(join.getRightOn(), rightQuery.getFeaturegroup(), rightQuery.getAs());
        List<SqlCondition> operatorsWithTime = addEventTimeCondition(join.getJoinOperator(), SqlCondition.GREATER_THAN_OR_EQUAL);

        // The base query is joined with exactly one right feature group per sub-query.
        Join pitJoin = new Join(baseQuery, rightQuery, leftOnWithTime, rightOnWithTime, join.getJoinType(),
            join.getPrefix(), operatorsWithTime);
        baseQuery.setJoins(Collections.singletonList(pitJoin));

        // Only training datasets get their selected features replaced here.
        if (isTrainingDataset) {
            baseQuery.setFeatures(dropIrrelevantSubqueryFeatures(query, rightQuery));
        }
        baseQuery.getFeatures().addAll(joinKeyFeatures);

        // Generate the plain sub-query first, then append the rank-over-window column.
        SqlSelect subQuery = constructorController.generateSQL(baseQuery, false);
        Feature rightEventTime = new Feature(rightQuery.getFeaturegroup().getEventTime(), rightQuery.getAs(), false);
        subQuery.getSelectList().add(rankOverAs(leftOnWithTime, rightEventTime));

        SqlIdentifier alias = new SqlIdentifier(HIVE_ALIAS_PLACEHOLDER, SqlParserPos.ZERO);
        subQueries.add(SqlStdOperatorTable.AS.createCall(SqlParserPos.ZERO, subQuery, alias));

        // Restore the feature selection for the next join.
        baseQuery.setFeatures(new ArrayList<>(query.getFeatures()));
    }
    return subQueries;
}
Also used : Arrays(java.util.Arrays) JoinConditionType(org.apache.calcite.sql.JoinConditionType) Feature(io.hops.hopsworks.common.featurestore.query.Feature) JoinController(io.hops.hopsworks.common.featurestore.query.join.JoinController) ArrayList(java.util.ArrayList) Strings(com.google.common.base.Strings) SqlCall(org.apache.calcite.sql.SqlCall) SqlLiteral(org.apache.calcite.sql.SqlLiteral) SqlNode(org.apache.calcite.sql.SqlNode) SqlWith(org.apache.calcite.sql.SqlWith) TransactionAttributeType(javax.ejb.TransactionAttributeType) TransactionAttribute(javax.ejb.TransactionAttribute) SqlIdentifier(org.apache.calcite.sql.SqlIdentifier) Query(io.hops.hopsworks.common.featurestore.query.Query) Filter(io.hops.hopsworks.common.featurestore.query.filter.Filter) SqlWindow(org.apache.calcite.sql.SqlWindow) SqlSelect(org.apache.calcite.sql.SqlSelect) EJB(javax.ejb.EJB) JoinType(org.apache.calcite.sql.JoinType) SqlParserPos(org.apache.calcite.sql.parser.SqlParserPos) Stateless(javax.ejb.Stateless) SqlCondition(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlCondition) Featuregroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup) Join(io.hops.hopsworks.common.featurestore.query.join.Join) ConstructorController(io.hops.hopsworks.common.featurestore.query.ConstructorController) Collectors(java.util.stream.Collectors) QueryDTO(io.hops.hopsworks.common.featurestore.query.QueryDTO) List(java.util.List) FilterController(io.hops.hopsworks.common.featurestore.query.filter.FilterController) SqlStdOperatorTable(org.apache.calcite.sql.fun.SqlStdOperatorTable) SqlJoin(org.apache.calcite.sql.SqlJoin) Comparator(java.util.Comparator) SqlNodeList(org.apache.calcite.sql.SqlNodeList) Collections(java.util.Collections) SqlCall(org.apache.calcite.sql.SqlCall) ArrayList(java.util.ArrayList) Join(io.hops.hopsworks.common.featurestore.query.join.Join) SqlJoin(org.apache.calcite.sql.SqlJoin) SqlIdentifier(org.apache.calcite.sql.SqlIdentifier) Feature(io.hops.hopsworks.common.featurestore.query.Feature) SqlCondition(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlCondition) SqlSelect(org.apache.calcite.sql.SqlSelect)

Example 10 with SqlCondition

use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlCondition in project hopsworks by logicalclocks.

the class TestConstructorController method testExtractJoinLeftRight.

/**
 * Calls {@code queryController.extractLeftRightOn} with explicit join keys ("fg1_ft3" on
 * the left, "fg2_ft3" on the right) and checks that the resulting join carries one left
 * key, one right key and a single EQUALS operator.
 *
 * @throws Exception if extracting the join fails
 */
@Test
public void testExtractJoinLeftRight() throws Exception {
    List<Feature> availableLeft = new ArrayList<>();
    availableLeft.add(new Feature("fg1_ft3"));
    List<Feature> availableRight = new ArrayList<>();
    availableRight.add(new Feature("fg2_ft3"));
    // Explicit join keys, named differently on each side.
    List<Feature> leftOn = Arrays.asList(new Feature("fg1_ft3"));
    List<Feature> rightOn = Arrays.asList(new Feature("fg2_ft3"));
    List<SqlCondition> expectedConditionList = Arrays.asList(SqlCondition.EQUALS);
    Query leftQuery = new Query("fs1", "project_fs1", fg1, "fg1", availableLeft, availableLeft);
    Query rightQuery = new Query("fs1", "project_fs1", fg2, "fg1", availableRight, availableRight);
    Join join = queryController.extractLeftRightOn(leftQuery, rightQuery, leftOn, rightOn, JoinType.INNER, null);
    Assert.assertEquals(1, join.getLeftOn().size());
    Assert.assertEquals(1, join.getRightOn().size());
    Assert.assertEquals(expectedConditionList, join.getJoinOperator());
}
Also used : ArrayList(java.util.ArrayList) Join(io.hops.hopsworks.common.featurestore.query.join.Join) SqlCondition(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlCondition) Test(org.junit.Test)

Aggregations

Join (io.hops.hopsworks.common.featurestore.query.join.Join): 15; SqlCondition (io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlCondition): 15; ArrayList (java.util.ArrayList): 15; Test (org.junit.Test): 13; Feature (io.hops.hopsworks.common.featurestore.query.Feature): 12; Query (io.hops.hopsworks.common.featurestore.query.Query): 12; SparkSqlDialect (org.apache.calcite.sql.dialect.SparkSqlDialect): 10; Filter (io.hops.hopsworks.common.featurestore.query.filter.Filter): 3; SqlCall (org.apache.calcite.sql.SqlCall): 3; SqlSelect (org.apache.calcite.sql.SqlSelect): 3; Strings (com.google.common.base.Strings): 2; ConstructorController (io.hops.hopsworks.common.featurestore.query.ConstructorController): 2; QueryDTO (io.hops.hopsworks.common.featurestore.query.QueryDTO): 2; FilterController (io.hops.hopsworks.common.featurestore.query.filter.FilterController): 2; JoinController (io.hops.hopsworks.common.featurestore.query.join.JoinController): 2; Featuregroup (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup): 2; Arrays (java.util.Arrays): 2; Collections (java.util.Collections): 2; Comparator (java.util.Comparator): 2; List (java.util.List): 2