Use of io.hops.hopsworks.common.featurestore.query.join.Join in the project hopsworks by logicalclocks.
From the class QueryController, method convertJoins.
/**
 * Convert the JoinDTOs into the internal representation of the Join object.
 * The returned list will already contain the correct set of joining keys.
 *
 * <p>For every DTO the join keys are selected in this order:
 * <ol>
 *   <li>the feature names in {@code on}, used for both sides of the join;</li>
 *   <li>otherwise the feature names in {@code leftOn} / {@code rightOn};</li>
 *   <li>otherwise the keys are left to {@code extractPrimaryKeysJoin}.</li>
 * </ol>
 *
 * @param leftQuery the query used as the left side of every produced join
 * @param joinDTOS the join descriptions to convert
 * @param fgAliasLookup forwarded unchanged to {@code convertQueryDTO} for the right-hand sub-query
 * @param fgLookup forwarded unchanged to {@code convertQueryDTO} for the right-hand sub-query
 * @param availableFeatureLookup forwarded unchanged to {@code convertQueryDTO} for the right-hand sub-query
 * @param pitEnabled forwarded unchanged to {@code convertQueryDTO} for the right-hand sub-query
 * @return one Join per entry of {@code joinDTOS}, in iteration order
 * @throws IllegalArgumentException if a join has no sub-query, or specifies {@code leftOn} without
 *     {@code rightOn}
 * @throws FeaturestoreException propagated from the conversion/extraction helpers invoked here
 */
private List<Join> convertJoins(Query leftQuery, List<JoinDTO> joinDTOS, Map<Integer, String> fgAliasLookup,
    Map<Integer, Featuregroup> fgLookup, Map<Integer, List<Feature>> availableFeatureLookup, boolean pitEnabled)
    throws FeaturestoreException {
  List<Join> joins = new ArrayList<>();
  for (JoinDTO joinDTO : joinDTOS) {
    if (joinDTO.getQuery() == null) {
      throw new IllegalArgumentException("Subquery not specified");
    }

    // Recursively convert the QueryDTO. Currently we don't support Joins of Joins
    Query rightQuery =
        convertQueryDTO(joinDTO.getQuery(), fgAliasLookup, fgLookup, availableFeatureLookup, pitEnabled);

    boolean hasOn = joinDTO.getOn() != null && !joinDTO.getOn().isEmpty();
    boolean hasLeftOn = joinDTO.getLeftOn() != null && !joinDTO.getLeftOn().isEmpty();

    if (hasOn) {
      // Same feature names on both sides. Two independent lists of fresh Feature objects are built
      // so that the left and right keys never share instances.
      List<Feature> leftOn =
          joinDTO.getOn().stream().map(f -> new Feature(f.getName())).collect(Collectors.toList());
      List<Feature> rightOn =
          joinDTO.getOn().stream().map(f -> new Feature(f.getName())).collect(Collectors.toList());
      joins.add(extractLeftRightOn(leftQuery, rightQuery, leftOn, rightOn, joinDTO.getType(),
          joinDTO.getPrefix()));
    } else if (hasLeftOn) {
      // leftOn without rightOn used to surface as a bare NullPointerException from the stream call
      // below; report it as an invalid argument, like the missing sub-query above.
      if (joinDTO.getRightOn() == null) {
        throw new IllegalArgumentException("Right join keys not specified for the provided left join keys");
      }
      List<Feature> leftOn =
          joinDTO.getLeftOn().stream().map(f -> new Feature(f.getName())).collect(Collectors.toList());
      List<Feature> rightOn =
          joinDTO.getRightOn().stream().map(f -> new Feature(f.getName())).collect(Collectors.toList());
      joins.add(extractLeftRightOn(leftQuery, rightQuery, leftOn, rightOn, joinDTO.getType(),
          joinDTO.getPrefix()));
    } else {
      // Only if right feature group is present, extract the primary keys for the join
      joins.add(extractPrimaryKeysJoin(leftQuery, rightQuery, joinDTO.getType(), joinDTO.getPrefix()));
    }
  }
  return joins;
}
Use of io.hops.hopsworks.common.featurestore.query.join.Join in the project hopsworks by logicalclocks.
From the class TestPitJoinController, method testWrapSubQueries.
/**
 * Builds a single inner join between the fg0 and fg1 queries, generates the sub-queries for it and
 * checks that wrapping them yields exactly one statement with the expected Spark SQL text.
 */
@Test
public void testWrapSubQueries() {
  // Features selected from the left query (alias fg0)
  List<Feature> fg0Features = new ArrayList<>(Arrays.asList(
      new Feature("pk1", "fg0", fgLeft, true),
      new Feature("pk2", "fg0", fgLeft),
      new Feature("ts", "fg0", fgLeft),
      new Feature("label", "fg0", fgLeft)));

  // Features selected from the right query (alias fg1)
  List<Feature> fg1Features = new ArrayList<>(Arrays.asList(
      new Feature("pk1", "fg1", fgRight),
      new Feature("pk2", "fg1", fgRight),
      new Feature("ts", "fg1", fgRight),
      new Feature("ft1", "fg1", fgRight)));

  // Join keys: pk1 and pk2 on both sides, each compared with EQUALS
  List<Feature> fg0Keys = Arrays.asList(new Feature("pk1", "fg0", fgLeft), new Feature("pk2", "fg0", fgLeft));
  List<Feature> fg1Keys = Arrays.asList(new Feature("pk1", "fg1", fgRight), new Feature("pk2", "fg1", fgRight));
  List<SqlCondition> keyConditions = Arrays.asList(SqlCondition.EQUALS, SqlCondition.EQUALS);

  Query joinedQuery = new Query("fs", "project", fgLeft, "fg0", fg0Features, fg0Features, false, null);
  Query fg1Query = new Query("fs", "project", fgRight, "fg1", fg1Features, fg1Features, false, null);
  Join innerJoin = new Join(joinedQuery, fg1Query, fg0Keys, fg1Keys, JoinType.INNER, null, keyConditions);
  joinedQuery.setJoins(Collections.singletonList(innerJoin));

  // Same settings as joinedQuery, but constructed separately and without the joins attached
  Query baseQuery = new Query(
      joinedQuery.getFeatureStore(),
      joinedQuery.getProject(),
      joinedQuery.getFeaturegroup(),
      joinedQuery.getAs(),
      joinedQuery.getFeatures(),
      joinedQuery.getAvailableFeatures(),
      joinedQuery.getHiveEngine(),
      joinedQuery.getFilter());

  String expected = "SELECT *\n"
      + "FROM (SELECT `fg0`.`pk1`, `fg0`.`pk2`, `fg0`.`ts`, `fg0`.`label`, `fg0`.`pk1` `join_pk_pk1`, `fg0`.`ts` `join_evt_ts`, `fg1`.`pk1`, `fg1`.`pk2`, `fg1`.`ts`, `fg1`.`ft1`, "
      + "RANK() OVER (PARTITION BY `fg0`.`pk1`, `fg0`.`pk2`, `fg0`.`ts` ORDER BY `fg1`.`ts` DESC) pit_rank_hopsworks\n"
      + "FROM `fs`.`fg0_1` `fg0`\n"
      + "INNER JOIN `fs`.`fg1_1` `fg1` ON `fg0`.`pk1` = `fg1`.`pk1` AND `fg0`.`pk2` = `fg1`.`pk2` AND `fg0`.`ts` >= `fg1`.`ts`) NA\n"
      + "WHERE `pit_rank_hopsworks` = 1";

  List<SqlSelect> wrapped =
      pitJoinController.wrapSubQueries(pitJoinController.generateSubQueries(baseQuery, joinedQuery, false));

  Assert.assertEquals(1, wrapped.size());
  String actualSql = wrapped.get(0).toSqlString(new SparkSqlDialect(SqlDialect.EMPTY_CONTEXT)).getSql();
  Assert.assertEquals(expected, actualSql);
}
Use of io.hops.hopsworks.common.featurestore.query.join.Join in the project hopsworks by logicalclocks.
From the class TestQueryController, method testRemoveDuplicateColumnsPrefix.
/**
 * Joins two queries whose right-hand features all carry the "right_" prefix and checks that, after
 * removeDuplicateColumns with the flag set to false, the right query still reports three features.
 */
@Test
public void testRemoveDuplicateColumnsPrefix() throws Exception {
  // Left side: ft1 is the join key, ft2 is additionally available
  List<Feature> leftKeys = new ArrayList<>(Arrays.asList(new Feature("ft1", "fg0", "Float", null, null)));
  List<Feature> leftAvailable = new ArrayList<>(leftKeys);
  leftAvailable.add(new Feature("ft2", "fg0", "int", null, null));

  // Right side: same shape plus ft3, every feature created with the "right_" prefix
  List<Feature> rightKeys = new ArrayList<>(Arrays.asList(new Feature("ft1", "fg1", "Float", null, "right_")));
  List<Feature> rightAvailable = new ArrayList<>(rightKeys);
  rightAvailable.add(new Feature("ft2", "fg1", "int", null, "right_"));
  rightAvailable.add(new Feature("ft3", "fg1", "int", null, "right_"));

  Query rightSide = new Query("fs1", "project_fs1", fg2, "fg0", rightAvailable, rightAvailable);
  Query leftSide = new Query("fs1", "project_fs1", fg1, "fg1", leftAvailable, leftAvailable);

  Join prefixedJoin =
      new Join(leftSide, rightSide, leftKeys, rightKeys, JoinType.INNER, "right_", singleEqualsJoinOperator);
  leftSide.setJoins(Arrays.asList(prefixedJoin));

  target.removeDuplicateColumns(leftSide, false);

  // All three right-hand features are expected to remain
  Assert.assertEquals(3, rightSide.getFeatures().size());
}
Use of io.hops.hopsworks.common.featurestore.query.join.Join in the project hopsworks by logicalclocks.
From the class TestQueryController, method testRemoveDuplicateColumnsPitEnabled.
/**
 * Same setup as the prefix test, but ft3 is registered as the event time of fg2 and
 * removeDuplicateColumns is called with the flag set to true. The assertions expect only ft1 and
 * ft2, in that order, to remain on the right query.
 */
@Test
public void testRemoveDuplicateColumnsPitEnabled() throws Exception {
  // Left side: ft1 is the join key, ft2 is additionally available
  List<Feature> leftKeys = new ArrayList<>(Arrays.asList(new Feature("ft1", "fg0", "Float", null, null)));
  List<Feature> leftAvailable = new ArrayList<>(leftKeys);
  leftAvailable.add(new Feature("ft2", "fg0", "int", null, null));

  // Right side: ft1 join key plus ft2 and ft3, every feature created with the "right_" prefix
  List<Feature> rightKeys = new ArrayList<>(Arrays.asList(new Feature("ft1", "fg1", "Float", null, "right_")));
  List<Feature> rightAvailable = new ArrayList<>(rightKeys);
  rightAvailable.add(new Feature("ft2", "fg1", "int", null, "right_"));
  rightAvailable.add(new Feature("ft3", "fg1", "int", null, "right_"));

  // ft3 becomes the event-time column of the right-hand feature group
  fg2.setEventTime("ft3");

  Query rightSide = new Query("fs1", "project_fs1", fg2, "fg0", rightAvailable, rightAvailable);
  Query leftSide = new Query("fs1", "project_fs1", fg1, "fg1", leftAvailable, leftAvailable);

  Join prefixedJoin =
      new Join(leftSide, rightSide, leftKeys, rightKeys, JoinType.INNER, "right_", singleEqualsJoinOperator);
  leftSide.setJoins(Arrays.asList(prefixedJoin));

  target.removeDuplicateColumns(leftSide, true);

  Assert.assertEquals(2, rightSide.getFeatures().size());
  Assert.assertEquals("ft1", rightSide.getFeatures().get(0).getName());
  Assert.assertEquals("ft2", rightSide.getFeatures().get(1).getName());
}
Use of io.hops.hopsworks.common.featurestore.query.join.Join in the project hopsworks by logicalclocks.
From the class TestPitJoinController, method testGenerateSql.
/**
 * Attaches two inner joins (fg0-fg1 on pk1/pk2 and fg0-fg2 on pk1 only) to one query and checks
 * the complete Spark SQL text produced by generateSQL against the expected statement.
 */
@Test
public void testGenerateSql() {
  // Features selected from the left query (alias fg0)
  List<Feature> fg0Features = new ArrayList<>(Arrays.asList(
      new Feature("pk1", "fg0", fgLeft, true),
      new Feature("pk2", "fg0", fgLeft),
      new Feature("ts", "fg0", fgLeft),
      new Feature("label", "fg0", fgLeft)));

  // Features selected from the first right query (alias fg1)
  List<Feature> fg1Features = new ArrayList<>(Arrays.asList(
      new Feature("pk1", "fg1", fgRight),
      new Feature("pk2", "fg1", fgRight),
      new Feature("ts", "fg1", fgRight),
      new Feature("ft1", "fg1", fgRight)));

  // Features selected from the second right query (alias fg2)
  List<Feature> fg2Features = new ArrayList<>(Arrays.asList(
      new Feature("pk1", "fg2", fgRight1),
      new Feature("ts", "fg2", fgRight1),
      new Feature("ft1", "fg2", fgRight1)));

  // First join: pk1 and pk2 on both sides
  List<Feature> fg0KeysForFg1 =
      Arrays.asList(new Feature("pk1", "fg0", fgLeft), new Feature("pk2", "fg0", fgLeft));
  List<Feature> fg1Keys =
      Arrays.asList(new Feature("pk1", "fg1", fgRight), new Feature("pk2", "fg1", fgRight));

  // join on different pks
  List<Feature> fg0KeysForFg2 = Collections.singletonList(new Feature("pk1", "fg0", fgLeft));
  List<Feature> fg2Keys = Collections.singletonList(new Feature("pk1", "fg2", fgRight1));

  List<SqlCondition> twoKeyConditions = Arrays.asList(SqlCondition.EQUALS, SqlCondition.EQUALS);
  List<SqlCondition> oneKeyCondition = Collections.singletonList(SqlCondition.EQUALS);

  Query leftQuery = new Query("fs", "project", fgLeft, "fg0", fg0Features, fg0Features, false, null);
  Query fg1Query = new Query("fs", "project", fgRight, "fg1", fg1Features, fg1Features, false, null);
  Query fg2Query = new Query("fs", "project", fgRight1, "fg2", fg2Features, fg2Features, false, null);

  Join joinWithFg1 =
      new Join(leftQuery, fg1Query, fg0KeysForFg1, fg1Keys, JoinType.INNER, null, twoKeyConditions);
  Join joinWithFg2 =
      new Join(leftQuery, fg2Query, fg0KeysForFg2, fg2Keys, JoinType.INNER, null, oneKeyCondition);
  leftQuery.setJoins(Arrays.asList(joinWithFg1, joinWithFg2));

  String expected = "WITH right_fg0 AS (SELECT *\n"
      + "FROM (SELECT `fg0`.`pk1`, `fg0`.`pk2`, `fg0`.`ts`, `fg0`.`label`, `fg0`.`pk1` `join_pk_pk1`, `fg0`.`ts` `join_evt_ts`, `fg1`.`pk1`, `fg1`.`pk2`, `fg1`.`ts`, `fg1`.`ft1`, "
      + "RANK() OVER (PARTITION BY `fg0`.`pk1`, `fg0`.`pk2`, `fg0`.`ts` ORDER BY `fg1`.`ts` DESC) pit_rank_hopsworks\n"
      + "FROM `fs`.`fg0_1` `fg0`\n"
      + "INNER JOIN `fs`.`fg1_1` `fg1` ON `fg0`.`pk1` = `fg1`.`pk1` AND `fg0`.`pk2` = `fg1`.`pk2` AND `fg0`.`ts` >= `fg1`.`ts`) NA\n"
      + "WHERE `pit_rank_hopsworks` = 1), "
      + "right_fg1 AS (SELECT *\n"
      + "FROM (SELECT `fg0`.`pk1`, `fg0`.`pk2`, `fg0`.`ts`, `fg0`.`label`, `fg0`.`pk1` `join_pk_pk1`, `fg0`.`ts` `join_evt_ts`, `fg2`.`pk1`, `fg2`.`ts`, `fg2`.`ft1`, "
      + "RANK() OVER (PARTITION BY `fg0`.`pk1`, `fg0`.`ts` ORDER BY `fg2`.`ts` DESC) pit_rank_hopsworks\n"
      + "FROM `fs`.`fg0_1` `fg0`\n"
      + "INNER JOIN `fs`.`fg2_1` `fg2` ON `fg0`.`pk1` = `fg2`.`pk1` AND `fg0`.`ts` >= `fg2`.`ts`) NA\n"
      + "WHERE `pit_rank_hopsworks` = 1) ("
      + "SELECT `right_fg0`.`pk1`, `right_fg0`.`pk2`, `right_fg0`.`ts`, `right_fg0`.`label`, `right_fg0`.`pk1`, `right_fg0`.`pk2`, `right_fg0`.`ts`, `right_fg0`.`ft1`, `right_fg1`.`pk1`, `right_fg1`.`ts`, `right_fg1`.`ft1`\n"
      + "FROM right_fg0\n"
      + "INNER JOIN right_fg1 ON `right_fg0`.`join_pk_pk1` = `right_fg1`.`join_pk_pk1` AND `right_fg0`.`join_evt_ts` = `right_fg1`.`join_evt_ts`)";

  String actualSql = pitJoinController.generateSQL(leftQuery, false)
      .toSqlString(new SparkSqlDialect(SqlDialect.EMPTY_CONTEXT))
      .getSql();

  Assert.assertEquals(expected, actualSql);
}
Aggregations