Use of io.hops.hopsworks.common.featurestore.query.join.Join in project hopsworks by logicalclocks.
Class TestConstructorController, method testNoJoiningKeyMultipleDifferentSizes.
/**
 * Verifies the join produced by {@code queryController.extractPrimaryKeysJoin} when no explicit
 * joining key is given and both sides expose several features.
 *
 * The left side offers ft1, ft2 and ft4, the right side ft1, ft2 and ft3, so only ft1 and ft2
 * appear on both sides. The test expects two join keys on each side and two EQUALS operators.
 */
@Test
public void testNoJoiningKeyMultipleDifferentSizes() throws Exception {
  // NOTE(review): the boolean passed to Feature is presumably the "primary key" flag - confirm.
  List<Feature> availableLeft = new ArrayList<>();
  availableLeft.add(new Feature("ft1", true));
  availableLeft.add(new Feature("ft2", true));
  availableLeft.add(new Feature("ft4", true));

  List<Feature> availableRight = new ArrayList<>();
  availableRight.add(new Feature("ft1", true));
  availableRight.add(new Feature("ft2", true));
  availableRight.add(new Feature("ft3", true));

  Query leftQuery = new Query("fs1", "project_fs1", fg1, "fg1", availableLeft, availableLeft);
  Query rightQuery = new Query("fs1", "project_fs1", fg2, "fg1", availableRight, availableRight);

  List<SqlCondition> expectedConditionList = Arrays.asList(SqlCondition.EQUALS, SqlCondition.EQUALS);

  // The last argument (prefix) is null: no prefix is requested for the joined features.
  Join join = queryController.extractPrimaryKeysJoin(leftQuery, rightQuery, JoinType.INNER, null);

  Assert.assertEquals(2, join.getLeftOn().size());
  Assert.assertEquals(2, join.getRightOn().size());
  Assert.assertEquals(expectedConditionList, join.getJoinOperator());
}
Use of io.hops.hopsworks.common.featurestore.query.join.Join in project hopsworks by logicalclocks.
Class TestConstructorController, method testSingleJoinOrderBySQLQueryOnline.
/**
 * Generates the SQL (second argument of {@code generateSQL} set to {@code true}) for a single
 * inner join with an ORDER BY over one feature from each side, and compares it to the expected
 * statement rendered with the Spark SQL dialect.
 */
@Test
public void testSingleJoinOrderBySQLQueryOnline() throws Exception {
  List<Feature> leftFeatures = new ArrayList<>();
  leftFeatures.add(new Feature("ft1", "fg1", "Float", null, null));
  List<Feature> rightFeatures = new ArrayList<>();
  rightFeatures.add(new Feature("ft1", "fg2", "Float", null, null));

  Query left = new Query("fs1", "project_fs2", fg1, "fg1", leftFeatures, leftFeatures);
  Query right = new Query("fs1", "project_fs1", fg2, "fg2", rightFeatures, rightFeatures);

  // NOTE(review): both join-key lists are the left feature list - confirm this is intended.
  Join innerJoin =
      new Join(left, right, leftFeatures, leftFeatures, JoinType.INNER, null, singleEqualsJoinOperator);
  left.setJoins(Arrays.asList(innerJoin));

  // Order by the left feature first, then the right one.
  List<Feature> ordering = new ArrayList<>(leftFeatures);
  ordering.addAll(rightFeatures);
  left.setOrderByFeatures(ordering);

  // Newlines emitted by the SQL writer are flattened so the comparison is on a single line.
  String actualSql = target.generateSQL(left, true)
      .toSqlString(new SparkSqlDialect(SqlDialect.EMPTY_CONTEXT))
      .getSql()
      .replace("\n", " ");

  String expectedSql = "SELECT `fg1`.`ft1`, `fg2`.`ft1` FROM `project_fs2`.`fg1_1` `fg1` INNER JOIN "
      + "`project_fs1`.`fg2_1` `fg2` ON `fg1`.`ft1` = `fg2`.`ft1` ORDER BY `fg1`.`ft1`, `fg2`.`ft1`";
  Assert.assertEquals(expectedSql, actualSql);
}
Use of io.hops.hopsworks.common.featurestore.query.join.Join in project hopsworks by logicalclocks.
Class PitJoinController, method generateSubQueries.
/**
 * Builds one aliased sub-query per join of {@code query}.
 *
 * For every join, {@code baseQuery} is temporarily reconfigured to contain just that join
 * (extended with an event-time condition), the SQL is generated, a rank-over-window column is
 * appended to its select list, and the result is wrapped in an AS call using
 * {@code HIVE_ALIAS_PLACEHOLDER}.
 *
 * Side effects on {@code baseQuery}: its joins are overwritten on every iteration and its
 * features are reset to a copy of {@code query.getFeatures()} at the end of every iteration.
 *
 * @param baseQuery query object that is mutated and rendered once per join
 * @param query query whose joins, available features and features drive the generation
 * @param isTrainingDataset when true, the features of {@code baseQuery} are replaced by the
 *                          result of {@code dropIrrelevantSubqueryFeatures} before rendering
 * @return the generated sub-queries, in the order of {@code query.getJoins()}
 */
public List<SqlCall> generateSubQueries(Query baseQuery, Query query, boolean isTrainingDataset) {
  // we always re-select all primary key columns of the "label group" in order to be able to perform final join
  List<Feature> additionalPkFeatures = query.getAvailableFeatures().stream()
      .filter(Feature::isPrimary)
      .map(f -> new Feature(f.getName(), f.getFgAlias(), f.getType(), f.isPrimary(), f.getDefaultValue(),
          PK_JOIN_PREFIX))
      .collect(Collectors.toList());
  // the event time column of the label group is re-selected as well, under its own prefix
  additionalPkFeatures.add(
      new Feature(query.getFeaturegroup().getEventTime(), query.getAs(), (String) null, null, EVT_JOIN_PREFIX));
  for (Feature pkFeature : additionalPkFeatures) {
    pkFeature.setFeatureGroup(query.getFeaturegroup());
  }

  List<SqlCall> subQueries = new ArrayList<>();
  for (Join join : query.getJoins()) {
    Query rightQuery = join.getRightQuery();

    // add event time inequality join condition
    List<Feature> leftOnWithEventTime =
        addEventTimeOn(join.getLeftOn(), baseQuery.getFeaturegroup(), baseQuery.getAs());
    List<Feature> rightOnWithEventTime =
        addEventTimeOn(join.getRightOn(), rightQuery.getFeaturegroup(), rightQuery.getAs());
    List<SqlCondition> operatorsWithEventTime =
        addEventTimeCondition(join.getJoinOperator(), SqlCondition.GREATER_THAN_OR_EQUAL);

    // single right feature group
    Join pitJoin = new Join(baseQuery, rightQuery, leftOnWithEventTime, rightOnWithEventTime,
        join.getJoinType(), join.getPrefix(), operatorsWithEventTime);
    baseQuery.setJoins(Collections.singletonList(pitJoin));

    // if it's a regular query, this is a no op
    if (isTrainingDataset) {
      baseQuery.setFeatures(dropIrrelevantSubqueryFeatures(query, rightQuery));
    }
    baseQuery.getFeatures().addAll(additionalPkFeatures);

    // first generate subquery and subsequently add rank over window
    SqlSelect subQuery = constructorController.generateSQL(baseQuery, false);

    // now add rank over window
    Feature rightEventTime =
        new Feature(rightQuery.getFeaturegroup().getEventTime(), rightQuery.getAs(), false);
    subQuery.getSelectList().add(rankOverAs(leftOnWithEventTime, rightEventTime));

    SqlIdentifier placeholderAlias = new SqlIdentifier(HIVE_ALIAS_PLACEHOLDER, SqlParserPos.ZERO);
    subQueries.add(SqlStdOperatorTable.AS.createCall(SqlParserPos.ZERO, subQuery, placeholderAlias));

    // restore the feature list so the next iteration starts from the originally requested features
    baseQuery.setFeatures(new ArrayList<>(query.getFeatures()));
  }
  return subQueries;
}
Use of io.hops.hopsworks.common.featurestore.query.join.Join in project hopsworks by logicalclocks.
Class QueryController, method convertQueryDTO.
/**
 * Recursively convert the QueryDTO into the internal query representation.
 *
 * For feature groups whose cached feature group uses the HUDI time travel format, the end commit
 * (and, for join-free queries with a start time, the start commit timestamp) is resolved and
 * stored on the resulting query.
 *
 * @param queryDTO the DTO to convert; its left feature group id is the key into the lookup maps
 * @param fgAliasLookup alias to use for each feature group, keyed by feature group id
 * @param fgLookup feature group entity, keyed by feature group id
 * @param availableFeatureLookup available features of each feature group, keyed by feature group id
 * @param pitEnabled forwarded to the join conversion and to the duplicate column removal
 * @return the internal query, including converted joins and filter when present
 * @throws FeaturestoreException propagated from the validation/conversion helpers
 * @throws IllegalArgumentException if a HUDI feature group is queried with the Hive engine and an
 *         end time (on the query or on any of its joins), or if a start time is combined with joins
 */
public Query convertQueryDTO(QueryDTO queryDTO, Map<Integer, String> fgAliasLookup, Map<Integer, Featuregroup> fgLookup, Map<Integer, List<Feature>> availableFeatureLookup, boolean pitEnabled) throws FeaturestoreException {
  Integer fgId = queryDTO.getLeftFeatureGroup().getId();
  Featuregroup fg = fgLookup.get(fgId);
  String featureStore = featurestoreFacade.getHiveDbName(fg.getFeaturestore().getHiveDbId());
  // used to build the online query - needs to respect the online db format name
  String projectName = onlineFeaturestoreController.getOnlineFeaturestoreDbName(fg.getFeaturestore().getProject());
  List<Feature> requestedFeatures = validateFeatures(fg, queryDTO.getLeftFeatures(), availableFeatureLookup.get(fgId));
  Query query = new Query(featureStore, projectName, fg, fgAliasLookup.get(fgId), requestedFeatures,
      availableFeatureLookup.get(fgId), queryDTO.getHiveEngine());

  if (fg.getCachedFeaturegroup() != null
      && fg.getCachedFeaturegroup().getTimeTravelFormat() == TimeTravelFormat.HUDI) {
    // if hudi and end hive engine, only possible to get latest snapshot else raise exception.
    // The joins list may be null (it is null-checked further down), so guard it before streaming.
    if (queryDTO.getHiveEngine()
        && (queryDTO.getLeftFeatureGroupEndTime() != null
            || (queryDTO.getJoins() != null
                && queryDTO.getJoins().stream()
                    .anyMatch(join -> join.getQuery().getLeftFeatureGroupEndTime() != null)))) {
      throw new IllegalArgumentException("Hive engine on Python environments does not support incremental or " + "snapshot queries. Read feature group without timestamp to retrieve latest snapshot or switch to " + "environment with Spark Engine.");
    }
    // If the feature group is hudi, validate and configure start and end commit id/timestamp
    FeatureGroupCommit endCommit =
        featureGroupCommitCommitController.findCommitByDate(fg, queryDTO.getLeftFeatureGroupEndTime());
    query.setLeftFeatureGroupEndTimestamp(endCommit.getCommittedOn());
    query.setLeftFeatureGroupEndCommitId(endCommit.getFeatureGroupCommitPK().getCommitId());
    if ((queryDTO.getJoins() == null || queryDTO.getJoins().isEmpty())
        && queryDTO.getLeftFeatureGroupStartTime() != null) {
      // a start time is only honoured for join-free queries
      Long exactStartCommitTimestamp = featureGroupCommitCommitController
          .findCommitByDate(query.getFeaturegroup(), queryDTO.getLeftFeatureGroupStartTime())
          .getCommittedOn();
      query.setLeftFeatureGroupStartTimestamp(exactStartCommitTimestamp);
    } else if (queryDTO.getJoins() != null && queryDTO.getLeftFeatureGroupStartTime() != null) {
      throw new IllegalArgumentException("For incremental queries start time must be provided and " + "join statements are not allowed");
    }
  }

  // If there are any joins, recursively convert the Join's QueryDTO into the internal Query representation
  if (queryDTO.getJoins() != null && !queryDTO.getJoins().isEmpty()) {
    query.setJoins(
        convertJoins(query, queryDTO.getJoins(), fgAliasLookup, fgLookup, availableFeatureLookup, pitEnabled));
    // remove duplicated join columns
    removeDuplicateColumns(query, pitEnabled);
  }

  // If there are any filters, recursively convert the filter logic
  if (queryDTO.getFilter() != null) {
    query.setFilter(filterController.convertFilterLogic(queryDTO.getFilter(), fgLookup, availableFeatureLookup));
  }
  return query;
}
Use of io.hops.hopsworks.common.featurestore.query.join.Join in project hopsworks by logicalclocks.
Class QueryController, method populateFgLookupTables.
/**
 * Fills the three lookup maps (alias, feature group, available features) for the left feature
 * group of {@code queryDTO} and, depth first, for every query nested in its joins.
 *
 * @param queryDTO query whose feature groups should be registered
 * @param fgId running counter used to generate the alias of the feature group; it is
 *             incremented once after every nested join has been processed
 * @param fgAliasLookup receives the generated alias, keyed by feature group id
 * @param fgLookup receives the validated feature group, keyed by feature group id
 * @param availableFeatureLookup receives the feature list of the feature group, keyed by its id
 * @param project forwarded to the feature lookup
 * @param user forwarded to the feature lookup
 * @param prefix prefix stored on every feature created for this feature group
 * @return the counter value that was used for this query's own feature group
 * @throws FeaturestoreException propagated from the validation and feature lookup helpers
 */
public int populateFgLookupTables(QueryDTO queryDTO, int fgId, Map<Integer, String> fgAliasLookup, Map<Integer, Featuregroup> fgLookup, Map<Integer, List<Feature>> availableFeatureLookup, Project project, Users user, String prefix) throws FeaturestoreException {
  // go into depth first
  List<JoinDTO> nestedJoins = queryDTO.getJoins();
  if (nestedJoins != null) {
    for (JoinDTO nested : nestedJoins) {
      // each nested query consumes the ids it needs; the next one starts right after
      fgId = populateFgLookupTables(nested.getQuery(), fgId, fgAliasLookup, fgLookup, availableFeatureLookup,
          project, user, nested.getPrefix()) + 1;
    }
  }

  Featuregroup fg = validateFeaturegroupDTO(queryDTO.getLeftFeatureGroup());
  Integer fgKey = fg.getId();
  fgLookup.put(fgKey, fg);

  String alias = generateAs(fgId);
  fgAliasLookup.put(fgKey, alias);

  // wrap every feature of the group with the alias just registered and the requested prefix
  List<Feature> availableFeatures = featuregroupController.getFeatures(fg, project, user).stream()
      .map(f -> new Feature(f.getName(), alias, f.getType(), f.getDefaultValue(), f.getPrimary(), fg, prefix))
      .collect(Collectors.toList());
  availableFeatureLookup.put(fgKey, availableFeatures);

  return fgId;
}
Aggregations