use of io.hops.hopsworks.common.featurestore.query.join.Join in project hopsworks by logicalclocks.
the class TestFilterController method testBuildFilterNodeSingleJoin.
/**
 * Builds a left query carrying a nested AND/OR filter, inner-joins a second query that carries
 * its own single filter, and checks the SQL string rendered from the combined filter node.
 */
@Test
public void testBuildFilterNodeSingleJoin() throws Exception {
  Query baseQuery = new Query("fs1", "project_fs1", fg1, "fg0", fg1Features, fg1Features);
  Query joinedQuery = new Query("fs1", "project_fs1", fg2, "fg1", fg2Features, fg2Features);

  // Nested OR branch over two features taken from the fg3 fixture
  FilterLogic orBranch = new FilterLogic(SqlFilterLogic.OR);
  orBranch.setLeftFilter(new Filter(Arrays.asList(fg3Features.get(1)), SqlCondition.EQUALS, "10"));
  orBranch.setRightFilter(new Filter(Arrays.asList(fg3Features.get(2)), SqlCondition.EQUALS, "10"));

  // Filter of the left query: <fg1 feature> = 10 AND (<OR branch>)
  FilterLogic baseFilter = new FilterLogic(SqlFilterLogic.AND);
  baseFilter.setLeftFilter(new Filter(Arrays.asList(fg1Features.get(1)), SqlCondition.EQUALS, "10"));
  baseFilter.setRightLogic(orBranch);
  baseQuery.setFilter(baseFilter);

  // Filter of the joined query: a single "not equals" condition
  FilterLogic joinedFilter = new FilterLogic(SqlFilterLogic.SINGLE);
  joinedFilter.setLeftFilter(new Filter(Arrays.asList(fg2Features.get(1)), SqlCondition.NOT_EQUALS, "10"));
  joinedQuery.setFilter(joinedFilter);

  Join innerJoin = new Join(baseQuery, joinedQuery, joinFeatures, joinFeatures, JoinType.INNER, null,
      Arrays.asList(SqlCondition.EQUALS));
  baseQuery.setJoins(Arrays.asList(innerJoin));

  // Start from the last join of the left query
  int lastJoinIndex = baseQuery.getJoins().size() - 1;
  String actual = filterController.buildFilterNode(baseQuery, baseQuery, lastJoinIndex, false)
      .toSqlString(new SparkSqlDialect(SqlDialect.EMPTY_CONTEXT))
      .getSql();

  String expected = "`fg1`.`fg1_ft` = 10 "
      + "AND (CASE WHEN `fg3`.`fg3_ft` IS NULL THEN 'default' ELSE `fg3`.`fg3_ft` END = '10' OR `fg3`.`join` = '10') "
      + "AND CASE WHEN `fg2`.`fg2_ft` IS NULL THEN 10.0 ELSE `fg2`.`fg2_ft` END <> 10";
  Assert.assertEquals(expected, actual);
}
use of io.hops.hopsworks.common.featurestore.query.join.Join in project hopsworks by logicalclocks.
the class TestQueryController method testRemoveDuplicateColumns.
/**
 * Joins two queries on their respective "ft1" feature, runs {@code removeDuplicateColumns} on the
 * left query and expects the right query to be left with exactly "ft2" and "ft3", in that order.
 */
@Test
public void testRemoveDuplicateColumns() throws Exception {
  // Left side: join key "ft1" plus one extra feature
  List<Feature> leftJoinKeys = new ArrayList<>();
  leftJoinKeys.add(new Feature("ft1", "fg0", "Float", null, null));
  List<Feature> leftAvailable = new ArrayList<>(leftJoinKeys);
  leftAvailable.add(new Feature("ft2", "fg0", "int", null, null));

  // Right side: join key "ft1" plus two extra features
  List<Feature> rightJoinKeys = new ArrayList<>();
  rightJoinKeys.add(new Feature("ft1", "fg1", "Float", null, null));
  List<Feature> rightAvailable = new ArrayList<>(rightJoinKeys);
  rightAvailable.add(new Feature("ft2", "fg1", "int", null, null));
  rightAvailable.add(new Feature("ft3", "fg1", "int", null, null));

  Query right = new Query("fs1", "project_fs1", fg2, "fg0", rightAvailable, rightAvailable);
  Query left = new Query("fs1", "project_fs1", fg1, "fg1", leftAvailable, leftAvailable);

  Join innerJoin = new Join(left, right, leftJoinKeys, rightJoinKeys, JoinType.INNER, null,
      singleEqualsJoinOperator);
  left.setJoins(Arrays.asList(innerJoin));

  target.removeDuplicateColumns(left, false);

  List<Feature> remaining = right.getFeatures();
  Assert.assertEquals(2, remaining.size());
  Assert.assertEquals("ft2", remaining.get(0).getName());
  Assert.assertEquals("ft3", remaining.get(1).getName());
}
use of io.hops.hopsworks.common.featurestore.query.join.Join in project hopsworks by logicalclocks.
the class TrainingDatasetController method getQuery.
// TODO feature view: remove
/**
 * Reconstructs the query that was used to generate the training dataset, fetching the features
 * and the joins in sorted order from the database.
 *
 * @param trainingDataset training dataset whose query is rebuilt
 * @param withLabel forwarded to {@code getFeaturesSorted} when loading the dataset features
 * @param project forwarded to the feature group controller when listing available features
 * @param user forwarded to the feature group controller when listing available features
 * @param isHiveEngine forwarded to the {@code Query} constructor and to {@code getQueryJoin}
 * @return the rebuilt query with its joins and filter set
 * @throws FeaturestoreException if the training dataset was not generated by a query, if a
 *     feature references a feature group that no longer exists, or if a requested feature is
 *     not among the available features of its feature group
 */
public Query getQuery(TrainingDataset trainingDataset, boolean withLabel, Project project, Users user, Boolean isHiveEngine) throws FeaturestoreException {
  if (!trainingDataset.isQuery()) {
    throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_NO_QUERY, Level.FINE,
        "Inference vector is only available for datasets generated by queries");
  }

  List<TrainingDatasetJoin> sortedJoins = getJoinsSorted(trainingDataset);
  // join id -> feature group alias, used when converting TrainingDatasetFeatures to query Features
  Map<Integer, String> aliasByJoinId = getAliasLookupTable(sortedJoins);

  // The selected features come from different feature groups. To respect their ordering they are
  // all attached to the left-most Query rather than being split over one Query per origin group.
  List<TrainingDatasetFeature> sortedTdFeatures = getFeaturesSorted(trainingDataset, withLabel);

  // Fail with a meaningful error if any of the feature groups has been deleted in the meantime
  for (TrainingDatasetFeature candidate : sortedTdFeatures) {
    if (candidate.getFeatureGroup() == null) {
      throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_QUERY_FG_DELETED, Level.FINE);
    }
  }

  // Fetch the available features once per involved feature group: fgId -> availableFeatures
  Map<Integer, List<Feature>> availableByFgId = new HashMap<>();
  for (TrainingDatasetJoin tdJoin : sortedJoins) {
    if (availableByFgId.containsKey(tdJoin.getFeatureGroup().getId())) {
      continue;
    }
    List<Feature> fgFeatures = featuregroupController.getFeatures(tdJoin.getFeatureGroup(), project, user)
        .stream()
        .map(f -> new Feature(f.getName(), aliasByJoinId.get(tdJoin.getId()), f.getType(), f.getDefaultValue(),
            f.getPrimary(), tdJoin.getFeatureGroup(), tdJoin.getPrefix()))
        .collect(Collectors.toList());
    availableByFgId.put(tdJoin.getFeatureGroup().getId(), fgFeatures);
  }

  // lookup key (feature group id + feature name) -> feature; the first occurrence wins on clashes
  Map<String, Feature> featureByKey = availableByFgId.values().stream()
      .flatMap(List::stream)
      .collect(Collectors.toMap(
          f -> makeFeatureLookupKey(f.getFeatureGroup().getId(), f.getName()),
          f -> f,
          (first, duplicate) -> first));

  List<Feature> selectedFeatures = new ArrayList<>();
  for (TrainingDatasetFeature wanted : sortedTdFeatures) {
    String lookupKey = makeFeatureLookupKey(wanted.getFeatureGroup().getId(), wanted.getName());
    Feature known = featureByKey.get(lookupKey);
    if (known == null) {
      throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.FEATURE_DOES_NOT_EXIST, Level.FINE,
          "Feature: " + wanted.getName() + " not found in feature group: " + wanted.getFeatureGroup().getName());
    }
    // Create a fresh instance: the alias stored on the available feature is not correct when a
    // feature group is joined with itself
    String joinAlias = aliasByJoinId.get(wanted.getTrainingDatasetJoin().getId());
    selectedFeatures.add(new Feature(known.getName(), joinAlias, known.getType(), known.getDefaultValue(),
        known.getPrefix(), wanted.getFeatureGroup(), wanted.getIndex()));
  }

  // feature store id -> feature store name
  Map<Integer, String> fsNameById = getFsLookupTableJoins(sortedJoins);

  // The first join entry describes the left-most feature group of the query
  TrainingDatasetJoin rootJoin = sortedJoins.get(0);
  String fsName = fsNameById.get(rootJoin.getFeatureGroup().getFeaturestore().getId());
  Query query = new Query(
      fsName,
      onlineFeaturestoreController.getOnlineFeaturestoreDbName(rootJoin.getFeatureGroup().getFeaturestore().getProject()),
      rootJoin.getFeatureGroup(),
      aliasByJoinId.get(rootJoin.getId()),
      selectedFeatures,
      availableByFgId.get(rootJoin.getFeatureGroup().getId()),
      isHiveEngine);

  // Every remaining entry becomes a join whose left side is the query built above
  List<Join> queryJoins = new ArrayList<>();
  for (TrainingDatasetJoin rightSide : sortedJoins.subList(1, sortedJoins.size())) {
    queryJoins.add(getQueryJoin(query, rightSide, aliasByJoinId, fsNameById, availableByFgId, isHiveEngine));
  }
  query.setJoins(queryJoins);

  query.setFilter(convertToFilterLogic(trainingDataset.getFilters(), featureByKey, "L"));
  return query;
}
use of io.hops.hopsworks.common.featurestore.query.join.Join in project hopsworks by logicalclocks.
the class TrainingDatasetController method collectJoins.
/**
 * Builds the list of {@link TrainingDatasetJoin} entries describing the feature groups of a query.
 * <p>
 * The first entry represents the query's own feature group (join type 0, index 0, no prefix).
 * Each join of the query then contributes one entry, in order, with an increasing index, the
 * ordinal of its join type, its prefix and its join conditions.
 * <p>
 * The end commit id of a (sub)query is passed on only when the feature group of that same
 * (sub)query is a cached feature group using the HUDI time travel format; otherwise {@code null}
 * is passed.
 *
 * @param query query whose feature group and joins are collected
 * @param trainingDataset forwarded to {@code makeTrainingDatasetJoin}
 * @param featureView forwarded to {@code makeTrainingDatasetJoin}
 * @return the join entries, starting with the entry for the query's own feature group
 */
public List<TrainingDatasetJoin> collectJoins(Query query, TrainingDataset trainingDataset, FeatureView featureView) {
  List<TrainingDatasetJoin> joins = new ArrayList<>();
  // add the first feature group
  int index = 0;
  if (query.getFeaturegroup().getFeaturegroupType() == FeaturegroupType.CACHED_FEATURE_GROUP
      && query.getFeaturegroup().getCachedFeaturegroup().getTimeTravelFormat() == TimeTravelFormat.HUDI) {
    joins.add(makeTrainingDatasetJoin(trainingDataset, featureView, query.getFeaturegroup(),
        query.getLeftFeatureGroupEndCommitId(), (short) 0, index++, null));
  } else {
    joins.add(makeTrainingDatasetJoin(trainingDataset, featureView, query.getFeaturegroup(), null, (short) 0,
        index++, null));
  }

  if (query.getJoins() != null && !query.getJoins().isEmpty()) {
    for (Join join : query.getJoins()) {
      Query rightQuery = join.getRightQuery();
      // NOTE(review): the ordinal of the join type is what gets handed to makeTrainingDatasetJoin;
      // reordering the JoinType constants would presumably change the meaning of stored values.
      short joinTypeOrdinal = (short) join.getJoinType().ordinal();

      TrainingDatasetJoin tdJoin;
      // The commit id belongs to the right-hand query, so the time travel check must inspect the
      // right-hand feature group as well. Checking the left feature group here would drop the
      // commit id of a HUDI feature group that is joined onto a non-HUDI one.
      if (rightQuery.getFeaturegroup().getFeaturegroupType() == FeaturegroupType.CACHED_FEATURE_GROUP
          && rightQuery.getFeaturegroup().getCachedFeaturegroup().getTimeTravelFormat() == TimeTravelFormat.HUDI) {
        tdJoin = makeTrainingDatasetJoin(trainingDataset, featureView, rightQuery.getFeaturegroup(),
            rightQuery.getLeftFeatureGroupEndCommitId(), joinTypeOrdinal, index++, join.getPrefix());
      } else {
        tdJoin = makeTrainingDatasetJoin(trainingDataset, featureView, rightQuery.getFeaturegroup(), null,
            joinTypeOrdinal, index++, join.getPrefix());
      }
      tdJoin.setConditions(collectJoinConditions(join, tdJoin));
      joins.add(tdJoin);
    }
  }
  return joins;
}
use of io.hops.hopsworks.common.featurestore.query.join.Join in project hopsworks by logicalclocks.
the class FeatureViewController method makeQuery.
/**
 * Builds the query backing a feature view from the joins and features stored on it.
 * <p>
 * Joins are ordered by their index. Features are ordered by index when they have one and by name
 * otherwise; features without an index are placed after features with an index.
 *
 * @param featureView feature view whose joins and features are read
 * @param project forwarded to the feature group controller when listing available features
 * @param user forwarded to the feature group controller when listing available features
 * @return the query, rooted at the feature group of the first join, with the remaining joins set
 * @throws FeaturestoreException if a feature references a feature group that no longer exists or
 *     a requested feature is not among the available features of its feature group
 */
public Query makeQuery(FeatureView featureView, Project project, Users user) throws FeaturestoreException {
  List<TrainingDatasetJoin> joins = featureView.getJoins().stream()
      .sorted(Comparator.comparing(TrainingDatasetJoin::getIndex))
      .collect(Collectors.toList());
  Map<Integer, String> fgAliasLookup = trainingDatasetController.getAliasLookupTable(joins);

  List<TrainingDatasetFeature> tdFeatures = featureView.getFeatures().stream().sorted((t1, t2) -> {
    boolean leftHasIndex = t1.getIndex() != null;
    boolean rightHasIndex = t2.getIndex() != null;
    if (leftHasIndex && rightHasIndex) {
      // compare based on index
      return t1.getIndex().compareTo(t2.getIndex());
    }
    if (!leftHasIndex && !rightHasIndex) {
      // Old training dataset with no index. compare based on name
      return t1.getName().compareTo(t2.getName());
    }
    // Mixed case: keep the ordering total (and avoid a NullPointerException inside compareTo)
    // by sorting features without an index after features with an index.
    return leftHasIndex ? -1 : 1;
  }).collect(Collectors.toList());

  // Check that all the feature groups still exists, if not throw a reasonable error
  if (tdFeatures.stream().anyMatch(j -> j.getFeatureGroup() == null)) {
    throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_QUERY_FG_DELETED, Level.FINE);
  }

  // Get available features for all involved feature groups once, and save in map fgId -> availableFeatures
  Map<Integer, List<Feature>> availableFeaturesLookup = new HashMap<>();
  for (TrainingDatasetJoin join : joins) {
    if (!availableFeaturesLookup.containsKey(join.getFeatureGroup().getId())) {
      List<Feature> availableFeatures = featuregroupController.getFeatures(join.getFeatureGroup(), project, user)
          .stream()
          .map(f -> new Feature(f.getName(), fgAliasLookup.get(join.getId()), f.getType(), f.getPrimary(),
              f.getDefaultValue(), join.getPrefix()))
          .collect(Collectors.toList());
      availableFeaturesLookup.put(join.getFeatureGroup().getId(), availableFeatures);
    }
  }

  List<Feature> features = new ArrayList<>();
  for (TrainingDatasetFeature requestedFeature : tdFeatures) {
    // Look the requested feature up by name among the available features of its feature group and
    // re-create it with the alias of the join it was selected through.
    features.add(availableFeaturesLookup.get(requestedFeature.getFeatureGroup().getId()).stream()
        .filter(af -> af.getName().equals(requestedFeature.getName()))
        .map(af -> new Feature(af.getName(), fgAliasLookup.get(requestedFeature.getTrainingDatasetJoin().getId()),
            af.getType(), af.getDefaultValue(), af.getPrefix(), requestedFeature.getFeatureGroup(),
            requestedFeature.getIndex()))
        .findFirst()
        .orElseThrow(() -> new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.FEATURE_DOES_NOT_EXIST,
            Level.FINE, "Feature: " + requestedFeature.getName() + " not found in feature group: "
            + requestedFeature.getFeatureGroup().getName())));
  }

  // Keep a map feature store id -> feature store name
  Map<Integer, String> fsLookup = trainingDatasetController.getFsLookupTableJoins(joins);
  Query query = new Query(
      fsLookup.get(joins.get(0).getFeatureGroup().getFeaturestore().getId()),
      onlineFeaturestoreController.getOnlineFeaturestoreDbName(joins.get(0).getFeatureGroup().getFeaturestore().getProject()),
      joins.get(0).getFeatureGroup(),
      fgAliasLookup.get(joins.get(0).getId()),
      features,
      availableFeaturesLookup.get(joins.get(0).getFeatureGroup().getId()),
      false);

  // Set the remaining feature groups as join
  List<Join> queryJoins = new ArrayList<>();
  for (int i = 1; i < joins.size(); i++) {
    // left side of the join stays fixed, the counter starts at 1
    queryJoins.add(trainingDatasetController.getQueryJoin(query, joins.get(i), fgAliasLookup, fsLookup,
        availableFeaturesLookup, false));
  }
  query.setJoins(queryJoins);
  return query;
}
Aggregations