use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetJoin in project hopsworks by logicalclocks.
the class TrainingDatasetController method getQuery.
// TODO feature view: remove
/**
 * Reconstructs the query that was used to generate a training dataset, reading the stored joins and
 * features back in their persisted order.
 *
 * <p>All selected features are attached to the left-most query (instead of being split over the
 * queries of their origin feature groups) so that the original feature ordering is respected. Every
 * join after the first one is added as a join on that left-most query, and the stored filters are
 * converted and set on the result.
 *
 * @param trainingDataset the training dataset to rebuild the query for; must have been created from a query
 * @param withLabel passed on to {@code getFeaturesSorted} when loading the selected features
 * @param project passed on to the feature group controller when listing available features
 * @param user passed on to the feature group controller when listing available features
 * @param isHiveEngine passed on to the {@code Query} constructor and to {@code getQueryJoin}
 * @return the reconstructed query, including joins and filter
 * @throws FeaturestoreException if the training dataset was not generated by a query, if a selected
 *     feature no longer has a feature group, or if a selected feature is not among the available
 *     features of its feature group
 */
public Query getQuery(TrainingDataset trainingDataset, boolean withLabel, Project project, Users user, Boolean isHiveEngine) throws FeaturestoreException {
  if (!trainingDataset.isQuery()) {
    throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_NO_QUERY, Level.FINE,
        "Inference vector is only available for datasets generated by queries");
  }

  List<TrainingDatasetJoin> joins = getJoinsSorted(trainingDataset);
  // join id -> alias of the feature group in the generated query
  Map<Integer, String> fgAliasLookup = getAliasLookupTable(joins);

  // Features of the select part; they may originate from different feature groups
  List<TrainingDatasetFeature> tdFeatures = getFeaturesSorted(trainingDataset, withLabel);

  // Fail with a reasonable error if one of the feature groups has been deleted in the meantime
  for (TrainingDatasetFeature selected : tdFeatures) {
    if (selected.getFeatureGroup() == null) {
      throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_QUERY_FG_DELETED, Level.FINE);
    }
  }

  // Fetch the available features of every involved feature group only once: fgId -> availableFeatures
  Map<Integer, List<Feature>> availableFeaturesLookup = new HashMap<>();
  for (TrainingDatasetJoin join : joins) {
    if (availableFeaturesLookup.containsKey(join.getFeatureGroup().getId())) {
      continue;
    }
    List<Feature> fgFeatures = featuregroupController.getFeatures(join.getFeatureGroup(), project, user)
        .stream()
        .map(f -> new Feature(f.getName(), fgAliasLookup.get(join.getId()), f.getType(), f.getDefaultValue(),
            f.getPrimary(), join.getFeatureGroup(), join.getPrefix()))
        .collect(Collectors.toList());
    availableFeaturesLookup.put(join.getFeatureGroup().getId(), fgFeatures);
  }

  // (fgId, feature name) key -> available feature; the first feature seen for a key wins
  Map<String, Feature> featureLookup = new HashMap<>();
  for (List<Feature> fgFeatures : availableFeaturesLookup.values()) {
    for (Feature available : fgFeatures) {
      featureLookup.putIfAbsent(
          makeFeatureLookupKey(available.getFeatureGroup().getId(), available.getName()), available);
    }
  }

  List<Feature> features = new ArrayList<>();
  for (TrainingDatasetFeature requested : tdFeatures) {
    String lookupKey = makeFeatureLookupKey(requested.getFeatureGroup().getId(), requested.getName());
    Feature available = featureLookup.get(lookupKey);
    if (available == null) {
      throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.FEATURE_DOES_NOT_EXIST, Level.FINE,
          "Feature: " + requested.getName() + " not found in feature group: "
              + requested.getFeatureGroup().getName());
    }
    // A fresh instance is needed: the alias stored on the available feature is not correct when a
    // feature group is joined with itself, so the alias of the feature's own join is used instead
    features.add(new Feature(available.getName(), fgAliasLookup.get(requested.getTrainingDatasetJoin().getId()),
        available.getType(), available.getDefaultValue(), available.getPrefix(), requested.getFeatureGroup(),
        requested.getIndex()));
  }

  // feature store id -> feature store name
  Map<Integer, String> fsLookup = getFsLookupTableJoins(joins);

  // The first join is the left-most side of the query and carries all selected features
  TrainingDatasetJoin leftJoin = joins.get(0);
  Query query = new Query(
      fsLookup.get(leftJoin.getFeatureGroup().getFeaturestore().getId()),
      onlineFeaturestoreController.getOnlineFeaturestoreDbName(leftJoin.getFeatureGroup().getFeaturestore().getProject()),
      leftJoin.getFeatureGroup(),
      fgAliasLookup.get(leftJoin.getId()),
      features,
      availableFeaturesLookup.get(leftJoin.getFeatureGroup().getId()),
      isHiveEngine);

  // Every remaining feature group becomes a join; the left side of each join stays fixed
  List<Join> queryJoins = new ArrayList<>();
  for (TrainingDatasetJoin rightJoin : joins.subList(1, joins.size())) {
    queryJoins.add(getQueryJoin(query, rightJoin, fgAliasLookup, fsLookup, availableFeaturesLookup, isHiveEngine));
  }
  query.setJoins(queryJoins);

  query.setFilter(convertToFilterLogic(trainingDataset.getFilters(), featureLookup, "L"));
  return query;
}
use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetJoin in project hopsworks by logicalclocks.
the class TrainingDatasetController method collectJoins.
/**
 * Builds the ordered list of {@link TrainingDatasetJoin} entries for a query: one entry for the
 * query's own (left-most) feature group, followed by one entry per join of the query.
 *
 * <p>The end commit id of a feature group is only recorded when that feature group is a cached
 * feature group using the Hudi time travel format; otherwise {@code null} is stored. The check is
 * done per feature group, i.e. each joined feature group is evaluated on its own and not based on
 * the left-most feature group of the outer query.
 *
 * @param query the query whose feature group and joins are collected
 * @param trainingDataset passed on to {@code makeTrainingDatasetJoin}
 * @param featureView passed on to {@code makeTrainingDatasetJoin}
 * @return the joins in query order, with consecutive indexes starting at 0
 */
public List<TrainingDatasetJoin> collectJoins(Query query, TrainingDataset trainingDataset, FeatureView featureView) {
  List<TrainingDatasetJoin> joins = new ArrayList<>();
  int index = 0;

  // add the first feature group; it has join type 0 and no prefix
  joins.add(makeTrainingDatasetJoin(trainingDataset, featureView, query.getFeaturegroup(),
      hasHudiLeftFeatureGroup(query) ? query.getLeftFeatureGroupEndCommitId() : null,
      (short) 0, index++, null));

  if (query.getJoins() != null && !query.getJoins().isEmpty()) {
    for (Join join : query.getJoins()) {
      Query rightQuery = join.getRightQuery();
      // Decide on the joined feature group itself whether a commit id has to be kept
      TrainingDatasetJoin tdJoin = makeTrainingDatasetJoin(trainingDataset, featureView,
          rightQuery.getFeaturegroup(),
          hasHudiLeftFeatureGroup(rightQuery) ? rightQuery.getLeftFeatureGroupEndCommitId() : null,
          // the join type is stored as its enum ordinal, unchanged from the existing behavior
          (short) join.getJoinType().ordinal(), index++, join.getPrefix());
      tdJoin.setConditions(collectJoinConditions(join, tdJoin));
      joins.add(tdJoin);
    }
  }
  return joins;
}

// Tells whether the feature group of the given query is a cached feature group in Hudi time travel format.
private boolean hasHudiLeftFeatureGroup(Query q) {
  return q.getFeaturegroup().getFeaturegroupType() == FeaturegroupType.CACHED_FEATURE_GROUP
      && q.getFeaturegroup().getCachedFeaturegroup().getTimeTravelFormat() == TimeTravelFormat.HUDI;
}
use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetJoin in project hopsworks by logicalclocks.
the class FeatureViewController method makeQuery.
/**
 * Reconstructs the query of a feature view from its stored joins and features.
 *
 * <p>Joins are ordered by their index. Features are ordered by index when they have one and by name
 * when they do not (old entries without an index); if both kinds are mixed, indexed features come
 * first. All selected features are attached to the left-most query and every join after the first
 * one is added as a join on it. The engine flag handed to the {@code Query} constructor and to
 * {@code getQueryJoin} is always {@code false} here.
 *
 * @param featureView the feature view whose joins and features are read
 * @param project passed on to the feature group controller when listing available features
 * @param user passed on to the feature group controller when listing available features
 * @return the reconstructed query including its joins
 * @throws FeaturestoreException if a selected feature no longer has a feature group, or if a
 *     selected feature is not among the available features of its feature group
 */
public Query makeQuery(FeatureView featureView, Project project, Users user) throws FeaturestoreException {
  List<TrainingDatasetJoin> joins = featureView.getJoins().stream()
      .sorted(Comparator.comparing(TrainingDatasetJoin::getIndex))
      .collect(Collectors.toList());
  Map<Integer, String> fgAliasLookup = trainingDatasetController.getAliasLookupTable(joins);

  List<TrainingDatasetFeature> tdFeatures = featureView.getFeatures().stream().sorted((t1, t2) -> {
    boolean firstIndexed = t1.getIndex() != null;
    boolean secondIndexed = t2.getIndex() != null;
    if (firstIndexed && secondIndexed) {
      // compare based on index
      return t1.getIndex().compareTo(t2.getIndex());
    }
    if (!firstIndexed && !secondIndexed) {
      // Old training dataset with no index. compare based on name
      return t1.getName().compareTo(t2.getName());
    }
    // Only one side has an index: keep the ordering consistent (and null-safe) by sorting
    // indexed features before the ones without an index
    return firstIndexed ? -1 : 1;
  }).collect(Collectors.toList());

  // Check that all the feature groups still exists, if not throw a reasonable error
  if (tdFeatures.stream().anyMatch(j -> j.getFeatureGroup() == null)) {
    throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_QUERY_FG_DELETED, Level.FINE);
  }

  // Get available features for all involved feature groups once, and save in map fgId -> availableFeatures
  Map<Integer, List<Feature>> availableFeaturesLookup = new HashMap<>();
  for (TrainingDatasetJoin join : joins) {
    if (!availableFeaturesLookup.containsKey(join.getFeatureGroup().getId())) {
      List<Feature> availableFeatures = featuregroupController.getFeatures(join.getFeatureGroup(), project, user)
          .stream()
          .map(f -> new Feature(f.getName(), fgAliasLookup.get(join.getId()), f.getType(), f.getPrimary(),
              f.getDefaultValue(), join.getPrefix()))
          .collect(Collectors.toList());
      availableFeaturesLookup.put(join.getFeatureGroup().getId(), availableFeatures);
    }
  }

  List<Feature> features = new ArrayList<>();
  for (TrainingDatasetFeature requestedFeature : tdFeatures) {
    // A new Feature is created with the alias of the feature's own join, rather than reusing the
    // alias stored on the available feature of the feature group
    features.add(availableFeaturesLookup.get(requestedFeature.getFeatureGroup().getId()).stream()
        .filter(af -> af.getName().equals(requestedFeature.getName()))
        .map(af -> new Feature(af.getName(), fgAliasLookup.get(requestedFeature.getTrainingDatasetJoin().getId()),
            af.getType(), af.getDefaultValue(), af.getPrefix(), requestedFeature.getFeatureGroup(),
            requestedFeature.getIndex()))
        .findFirst()
        .orElseThrow(() -> new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.FEATURE_DOES_NOT_EXIST,
            Level.FINE, "Feature: " + requestedFeature.getName() + " not found in feature group: "
            + requestedFeature.getFeatureGroup().getName())));
  }

  // Keep a map feature store id -> feature store name
  Map<Integer, String> fsLookup = trainingDatasetController.getFsLookupTableJoins(joins);

  // The first join is the left-most side of the query and carries all selected features
  TrainingDatasetJoin leftJoin = joins.get(0);
  Query query = new Query(
      fsLookup.get(leftJoin.getFeatureGroup().getFeaturestore().getId()),
      onlineFeaturestoreController.getOnlineFeaturestoreDbName(leftJoin.getFeatureGroup().getFeaturestore().getProject()),
      leftJoin.getFeatureGroup(),
      fgAliasLookup.get(leftJoin.getId()),
      features,
      availableFeaturesLookup.get(leftJoin.getFeatureGroup().getId()),
      false);

  // Set the remaining feature groups as join
  List<Join> queryJoins = new ArrayList<>();
  for (int i = 1; i < joins.size(); i++) {
    // left side of the join stays fixed, the counter starts at 1
    queryJoins.add(trainingDatasetController.getQueryJoin(query, joins.get(i), fgAliasLookup, fsLookup,
        availableFeaturesLookup, false));
  }
  query.setJoins(queryJoins);
  return query;
}
Aggregations