use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetJoin in project hopsworks by logicalclocks.
the class TrainingDatasetController method collectFeatures.
// Here we need to pass the list of training dataset joins so that we can rebuild the aliases
// and correctly handle the case in which a feature group is joined with itself.
/**
 * Collects the training dataset features of {@code query} and, recursively, of the right-hand
 * query of every join attached to it.
 *
 * <p>Each feature is bound to {@code tdJoins.get(joinIndex)} and receives a consecutive index
 * starting at {@code featureIndex}. When {@code trainingDataset} is non-null the features are
 * created for the training dataset, otherwise for {@code featureView}.
 *
 * <p>NOTE(review): {@code joinIndex} is passed by value, so indices consumed inside a nested
 * right-hand query are not visible to the caller's loop. This looks correct only if right-hand
 * queries carry no joins of their own - confirm against how {@code tdJoins} is built.
 *
 * @param query           query whose features (and joined queries) are collected
 * @param featureDTOs     optional DTOs used to resolve the label flag and transformation function;
 *                        when null or empty, features are non-label and have no transformation
 * @param trainingDataset owner of the created features, or null to use {@code featureView}
 * @param featureView     owner of the created features when {@code trainingDataset} is null
 * @param featureIndex    index assigned to the first collected feature
 * @param tdJoins         joins of the training dataset / feature view, addressed by position
 * @param joinIndex       position in {@code tdJoins} of the join that {@code query} belongs to
 * @return the features of this query followed by the features of its joined queries
 * @throws FeaturestoreException propagated from the transformation function lookup or recursion
 */
public List<TrainingDatasetFeature> collectFeatures(Query query, List<TrainingDatasetFeatureDTO> featureDTOs, TrainingDataset trainingDataset, FeatureView featureView, int featureIndex, List<TrainingDatasetJoin> tdJoins, int joinIndex) throws FeaturestoreException {
  List<TrainingDatasetFeature> collected = new ArrayList<>();

  for (Feature feature : query.getFeatures()) {
    // Defaults used when no feature DTOs were supplied.
    boolean label = false;
    TransformationFunction transformation = null;
    if (featureDTOs != null && !featureDTOs.isEmpty()) {
      // A feature is a label if a DTO with the same name is flagged as label.
      label = featureDTOs.stream().anyMatch(dto -> feature.getName().equals(dto.getName()) && dto.getLabel());
      transformation = getTransformationFunction(feature, featureDTOs);
    }

    TrainingDatasetFeature tdFeature;
    if (trainingDataset != null) {
      tdFeature = new TrainingDatasetFeature(trainingDataset, tdJoins.get(joinIndex), query.getFeaturegroup(),
          feature.getName(), feature.getType(), featureIndex++, label, transformation);
    } else {
      tdFeature = new TrainingDatasetFeature(featureView, tdJoins.get(joinIndex), query.getFeaturegroup(),
          feature.getName(), feature.getType(), featureIndex++, label, transformation);
    }
    collected.add(tdFeature);
  }

  List<Join> joins = query.getJoins();
  if (joins == null) {
    return collected;
  }

  int currentJoinIndex = joinIndex;
  for (Join join : joins) {
    // Every join of this query maps to the next entry of tdJoins.
    currentJoinIndex++;
    List<TrainingDatasetFeature> nested = collectFeatures(join.getRightQuery(), featureDTOs, trainingDataset,
        featureView, featureIndex, tdJoins, currentJoinIndex);
    collected.addAll(nested);
    // Continue numbering after the features contributed by the joined query.
    featureIndex += nested.size();
  }
  return collected;
}
use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetJoin in project hopsworks by logicalclocks.
the class TrainingDatasetController method getQueryJoin.
// Rebuild the query object so that the query constructor can build the query string
/**
 * Rebuilds the {@link Join} between {@code leftQuery} and the feature group referenced by
 * {@code rightTdJoin}.
 *
 * <p>The right-hand query is created without requested features (they are all part of the left
 * base query); the join columns are taken from the conditions stored on {@code rightTdJoin}.
 * The stored join type is used as an index into {@code JoinType.values()}.
 *
 * @param leftQuery               left side of the join
 * @param rightTdJoin             persisted join describing the right side
 * @param fgAliasLookup           alias per training dataset join id
 * @param fsLookup                feature store name per feature store id
 * @param availableFeaturesLookup available features per feature group id
 * @param isHiveEngine            forwarded unchanged to the right-hand {@code Query}
 * @return the join produced by {@code queryController.extractLeftRightOn}
 * @throws FeaturestoreException propagated from {@code queryController.extractLeftRightOn}
 */
public Join getQueryJoin(Query leftQuery, TrainingDatasetJoin rightTdJoin, Map<Integer, String> fgAliasLookup, Map<Integer, String> fsLookup, Map<Integer, List<Feature>> availableFeaturesLookup, Boolean isHiveEngine) throws FeaturestoreException {
  String rightAlias = fgAliasLookup.get(rightTdJoin.getId());

  String featureStoreName = fsLookup.get(rightTdJoin.getFeatureGroup().getFeaturestore().getId());
  String onlineDbName = onlineFeaturestoreController.getOnlineFeaturestoreDbName(
      rightTdJoin.getFeatureGroup().getFeaturestore().getProject());
  // no requested features as they are all in the left base query
  List<Feature> requestedFeatures = new ArrayList<>();
  List<Feature> availableFeatures = availableFeaturesLookup.get(rightTdJoin.getFeatureGroup().getId());

  Query rightQuery = new Query(featureStoreName, onlineDbName, rightTdJoin.getFeatureGroup(), rightAlias,
      requestedFeatures, availableFeatures, isHiveEngine);

  // Left/right join columns, in the order of the stored conditions.
  List<Feature> leftOn = rightTdJoin.getConditions().stream()
      .map(condition -> new Feature(condition.getLeftFeature()))
      .collect(Collectors.toList());
  List<Feature> rightOn = rightTdJoin.getConditions().stream()
      .map(condition -> new Feature(condition.getRightFeature()))
      .collect(Collectors.toList());

  JoinType[] joinTypes = JoinType.values();
  JoinType joinType = joinTypes[rightTdJoin.getType()];

  return queryController.extractLeftRightOn(leftQuery, rightQuery, leftOn, rightOn, joinType,
      rightTdJoin.getPrefix());
}
use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetJoin in project hopsworks by logicalclocks.
the class PreparedStatementBuilder method getServingStatements.
/**
 * Builds the serving prepared statements for a query-based training dataset, one per joined
 * feature group that contributes at least one non-label feature.
 *
 * <p>For every join (in the order returned by {@code getJoinsSorted}) the feature group must be
 * online enabled and have at least one primary key. The selected features are the join's
 * non-label training dataset features, ordered by their index.
 *
 * @param trainingDataset training dataset to build statements for; must be generated by a query
 * @param project         project used to resolve the feature group features
 * @param user            user used to resolve the feature group features
 * @param batch           forwarded unchanged to {@code buildDTO}
 * @return the prepared statement DTOs, possibly empty
 * @throws FeaturestoreException if the training dataset is not query based, a feature group was
 *                               deleted, is not online enabled, or has no primary key
 */
private List<ServingPreparedStatementDTO> getServingStatements(TrainingDataset trainingDataset, Project project, Users user, boolean batch) throws FeaturestoreException {
  if (!trainingDataset.isQuery()) {
    throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_NO_QUERY, Level.FINE,
        "Inference vector is only available for datasets generated by queries");
  }
  List<ServingPreparedStatementDTO> servingPreparedStatementDTOS = new ArrayList<>();
  List<TrainingDatasetJoin> joins = trainingDatasetController.getJoinsSorted(trainingDataset);

  // Check that all the feature groups still exist, if not throw a reasonable error.
  // The joins are checked as well because the loop below dereferences join.getFeatureGroup().
  if (trainingDataset.getFeatures().stream().anyMatch(f -> f.getFeatureGroup() == null)
      || joins.stream().anyMatch(j -> j.getFeatureGroup() == null)) {
    throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_QUERY_FG_DELETED, Level.FINE);
  }

  // each join is a feature group, iterate over them.
  for (TrainingDatasetJoin join : joins) {
    Featuregroup featuregroup = join.getFeatureGroup();
    // A feature group without a cached representation is treated as not online enabled instead
    // of failing with a NullPointerException.
    if (featuregroup.getCachedFeaturegroup() == null
        || !featuregroup.getCachedFeaturegroup().isOnlineEnabled()) {
      throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.FEATURESTORE_ONLINE_NOT_ENABLED, Level.FINE,
          "Inference vector is only available for training datasets generated by online enabled " + "feature groups");
    }

    // Index the feature group's features by name, carrying the join prefix.
    Map<String, Feature> featureGroupFeatures = featuregroupController.getFeatures(featuregroup, project, user)
        .stream()
        .collect(Collectors.toMap(FeatureGroupFeatureDTO::getName,
            f -> new Feature(f.getName(), ALIAS, f.getType(), f.getPrimary(), f.getDefaultValue(), join.getPrefix())));

    // Identify and create primary key features for this feature group. Primary key features may not be the part of
    // query that generated the training dataset.
    List<Feature> primaryKeys = featureGroupFeatures.values().stream()
        .filter(Feature::isPrimary)
        .collect(Collectors.toList());
    if (primaryKeys.isEmpty()) {
      throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.PRIMARY_KEY_REQUIRED, Level.FINE,
          "Inference vector is only available for training datasets generated by feature groups with " + "at least 1 primary key");
    }

    // create td features: non-label features of this join, ordered by their index.
    // NOTE(review): a training dataset feature whose name is not present in the feature group
    // maps to null here - confirm this cannot happen.
    List<Feature> selectFeatures = join.getFeatures().stream()
        .filter(tdf -> !tdf.isLabel())
        .sorted(Comparator.comparing(TrainingDatasetFeature::getIndex))
        .map(tdf -> featureGroupFeatures.get(tdf.getName()))
        .collect(Collectors.toList());

    // If only label features are used from this feature group there is nothing to select: labels
    // are not part of the prepared statement, thus don't add a query for this feature group.
    if (!selectFeatures.isEmpty()) {
      // construct query for this feature group
      Query query = new Query(
          featurestoreController.getOfflineFeaturestoreDbName(featuregroup.getFeaturestore().getProject()),
          onlineFeaturestoreController.getOnlineFeaturestoreDbName(featuregroup.getFeaturestore().getProject()),
          featuregroup, ALIAS, selectFeatures);
      // construct ServingPreparedStatementDTO and add to the list
      servingPreparedStatementDTOS.add(buildDTO(query, primaryKeys, featuregroup.getId(), join.getIndex(), batch));
    }
  }
  return servingPreparedStatementDTOS;
}
use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetJoin in project hopsworks by logicalclocks.
the class FeatureViewController method setQuery.
/**
 * Populates the joins and features of {@code featureView} from {@code queryDTO}.
 *
 * <p>Does nothing when {@code queryDTO} is null. Otherwise the DTO is converted into a
 * {@code Query}, the joins are collected and set on the feature view, and the features are then
 * collected against those joins (starting at feature index 0 and join index 0) and set as well.
 *
 * @param project     project passed to the query conversion
 * @param user        user passed to the query conversion
 * @param queryDTO    query definition, may be null
 * @param featureView feature view to populate
 * @param featureDTOs feature DTOs forwarded to the feature collection
 * @throws FeaturestoreException propagated from query conversion or feature collection
 */
private void setQuery(Project project, Users user, QueryDTO queryDTO, FeatureView featureView, List<TrainingDatasetFeatureDTO> featureDTOs) throws FeaturestoreException {
  if (queryDTO == null) {
    return;
  }

  Query convertedQuery =
      queryController.convertQueryDTO(project, user, queryDTO, pitJoinController.isPitEnabled(queryDTO));

  // Joins must be set first: the collected features reference them by position.
  List<TrainingDatasetJoin> viewJoins = trainingDatasetController.collectJoins(convertedQuery, null, featureView);
  featureView.setJoins(viewJoins);

  List<TrainingDatasetFeature> viewFeatures =
      trainingDatasetController.collectFeatures(convertedQuery, featureDTOs, null, featureView, 0, viewJoins, 0);
  featureView.setFeatures(viewFeatures);
}
use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetJoin in project hopsworks by logicalclocks.
the class TrainingDatasetController method getAliasLookupTable.
/**
 * Assigns an alias of the form {@code fg0}, {@code fg1}, ... to every training dataset join,
 * numbered by the join's position in {@code tdJoins}.
 *
 * @param tdJoins joins to create aliases for
 * @return a map from the join's id ({@code tdJoin.getId()}) to its alias
 */
public Map<Integer, String> getAliasLookupTable(List<TrainingDatasetJoin> tdJoins) {
  Map<Integer, String> aliasByJoinId = new HashMap<>();
  for (int position = 0; position < tdJoins.size(); position++) {
    // The alias suffix is the position of the join in the list.
    aliasByJoinId.put(tdJoins.get(position).getId(), "fg" + position);
  }
  return aliasByJoinId;
}
Aggregations