Use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFeature in the project hopsworks by logicalclocks.
Class TrainingDatasetController, method convertTrainingDatasetToDTO.
/**
 * Converts a training dataset entity into its DTO representation.
 *
 * <p>The feature store name and the feature list are filled in here; the remaining,
 * type-specific fields are filled in by the controller matching the training dataset type.
 *
 * @param user forwarded to the external training dataset controller
 * @param project its id is used when resolving the online topic name of each feature's
 *                feature group; it is also forwarded to the external training dataset controller
 * @param trainingDataset trainingDataset entity
 * @return JSON/XML DTO of the trainingDataset
 * @throws ServiceException
 * @throws FeaturestoreException
 * @throws IllegalArgumentException if the training dataset type is neither HOPSFS nor EXTERNAL
 */
private TrainingDatasetDTO convertTrainingDatasetToDTO(Users user, Project project, TrainingDataset trainingDataset) throws ServiceException, FeaturestoreException {
  TrainingDatasetDTO dto = new TrainingDatasetDTO(trainingDataset);
  dto.setFeaturestoreName(featurestoreFacade.getHiveDbName(trainingDataset.getFeaturestore().getHiveDbId()));

  // Translate every feature entity into its DTO, keeping the order returned by getFeaturesSorted
  List<TrainingDatasetFeature> sortedFeatures = getFeaturesSorted(trainingDataset, true);
  Map<Integer, String> featurestoreNames = getFsLookupTableFeatures(sortedFeatures);
  List<TrainingDatasetFeatureDTO> featureDTOs = new ArrayList<>();
  for (TrainingDatasetFeature feature : sortedFeatures) {
    featureDTOs.add(new TrainingDatasetFeatureDTO(
        checkPrefix(feature),
        feature.getType(),
        featuregroupDTOFor(project, featurestoreNames, feature),
        feature.getIndex(),
        feature.isLabel()));
  }
  dto.setFeatures(featureDTOs);

  // Delegate the type-specific part of the conversion
  switch (trainingDataset.getTrainingDatasetType()) {
    case HOPSFS_TRAINING_DATASET:
      return hopsfsTrainingDatasetController.convertHopsfsTrainingDatasetToDTO(dto, trainingDataset);
    case EXTERNAL_TRAINING_DATASET:
      return externalTrainingDatasetController.convertExternalTrainingDatasetToDTO(user, project, dto, trainingDataset);
    default:
      throw new IllegalArgumentException(RESTCodes.FeaturestoreErrorCode.ILLEGAL_TRAINING_DATASET_TYPE.getMessage()
          + ", Recognized training dataset types are: " + TrainingDatasetType.HOPSFS_TRAINING_DATASET
          + ", and: " + TrainingDatasetType.EXTERNAL_TRAINING_DATASET
          + ". The provided training dataset type was not recognized: " + trainingDataset.getTrainingDatasetType());
  }
}

// Builds the feature group DTO of a training dataset feature, or null when the feature has no feature group.
private FeaturegroupDTO featuregroupDTOFor(Project project, Map<Integer, String> featurestoreNames, TrainingDatasetFeature feature) {
  if (feature.getFeatureGroup() == null) {
    return null;
  }
  return new FeaturegroupDTO(
      feature.getFeatureGroup().getFeaturestore().getId(),
      featurestoreNames.get(feature.getFeatureGroup().getFeaturestore().getId()),
      feature.getFeatureGroup().getId(),
      feature.getFeatureGroup().getName(),
      feature.getFeatureGroup().getVersion(),
      onlineFeaturegroupController.onlineFeatureGroupTopicName(project.getId(), feature.getFeatureGroup().getId(), Utils.getFeaturegroupName(feature.getFeatureGroup())));
}
Use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFeature in the project hopsworks by logicalclocks.
Class TrainingDatasetController, method collectFeatures.
/**
 * Collects the features of a query, and recursively of the right-hand query of each of its
 * joins, as {@code TrainingDatasetFeature} entities.
 *
 * <p>The list of training dataset joins is passed in so that aliases can be rebuilt and so that
 * a feature group joined with itself is handled correctly: each feature is linked to
 * {@code tdJoins.get(joinIndex)} rather than looked up by feature group.
 *
 * <p>NOTE(review): {@code joinIndex} is advanced only within this invocation, so joins nested
 * below a join do not move the index seen by its siblings - confirm this matches the ordering
 * produced when {@code tdJoins} was built.
 *
 * @param query query whose features (and joined queries) are collected
 * @param featureDTOs optional feature DTOs used to mark labels and resolve transformation
 *                    functions; when null or empty every feature is a non-label without a
 *                    transformation function
 * @param trainingDataset owner of the created features; when null, {@code featureView} is used
 * @param featureView owner of the created features when {@code trainingDataset} is null
 * @param featureIndex index assigned to the first feature of {@code query}
 * @param tdJoins training dataset joins, indexed by {@code joinIndex}
 * @param joinIndex position in {@code tdJoins} of the join corresponding to {@code query}
 * @return the features of {@code query} followed by the features of its joins
 * @throws FeaturestoreException
 */
public List<TrainingDatasetFeature> collectFeatures(Query query, List<TrainingDatasetFeatureDTO> featureDTOs, TrainingDataset trainingDataset, FeatureView featureView, int featureIndex, List<TrainingDatasetJoin> tdJoins, int joinIndex) throws FeaturestoreException {
  List<TrainingDatasetFeature> collected = new ArrayList<>();
  boolean hasFeatureDTOs = featureDTOs != null && !featureDTOs.isEmpty();

  for (Feature feature : query.getFeatures()) {
    boolean label = false;
    TransformationFunction function = null;
    if (hasFeatureDTOs) {
      // a feature is a label when a DTO with the same name is flagged as label
      label = featureDTOs.stream().anyMatch(dto -> feature.getName().equals(dto.getName()) && dto.getLabel());
      function = getTransformationFunction(feature, featureDTOs);
    }

    TrainingDatasetFeature tdFeature;
    if (trainingDataset != null) {
      tdFeature = new TrainingDatasetFeature(trainingDataset, tdJoins.get(joinIndex), query.getFeaturegroup(), feature.getName(), feature.getType(), featureIndex++, label, function);
    } else {
      tdFeature = new TrainingDatasetFeature(featureView, tdJoins.get(joinIndex), query.getFeaturegroup(), feature.getName(), feature.getType(), featureIndex++, label, function);
    }
    collected.add(tdFeature);
  }

  if (query.getJoins() == null) {
    return collected;
  }

  for (Join join : query.getJoins()) {
    joinIndex++;
    List<TrainingDatasetFeature> fromJoin = collectFeatures(join.getRightQuery(), featureDTOs, trainingDataset, featureView, featureIndex, tdJoins, joinIndex);
    collected.addAll(fromJoin);
    // keep the feature numbering contiguous across joins
    featureIndex += fromJoin.size();
  }
  return collected;
}
Use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFeature in the project hopsworks by logicalclocks.
Class PreparedStatementBuilder, method getServingStatements.
/**
 * Builds the serving prepared statements of a query-based training dataset, one per join
 * (feature group) that contributes at least one non-label feature.
 *
 * @param trainingDataset training dataset whose joins and features drive the statements
 * @param project forwarded to the feature group controller when listing feature group features
 * @param user forwarded to the feature group controller when listing feature group features
 * @param batch forwarded unchanged to {@code buildDTO}
 * @return the prepared statement DTOs, in the order of the sorted joins
 * @throws FeaturestoreException if the training dataset was not generated by a query, if one of
 *                               its feature groups no longer exists, if a feature group is not
 *                               online enabled, or if a feature group has no primary key
 */
private List<ServingPreparedStatementDTO> getServingStatements(TrainingDataset trainingDataset, Project project, Users user, boolean batch) throws FeaturestoreException {
  if (!trainingDataset.isQuery()) {
    throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_NO_QUERY, Level.FINE, "Inference vector is only available for datasets generated by queries");
  }
  List<ServingPreparedStatementDTO> servingPreparedStatementDTOS = new ArrayList<>();
  List<TrainingDatasetJoin> joins = trainingDatasetController.getJoinsSorted(trainingDataset);
  // Check that all the feature groups still exists, if not throw a reasonable error
  if (trainingDataset.getFeatures().stream().anyMatch(j -> j.getFeatureGroup() == null)) {
    throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_QUERY_FG_DELETED, Level.FINE);
  }
  // each join is a feature group, iterate over them.
  for (TrainingDatasetJoin join : joins) {
    Featuregroup featuregroup = join.getFeatureGroup();
    // A feature group without a cached feature group entity (presumably a non-cached type such
    // as on-demand - verify) cannot be online enabled; report it with the same error instead of
    // failing with a NullPointerException.
    if (featuregroup.getCachedFeaturegroup() == null || !featuregroup.getCachedFeaturegroup().isOnlineEnabled()) {
      throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.FEATURESTORE_ONLINE_NOT_ENABLED, Level.FINE, "Inference vector is only available for training datasets generated by online enabled " + "feature groups");
    }
    Map<String, Feature> featureGroupFeatures = featuregroupController.getFeatures(featuregroup, project, user).stream().collect(Collectors.toMap(FeatureGroupFeatureDTO::getName, f -> new Feature(f.getName(), ALIAS, f.getType(), f.getPrimary(), f.getDefaultValue(), join.getPrefix())));
    // Identify and create primary key features for this feature group. Primary key features may not be the part of
    // query that generated the training dataset.
    List<Feature> primaryKeys = featureGroupFeatures.values().stream().filter(Feature::isPrimary).collect(Collectors.toList());
    if (primaryKeys.isEmpty()) {
      throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.PRIMARY_KEY_REQUIRED, Level.FINE, "Inference vector is only available for training datasets generated by feature groups with " + "at least 1 primary key");
    }
    // create td features: the non-label features of this join, ordered by their index
    // NOTE(review): a training dataset feature whose name is no longer present in the feature
    // group maps to null here - confirm whether that can happen and how it should be reported.
    List<Feature> selectFeatures = join.getFeatures().stream().filter(tdf -> !tdf.isLabel()).sorted(Comparator.comparing(TrainingDatasetFeature::getIndex)).map(tdf -> featureGroupFeatures.get(tdf.getName())).collect(Collectors.toList());
    // Labels are filtered out above, so a join that contributes only labels has nothing to
    // select; such a join is not part of the prepared statements and no query is built for it.
    if (!selectFeatures.isEmpty()) {
      // construct query for this feature group
      Query query = new Query(featurestoreController.getOfflineFeaturestoreDbName(featuregroup.getFeaturestore().getProject()), onlineFeaturestoreController.getOnlineFeaturestoreDbName(featuregroup.getFeaturestore().getProject()), featuregroup, ALIAS, selectFeatures);
      // construct ServingPreparedStatementDTO and add to the list
      servingPreparedStatementDTOS.add(buildDTO(query, primaryKeys, featuregroup.getId(), join.getIndex(), batch));
    }
  }
  return servingPreparedStatementDTOS;
}
Use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFeature in the project hopsworks by logicalclocks.
Class TransformationFunctionBuilder, method build.
/**
 * Builds the collection DTO of the transformation functions attached to the features of a
 * training dataset.
 *
 * <p>The href and the expand flag are always set. Items and count are only populated when the
 * resource request asks for expansion; features without a transformation function are skipped.
 *
 * @param uriInfo used to build the href of the DTO and of its items
 * @param resourceRequest decides whether the DTO is expanded
 * @param user forwarded to the per-feature {@code build}
 * @param project forwarded to the href builder and to the per-feature {@code build}
 * @param trainingDataset training dataset whose features are inspected
 * @return the collection DTO
 * @throws FeaturestoreException
 */
public TransformationFunctionAttachedDTO build(UriInfo uriInfo, ResourceRequest resourceRequest, Users user, Project project, TrainingDataset trainingDataset) throws FeaturestoreException {
  TransformationFunctionAttachedDTO collectionDTO = new TransformationFunctionAttachedDTO();
  collectionDTO.setHref(uri(uriInfo, project, trainingDataset.getFeaturestore(), trainingDataset));
  collectionDTO.setExpand(expand(resourceRequest));
  if (!collectionDTO.isExpand()) {
    return collectionDTO;
  }

  List<TransformationFunctionAttachedDTO> items = new ArrayList<>();
  for (TrainingDatasetFeature feature : trainingDataset.getFeatures()) {
    if (feature.getTransformationFunction() == null) {
      continue;
    }
    items.add(build(uriInfo, resourceRequest, user, project, trainingDataset, feature));
  }
  collectionDTO.setItems(items);
  collectionDTO.setCount((long) items.size());
  return collectionDTO;
}
Use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFeature in the project hopsworks by logicalclocks.
Class FeatureViewController, method setQuery.
/**
 * Sets the joins and features of a feature view from a query DTO.
 *
 * <p>Does nothing when {@code queryDTO} is null.
 *
 * @param project forwarded to the query conversion
 * @param user forwarded to the query conversion
 * @param queryDTO query to convert; may be null
 * @param featureView feature view that receives the collected joins and features
 * @param featureDTOs forwarded to the feature collection
 * @throws FeaturestoreException
 */
private void setQuery(Project project, Users user, QueryDTO queryDTO, FeatureView featureView, List<TrainingDatasetFeatureDTO> featureDTOs) throws FeaturestoreException {
  if (queryDTO == null) {
    return;
  }
  Query convertedQuery = queryController.convertQueryDTO(project, user, queryDTO, pitJoinController.isPitEnabled(queryDTO));

  // the joins must be collected first: the features are linked to them by position
  List<TrainingDatasetJoin> joins = trainingDatasetController.collectJoins(convertedQuery, null, featureView);
  featureView.setJoins(joins);

  featureView.setFeatures(trainingDatasetController.collectFeatures(convertedQuery, featureDTOs, null, featureView, 0, joins, 0));
}
Aggregations