use of io.hops.hopsworks.common.featurestore.feature.TrainingDatasetFeatureDTO in project hopsworks by logicalclocks.
the class TrainingDatasetController method convertTrainingDatasetToDTO.
/**
 * Builds the DTO representation of a training dataset entity.
 *
 * <p>The base DTO is created from the entity; the feature store name and the sorted features are then
 * filled in, and the final conversion is delegated to the HopsFS or external training dataset controller
 * depending on the training dataset type.
 *
 * @param user user forwarded to the external training dataset conversion
 * @param project project used for the online feature group topic name and forwarded to the external conversion
 * @param trainingDataset trainingDataset entity
 * @return JSON/XML DTO of the trainingDataset
 * @throws ServiceException
 * @throws FeaturestoreException
 * @throws IllegalArgumentException if the training dataset type is neither HopsFS nor external
 */
private TrainingDatasetDTO convertTrainingDatasetToDTO(Users user, Project project, TrainingDataset trainingDataset)
    throws ServiceException, FeaturestoreException {
  TrainingDatasetDTO dto = new TrainingDatasetDTO(trainingDataset);
  dto.setFeaturestoreName(featurestoreFacade.getHiveDbName(trainingDataset.getFeaturestore().getHiveDbId()));

  // Set features
  List<TrainingDatasetFeature> sortedFeatures = getFeaturesSorted(trainingDataset, true);
  Map<Integer, String> featurestoreNames = getFsLookupTableFeatures(sortedFeatures);
  dto.setFeatures(sortedFeatures.stream()
      .map(feature -> new TrainingDatasetFeatureDTO(
          checkPrefix(feature),
          feature.getType(),
          toTrainingDatasetFeaturegroupDTO(project, feature, featurestoreNames),
          feature.getIndex(),
          feature.isLabel()))
      .collect(Collectors.toList()));

  switch (trainingDataset.getTrainingDatasetType()) {
    case HOPSFS_TRAINING_DATASET:
      return hopsfsTrainingDatasetController.convertHopsfsTrainingDatasetToDTO(dto, trainingDataset);
    case EXTERNAL_TRAINING_DATASET:
      return externalTrainingDatasetController.convertExternalTrainingDatasetToDTO(user, project, dto,
          trainingDataset);
    default:
      throw new IllegalArgumentException(RESTCodes.FeaturestoreErrorCode.ILLEGAL_TRAINING_DATASET_TYPE.getMessage()
          + ", Recognized training dataset types are: " + TrainingDatasetType.HOPSFS_TRAINING_DATASET
          + ", and: " + TrainingDatasetType.EXTERNAL_TRAINING_DATASET
          + ". The provided training dataset type was not recognized: "
          + trainingDataset.getTrainingDatasetType());
  }
}

/**
 * Builds the feature group DTO attached to a training dataset feature DTO.
 *
 * @param project project whose id is used to derive the online feature group topic name
 * @param feature training dataset feature whose feature group is described
 * @param featurestoreNames lookup table from feature store id to feature store name
 * @return the feature group DTO, or {@code null} when the feature has no feature group
 */
private FeaturegroupDTO toTrainingDatasetFeaturegroupDTO(Project project, TrainingDatasetFeature feature,
    Map<Integer, String> featurestoreNames) {
  if (feature.getFeatureGroup() == null) {
    return null;
  }
  return new FeaturegroupDTO(
      feature.getFeatureGroup().getFeaturestore().getId(),
      featurestoreNames.get(feature.getFeatureGroup().getFeaturestore().getId()),
      feature.getFeatureGroup().getId(),
      feature.getFeatureGroup().getName(),
      feature.getFeatureGroup().getVersion(),
      onlineFeaturegroupController.onlineFeatureGroupTopicName(project.getId(), feature.getFeatureGroup().getId(),
          Utils.getFeaturegroupName(feature.getFeatureGroup())));
}
use of io.hops.hopsworks.common.featurestore.feature.TrainingDatasetFeatureDTO in project hopsworks by logicalclocks.
the class TrainingDatasetController method collectFeatures.
/**
 * Recursively flattens a query and its joins into training dataset feature entities.
 *
 * <p>The list of training dataset joins is passed along so that the aliases can be rebuilt and so that
 * the case in which a feature group is joined with itself is handled correctly.
 *
 * @param query query whose features (and, recursively, the features of its joins) are collected
 * @param featureDTOs optional feature DTOs used to flag labels and look up transformation functions;
 *                    ignored when {@code null} or empty
 * @param trainingDataset owning training dataset; when {@code null} the features are attached to
 *                        {@code featureView} instead
 * @param featureView owning feature view, used only when {@code trainingDataset} is {@code null}
 * @param featureIndex index assigned to the first feature collected from {@code query}
 * @param tdJoins training dataset joins, indexed by {@code joinIndex}
 * @param joinIndex position in {@code tdJoins} of the join associated with {@code query}
 * @return the features of {@code query} followed by the features of each of its joins
 * @throws FeaturestoreException if a referenced transformation function cannot be resolved
 */
public List<TrainingDatasetFeature> collectFeatures(Query query, List<TrainingDatasetFeatureDTO> featureDTOs,
    TrainingDataset trainingDataset, FeatureView featureView, int featureIndex, List<TrainingDatasetJoin> tdJoins,
    int joinIndex) throws FeaturestoreException {
  List<TrainingDatasetFeature> collected = new ArrayList<>();
  boolean hasFeatureDTOs = featureDTOs != null && !featureDTOs.isEmpty();

  for (Feature feature : query.getFeatures()) {
    boolean label = false;
    TransformationFunction function = null;
    if (hasFeatureDTOs) {
      // identify if feature is label
      label = featureDTOs.stream()
          .anyMatch(dto -> feature.getName().equals(dto.getName()) && dto.getLabel());
      // get transformation function for this feature
      function = getTransformationFunction(feature, featureDTOs);
    }

    TrainingDatasetFeature tdFeature;
    if (trainingDataset != null) {
      tdFeature = new TrainingDatasetFeature(trainingDataset, tdJoins.get(joinIndex), query.getFeaturegroup(),
          feature.getName(), feature.getType(), featureIndex, label, function);
    } else {
      tdFeature = new TrainingDatasetFeature(featureView, tdJoins.get(joinIndex), query.getFeaturegroup(),
          feature.getName(), feature.getType(), featureIndex, label, function);
    }
    featureIndex++;
    collected.add(tdFeature);
  }

  if (query.getJoins() == null) {
    return collected;
  }
  for (Join join : query.getJoins()) {
    joinIndex++;
    List<TrainingDatasetFeature> fromJoin = collectFeatures(join.getRightQuery(), featureDTOs, trainingDataset,
        featureView, featureIndex, tdJoins, joinIndex);
    collected.addAll(fromJoin);
    // Continue numbering after the features contributed by this join.
    featureIndex += fromJoin.size();
  }
  return collected;
}
use of io.hops.hopsworks.common.featurestore.feature.TrainingDatasetFeatureDTO in project hopsworks by logicalclocks.
the class TrainingDatasetController method getTransformationFunction.
/**
 * Resolves the transformation function attached to a feature, if any.
 *
 * <p>The first DTO whose feature group feature name equals the feature's name is used.
 *
 * @param feature feature for which the transformation function is looked up
 * @param featureDTOs feature DTOs to search; must not be {@code null}
 * @return the transformation function entity, or {@code null} when no DTO matches the feature or the
 *         matching DTO carries no transformation function
 * @throws FeaturestoreException if the DTO references a transformation function id that cannot be found
 */
private TransformationFunction getTransformationFunction(Feature feature,
    List<TrainingDatasetFeatureDTO> featureDTOs) throws FeaturestoreException {
  TrainingDatasetFeatureDTO featureDTO = featureDTOs.stream()
      .filter(dto -> feature.getName().equals(dto.getFeatureGroupFeatureName()))
      .findFirst()
      .orElse(null);
  if (featureDTO == null || featureDTO.getTransformationFunction() == null) {
    return null;
  }
  // Separator added after "ID" so the id is not glued to the text (previously rendered as e.g. "ID42").
  return transformationFunctionFacade.findById(featureDTO.getTransformationFunction().getId())
      .orElseThrow(() -> new FeaturestoreException(
          RESTCodes.FeaturestoreErrorCode.TRANSFORMATION_FUNCTION_DOES_NOT_EXIST, Level.FINE,
          "Could not find transformation function with ID: " + featureDTO.getTransformationFunction().getId()));
}
use of io.hops.hopsworks.common.featurestore.feature.TrainingDatasetFeatureDTO in project hopsworks by logicalclocks.
the class TrainingDatasetInputValidation method validateFeatures.
/**
 * Validates the feature DTOs of a training dataset against the features available in its query.
 *
 * <p>Checks that every DTO flagged as label and every DTO carrying a transformation function refers to
 * a feature collected from the query, and that every non-null join prefix matches the feature store
 * name pattern.
 *
 * @param query query the features are collected from; validation is skipped when {@code null}
 * @param featuresDTOs feature DTOs to validate; validation is skipped when {@code null}
 * @throws FeaturestoreException if a label or a feature with a transformation function is not part of
 *                               the query, or if a join prefix is invalid
 */
public void validateFeatures(Query query, List<TrainingDatasetFeatureDTO> featuresDTOs)
    throws FeaturestoreException {
  if (query == null || featuresDTOs == null) {
    // Nothing to validate against when either the query or the feature DTOs are missing.
    return;
  }

  List<TrainingDatasetFeatureDTO> labelDTOs = featuresDTOs.stream()
      .filter(TrainingDatasetFeatureDTO::getLabel)
      .collect(Collectors.toList());
  List<TrainingDatasetFeatureDTO> transformedDTOs = featuresDTOs.stream()
      .filter(dto -> dto.getTransformationFunction() != null)
      .collect(Collectors.toList());
  List<Feature> queryFeatures = collectFeatures(query);

  for (TrainingDatasetFeatureDTO labelDTO : labelDTOs) {
    boolean labelPresent = queryFeatures.stream()
        .anyMatch(feature -> feature.getName().equals(labelDTO.getName()));
    if (!labelPresent) {
      throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.LABEL_NOT_FOUND, Level.FINE,
          "Label: " + labelDTO.getName() + " is missing");
    }
  }

  for (TrainingDatasetFeatureDTO transformedDTO : transformedDTOs) {
    boolean featurePresent = queryFeatures.stream()
        .anyMatch(feature -> feature.getName().equals(transformedDTO.getFeatureGroupFeatureName()));
    if (!featurePresent) {
      throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.FEATURE_WITH_TRANSFORMATION_NOT_FOUND,
          Level.FINE, "feature: " + transformedDTO.getName()
          + " is missing and transformation function can't be attached");
    }
  }

  // verify join prefix if any (query is known to be non-null at this point)
  if (query.getJoins() == null) {
    return;
  }
  for (Join join : query.getJoins()) {
    if (join.getPrefix() == null) {
      continue;
    }
    if (!FeaturestoreConstants.FEATURESTORE_REGEX.matcher(join.getPrefix()).matches()) {
      throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.ILLEGAL_PREFIX_NAME, Level.FINE,
          ", the provided prefix name " + join.getPrefix()
          + " is invalid. Prefix names can only contain lower case characters, numbers and underscores"
          + " and cannot be longer than " + FeaturestoreConstants.FEATURESTORE_ENTITY_NAME_MAX_LENGTH
          + " characters or empty.");
    }
  }
}
use of io.hops.hopsworks.common.featurestore.feature.TrainingDatasetFeatureDTO in project hopsworks by logicalclocks.
the class HopsFSProvenanceController method fromTrainingDatasetQuery.
/**
 * Groups the features of a training dataset DTO by the feature group they belong to.
 *
 * <p>One simplified feature group DTO is created per distinct feature group id, and the name of each
 * feature is added to the entry of its feature group.
 *
 * @param trainingDatasetDTO training dataset DTO whose features are grouped; every feature is expected
 *                           to carry a feature group
 * @return the simplified feature group DTOs, one per feature group referenced by the features
 */
private List<FeaturegroupXAttr.SimplifiedDTO> fromTrainingDatasetQuery(TrainingDatasetDTO trainingDatasetDTO) {
  Map<Integer, FeaturegroupXAttr.SimplifiedDTO> byFeaturegroupId = new HashMap<>();
  for (TrainingDatasetFeatureDTO feature : trainingDatasetDTO.getFeatures()) {
    // Create the entry for this feature group on first sight, then record the feature under it.
    byFeaturegroupId
        .computeIfAbsent(feature.getFeaturegroup().getId(),
            id -> new FeaturegroupXAttr.SimplifiedDTO(feature.getFeaturegroup().getFeaturestoreId(),
                feature.getFeaturegroup().getName(), feature.getFeaturegroup().getVersion()))
        .addFeature(feature.getName());
  }
  return new ArrayList<>(byFeaturegroupId.values());
}
Aggregations