Search in sources :

Example 1 with TrainingDatasetFeatureDTO

use of io.hops.hopsworks.common.featurestore.feature.TrainingDatasetFeatureDTO in project hopsworks by logicalclocks.

the class TrainingDatasetController method convertTrainingDatasetToDTO.

/**
 * Converts a trainingDataset entity to a TrainingDataset DTO
 *
 * @param user
 * @param project
 * @param trainingDataset trainingDataset entity
 * @return JSON/XML DTO of the trainingDataset
 * @throws ServiceException
 * @throws FeaturestoreException
 */
private TrainingDatasetDTO convertTrainingDatasetToDTO(Users user, Project project, TrainingDataset trainingDataset) throws ServiceException, FeaturestoreException {
    TrainingDatasetDTO trainingDatasetDTO = new TrainingDatasetDTO(trainingDataset);
    String featurestoreName = featurestoreFacade.getHiveDbName(trainingDataset.getFeaturestore().getHiveDbId());
    trainingDatasetDTO.setFeaturestoreName(featurestoreName);
    // Set features
    List<TrainingDatasetFeature> tdFeatures = getFeaturesSorted(trainingDataset, true);
    Map<Integer, String> fsLookupTable = getFsLookupTableFeatures(tdFeatures);
    trainingDatasetDTO.setFeatures(tdFeatures.stream().map(f -> new TrainingDatasetFeatureDTO(checkPrefix(f), f.getType(), f.getFeatureGroup() != null ? new FeaturegroupDTO(f.getFeatureGroup().getFeaturestore().getId(), fsLookupTable.get(f.getFeatureGroup().getFeaturestore().getId()), f.getFeatureGroup().getId(), f.getFeatureGroup().getName(), f.getFeatureGroup().getVersion(), onlineFeaturegroupController.onlineFeatureGroupTopicName(project.getId(), f.getFeatureGroup().getId(), Utils.getFeaturegroupName(f.getFeatureGroup()))) : null, f.getIndex(), f.isLabel())).collect(Collectors.toList()));
    switch(trainingDataset.getTrainingDatasetType()) {
        case HOPSFS_TRAINING_DATASET:
            return hopsfsTrainingDatasetController.convertHopsfsTrainingDatasetToDTO(trainingDatasetDTO, trainingDataset);
        case EXTERNAL_TRAINING_DATASET:
            return externalTrainingDatasetController.convertExternalTrainingDatasetToDTO(user, project, trainingDatasetDTO, trainingDataset);
        default:
            throw new IllegalArgumentException(RESTCodes.FeaturestoreErrorCode.ILLEGAL_TRAINING_DATASET_TYPE.getMessage() + ", Recognized training dataset types are: " + TrainingDatasetType.HOPSFS_TRAINING_DATASET + ", and: " + TrainingDatasetType.EXTERNAL_TRAINING_DATASET + ". The provided training dataset type was not recognized: " + trainingDataset.getTrainingDatasetType());
    }
}
Also used : TrainingDatasetFeature(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFeature) TrainingDatasetFeatureDTO(io.hops.hopsworks.common.featurestore.feature.TrainingDatasetFeatureDTO) FeaturegroupDTO(io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupDTO)

Example 2 with TrainingDatasetFeatureDTO

use of io.hops.hopsworks.common.featurestore.feature.TrainingDatasetFeatureDTO in project hopsworks by logicalclocks.

the class TrainingDatasetController method collectFeatures.

// Here we need to pass the list of training dataset joins so that we can rebuild the aliases.
// and handle correctly the case in which a feature group is joined with itself.
public List<TrainingDatasetFeature> collectFeatures(Query query, List<TrainingDatasetFeatureDTO> featureDTOs, TrainingDataset trainingDataset, FeatureView featureView, int featureIndex, List<TrainingDatasetJoin> tdJoins, int joinIndex) throws FeaturestoreException {
    List<TrainingDatasetFeature> features = new ArrayList<>();
    boolean isLabel = false;
    TransformationFunction transformationFunction = null;
    for (Feature f : query.getFeatures()) {
        if (featureDTOs != null && !featureDTOs.isEmpty()) {
            // identify if feature is label
            isLabel = featureDTOs.stream().anyMatch(dto -> f.getName().equals(dto.getName()) && dto.getLabel());
            // get transformation function for this feature
            transformationFunction = getTransformationFunction(f, featureDTOs);
        }
        features.add(trainingDataset != null ? new TrainingDatasetFeature(trainingDataset, tdJoins.get(joinIndex), query.getFeaturegroup(), f.getName(), f.getType(), featureIndex++, isLabel, transformationFunction) : new TrainingDatasetFeature(featureView, tdJoins.get(joinIndex), query.getFeaturegroup(), f.getName(), f.getType(), featureIndex++, isLabel, transformationFunction));
    }
    if (query.getJoins() != null) {
        for (Join join : query.getJoins()) {
            joinIndex++;
            List<TrainingDatasetFeature> joinFeatures = collectFeatures(join.getRightQuery(), featureDTOs, trainingDataset, featureView, featureIndex, tdJoins, joinIndex);
            features.addAll(joinFeatures);
            featureIndex += joinFeatures.size();
        }
    }
    return features;
}
Also used : TrainingDatasetFilter(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFilter) FeaturegroupType(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.FeaturegroupType) Date(java.util.Date) Feature(io.hops.hopsworks.common.featurestore.query.Feature) HopsfsTrainingDatasetController(io.hops.hopsworks.common.featurestore.trainingdatasets.hopsfs.HopsfsTrainingDatasetController) HopsFSProvenanceController(io.hops.hopsworks.common.provenance.core.HopsFSProvenanceController) Settings(io.hops.hopsworks.common.util.Settings) TransactionAttributeType(javax.ejb.TransactionAttributeType) Map(java.util.Map) FilterValue(io.hops.hopsworks.common.featurestore.query.filter.FilterValue) FeatureView(io.hops.hopsworks.persistence.entity.featurestore.featureview.FeatureView) TrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset) TrainingDatasetSplit(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.split.TrainingDatasetSplit) Utils(io.hops.hopsworks.common.hdfs.Utils) StatisticsConfig(io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticsConfig) JoinType(org.apache.calcite.sql.JoinType) Stateless(javax.ejb.Stateless) TransformationFunctionFacade(io.hops.hopsworks.common.featurestore.transformationFunction.TransformationFunctionFacade) HopsfsTrainingDatasetFacade(io.hops.hopsworks.common.featurestore.trainingdatasets.hopsfs.HopsfsTrainingDatasetFacade) Collection(java.util.Collection) TrainingDatasetFeatureDTO(io.hops.hopsworks.common.featurestore.feature.TrainingDatasetFeatureDTO) Featuregroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup) RESTCodes(io.hops.hopsworks.restutils.RESTCodes) Join(io.hops.hopsworks.common.featurestore.query.join.Join) StatisticColumnController(io.hops.hopsworks.common.featurestore.statistics.columns.StatisticColumnController) FeaturestoreConnectorFacade(io.hops.hopsworks.common.featurestore.storageconnectors.FeaturestoreConnectorFacade) Collectors(java.util.stream.Collectors) QueryController(io.hops.hopsworks.common.featurestore.query.QueryController) SqlFilterLogic(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlFilterLogic) TransformationFunction(io.hops.hopsworks.persistence.entity.featurestore.transformationFunction.TransformationFunction) List(java.util.List) FeaturestoreFacade(io.hops.hopsworks.common.featurestore.FeaturestoreFacade) ExternalTrainingDatasetController(io.hops.hopsworks.common.featurestore.trainingdatasets.external.ExternalTrainingDatasetController) FeaturestoreUtils(io.hops.hopsworks.common.featurestore.utils.FeaturestoreUtils) Optional(java.util.Optional) FeaturestoreConnector(io.hops.hopsworks.persistence.entity.featurestore.storageconnector.FeaturestoreConnector) DistributedFsService(io.hops.hopsworks.common.hdfs.DistributedFsService) InodeController(io.hops.hopsworks.common.hdfs.inode.InodeController) DistributedFileSystemOps(io.hops.hopsworks.common.hdfs.DistributedFileSystemOps) TrainingDatasetJoinCondition(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetJoinCondition) FeaturestoreConnectorType(io.hops.hopsworks.persistence.entity.featurestore.storageconnector.FeaturestoreConnectorType) HashMap(java.util.HashMap) FeaturestoreActivityMeta(io.hops.hopsworks.persistence.entity.featurestore.activity.FeaturestoreActivityMeta) Streams(com.logicalclocks.shaded.com.google.common.collect.Streams) OnlineFeaturestoreController(io.hops.hopsworks.common.featurestore.online.OnlineFeaturestoreController) Project(io.hops.hopsworks.persistence.entity.project.Project) TrainingDatasetFilterCondition(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFilterCondition) ArrayList(java.util.ArrayList) Level(java.util.logging.Level) HopsfsTrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.hopsfs.HopsfsTrainingDataset) FeaturestoreException(io.hops.hopsworks.exceptions.FeaturestoreException) FeaturestoreActivityFacade(io.hops.hopsworks.common.featurestore.activity.FeaturestoreActivityFacade) PitJoinController(io.hops.hopsworks.common.featurestore.query.pit.PitJoinController) TransactionAttribute(javax.ejb.TransactionAttribute) HdfsUsersController(io.hops.hopsworks.common.hdfs.HdfsUsersController) OnlineFeaturegroupController(io.hops.hopsworks.common.featurestore.featuregroup.online.OnlineFeaturegroupController) Query(io.hops.hopsworks.common.featurestore.query.Query) Filter(io.hops.hopsworks.common.featurestore.query.filter.Filter) TrainingDatasetType(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetType) Inode(io.hops.hopsworks.persistence.entity.hdfs.inode.Inode) ProvenanceException(io.hops.hopsworks.exceptions.ProvenanceException) EJB(javax.ejb.EJB) TrainingDatasetFeature(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFeature) StatisticsController(io.hops.hopsworks.common.featurestore.statistics.StatisticsController) ExternalTrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.external.ExternalTrainingDataset) IOException(java.io.IOException) Featurestore(io.hops.hopsworks.persistence.entity.featurestore.Featurestore) FeaturegroupController(io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupController) ServiceException(io.hops.hopsworks.exceptions.ServiceException) TimeTravelFormat(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.TimeTravelFormat) TrainingDatasetJoin(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetJoin) QueryDTO(io.hops.hopsworks.common.featurestore.query.QueryDTO) StatisticColumn(io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticColumn) Dataset(io.hops.hopsworks.persistence.entity.dataset.Dataset) FilterLogic(io.hops.hopsworks.common.featurestore.query.filter.FilterLogic) Users(io.hops.hopsworks.persistence.entity.user.Users) Comparator(java.util.Comparator) FeaturegroupDTO(io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupDTO) TrainingDatasetFeature(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFeature) ArrayList(java.util.ArrayList) Join(io.hops.hopsworks.common.featurestore.query.join.Join) TrainingDatasetJoin(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetJoin) TransformationFunction(io.hops.hopsworks.persistence.entity.featurestore.transformationFunction.TransformationFunction) Feature(io.hops.hopsworks.common.featurestore.query.Feature) TrainingDatasetFeature(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFeature)

Example 3 with TrainingDatasetFeatureDTO

use of io.hops.hopsworks.common.featurestore.feature.TrainingDatasetFeatureDTO in project hopsworks by logicalclocks.

the class TrainingDatasetController method getTransformationFunction.

private TransformationFunction getTransformationFunction(Feature feature, List<TrainingDatasetFeatureDTO> featureDTOs) throws FeaturestoreException {
    TrainingDatasetFeatureDTO featureDTO = featureDTOs.stream().filter(dto -> feature.getName().equals(dto.getFeatureGroupFeatureName())).findFirst().orElse(null);
    TransformationFunction transformationFunction = null;
    if (featureDTO != null && featureDTO.getTransformationFunction() != null) {
        transformationFunction = transformationFunctionFacade.findById(featureDTO.getTransformationFunction().getId()).orElseThrow(() -> new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRANSFORMATION_FUNCTION_DOES_NOT_EXIST, Level.FINE, "Could not find transformation function with ID" + featureDTO.getTransformationFunction().getId()));
    }
    return transformationFunction;
}
Also used : TrainingDatasetFeatureDTO(io.hops.hopsworks.common.featurestore.feature.TrainingDatasetFeatureDTO) FeaturestoreException(io.hops.hopsworks.exceptions.FeaturestoreException) TransformationFunction(io.hops.hopsworks.persistence.entity.featurestore.transformationFunction.TransformationFunction)

Example 4 with TrainingDatasetFeatureDTO

use of io.hops.hopsworks.common.featurestore.feature.TrainingDatasetFeatureDTO in project hopsworks by logicalclocks.

the class TrainingDatasetInputValidation method validateFeatures.

public void validateFeatures(Query query, List<TrainingDatasetFeatureDTO> featuresDTOs) throws FeaturestoreException {
    if (query == null || featuresDTOs == null) {
        // needed.
        return;
    }
    List<TrainingDatasetFeatureDTO> labels = featuresDTOs.stream().filter(TrainingDatasetFeatureDTO::getLabel).collect(Collectors.toList());
    List<TrainingDatasetFeatureDTO> featuresWithTransformation = featuresDTOs.stream().filter(f -> f.getTransformationFunction() != null).collect(Collectors.toList());
    List<Feature> features = collectFeatures(query);
    for (TrainingDatasetFeatureDTO label : labels) {
        if (features.stream().noneMatch(f -> f.getName().equals(label.getName()))) {
            throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.LABEL_NOT_FOUND, Level.FINE, "Label: " + label.getName() + " is missing");
        }
    }
    for (TrainingDatasetFeatureDTO featureWithTransformation : featuresWithTransformation) {
        if (features.stream().noneMatch(f -> f.getName().equals(featureWithTransformation.getFeatureGroupFeatureName()))) {
            throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.FEATURE_WITH_TRANSFORMATION_NOT_FOUND, Level.FINE, "feature: " + featureWithTransformation.getName() + " is missing and transformation function can't be attached");
        }
    }
    // verify join prefix if any
    if (query != null && query.getJoins() != null) {
        for (Join join : query.getJoins()) {
            if (join.getPrefix() != null) {
                Pattern namePattern = FeaturestoreConstants.FEATURESTORE_REGEX;
                if (!namePattern.matcher(join.getPrefix()).matches()) {
                    throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.ILLEGAL_PREFIX_NAME, Level.FINE, ", the provided prefix name " + join.getPrefix() + " is invalid. Prefix names can only contain lower" + " case characters, numbers and underscores and cannot be longer than " + FeaturestoreConstants.FEATURESTORE_ENTITY_NAME_MAX_LENGTH + " characters or empty.");
                }
            }
        }
    }
}
Also used : Feature(io.hops.hopsworks.common.featurestore.query.Feature) Strings(joptsimple.internal.Strings) FeaturestoreConnectorType(io.hops.hopsworks.persistence.entity.featurestore.storageconnector.FeaturestoreConnectorType) StringUtils(org.apache.commons.lang3.StringUtils) TrainingDatasetSplitDTO(io.hops.hopsworks.common.featurestore.trainingdatasets.split.TrainingDatasetSplitDTO) ArrayList(java.util.ArrayList) Level(java.util.logging.Level) HashSet(java.util.HashSet) FeaturestoreException(io.hops.hopsworks.exceptions.FeaturestoreException) TransactionAttributeType(javax.ejb.TransactionAttributeType) FeaturestoreInputValidation(io.hops.hopsworks.common.featurestore.utils.FeaturestoreInputValidation) TransactionAttribute(javax.ejb.TransactionAttribute) Query(io.hops.hopsworks.common.featurestore.query.Query) TrainingDatasetType(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetType) EJB(javax.ejb.EJB) Stateless(javax.ejb.Stateless) FeaturestoreConstants(io.hops.hopsworks.common.featurestore.FeaturestoreConstants) TrainingDatasetFeatureDTO(io.hops.hopsworks.common.featurestore.feature.TrainingDatasetFeatureDTO) Set(java.util.Set) RESTCodes(io.hops.hopsworks.restutils.RESTCodes) Join(io.hops.hopsworks.common.featurestore.query.join.Join) StatisticColumnController(io.hops.hopsworks.common.featurestore.statistics.columns.StatisticColumnController) FeaturestoreConnectorFacade(io.hops.hopsworks.common.featurestore.storageconnectors.FeaturestoreConnectorFacade) Collectors(java.util.stream.Collectors) FeaturestoreStorageConnectorDTO(io.hops.hopsworks.common.featurestore.storageconnectors.FeaturestoreStorageConnectorDTO) List(java.util.List) Pattern(java.util.regex.Pattern) FeaturestoreConnector(io.hops.hopsworks.persistence.entity.featurestore.storageconnector.FeaturestoreConnector) Pattern(java.util.regex.Pattern) TrainingDatasetFeatureDTO(io.hops.hopsworks.common.featurestore.feature.TrainingDatasetFeatureDTO) Join(io.hops.hopsworks.common.featurestore.query.join.Join) FeaturestoreException(io.hops.hopsworks.exceptions.FeaturestoreException) Feature(io.hops.hopsworks.common.featurestore.query.Feature)

Example 5 with TrainingDatasetFeatureDTO

use of io.hops.hopsworks.common.featurestore.feature.TrainingDatasetFeatureDTO in project hopsworks by logicalclocks.

the class HopsFSProvenanceController method fromTrainingDatasetQuery.

private List<FeaturegroupXAttr.SimplifiedDTO> fromTrainingDatasetQuery(TrainingDatasetDTO trainingDatasetDTO) {
    Map<Integer, FeaturegroupXAttr.SimplifiedDTO> featuregroups = new HashMap<>();
    for (TrainingDatasetFeatureDTO feature : trainingDatasetDTO.getFeatures()) {
        FeaturegroupXAttr.SimplifiedDTO featuregroup = featuregroups.get(feature.getFeaturegroup().getId());
        if (featuregroup == null) {
            featuregroup = new FeaturegroupXAttr.SimplifiedDTO(feature.getFeaturegroup().getFeaturestoreId(), feature.getFeaturegroup().getName(), feature.getFeaturegroup().getVersion());
            featuregroups.put(feature.getFeaturegroup().getId(), featuregroup);
        }
        featuregroup.addFeature(feature.getName());
    }
    return new ArrayList<>(featuregroups.values());
}
Also used : FeaturegroupXAttr(io.hops.hopsworks.common.featurestore.xattr.dto.FeaturegroupXAttr) TrainingDatasetFeatureDTO(io.hops.hopsworks.common.featurestore.feature.TrainingDatasetFeatureDTO) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList)

Aggregations

TrainingDatasetFeatureDTO (io.hops.hopsworks.common.featurestore.feature.TrainingDatasetFeatureDTO)6 ArrayList (java.util.ArrayList)4 FeaturestoreException (io.hops.hopsworks.exceptions.FeaturestoreException)3 TrainingDatasetFeature (io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFeature)3 FeaturegroupDTO (io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupDTO)2 Feature (io.hops.hopsworks.common.featurestore.query.Feature)2 Query (io.hops.hopsworks.common.featurestore.query.Query)2 Join (io.hops.hopsworks.common.featurestore.query.join.Join)2 StatisticColumnController (io.hops.hopsworks.common.featurestore.statistics.columns.StatisticColumnController)2 FeaturestoreConnectorFacade (io.hops.hopsworks.common.featurestore.storageconnectors.FeaturestoreConnectorFacade)2 FeaturestoreConnector (io.hops.hopsworks.persistence.entity.featurestore.storageconnector.FeaturestoreConnector)2 FeaturestoreConnectorType (io.hops.hopsworks.persistence.entity.featurestore.storageconnector.FeaturestoreConnectorType)2 TrainingDatasetType (io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetType)2 TransformationFunction (io.hops.hopsworks.persistence.entity.featurestore.transformationFunction.TransformationFunction)2 RESTCodes (io.hops.hopsworks.restutils.RESTCodes)2 List (java.util.List)2 Level (java.util.logging.Level)2 Collectors (java.util.stream.Collectors)2 EJB (javax.ejb.EJB)2 Stateless (javax.ejb.Stateless)2