Search in sources :

Example 1 with TrainingDataset

use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset in project hopsworks by logicalclocks.

the class TrainingDatasetController method updateTrainingDatasetStatsConfig.

/**
 * Updates the statistics configuration of an existing training dataset and returns its refreshed DTO.
 *
 * Each flag (enabled/descriptive, histograms, correlations, exact uniqueness) is copied from the
 * DTO onto the stored entity only when the DTO carries a non-null value for it.
 *
 * @param user               passed through to the DTO conversion
 * @param project            passed through to the DTO conversion
 * @param featurestore       feature store in which the training dataset is looked up
 * @param trainingDatasetDTO carries the id of the training dataset and the new statistics settings
 * @return DTO built from the training dataset as re-read after the update
 * @throws FeaturestoreException declared by the signature; raised by the helpers called here
 * @throws ServiceException      declared by the signature; raised by the helpers called here
 */
public TrainingDatasetDTO updateTrainingDatasetStatsConfig(Users user, Project project, Featurestore featurestore, TrainingDatasetDTO trainingDatasetDTO) throws FeaturestoreException, ServiceException {
    TrainingDataset storedDataset = getTrainingDatasetById(featurestore, trainingDatasetDTO.getId());
    StatisticsConfig storedConfig = storedDataset.getStatisticsConfig();

    // A null flag in the request means "leave the stored value untouched".
    if (null != trainingDatasetDTO.getStatisticsConfig().getEnabled()) {
        // The DTO's "enabled" flag is stored as the entity's "descriptive" flag.
        storedConfig.setDescriptive(trainingDatasetDTO.getStatisticsConfig().getEnabled());
    }
    if (null != trainingDatasetDTO.getStatisticsConfig().getHistograms()) {
        storedConfig.setHistograms(trainingDatasetDTO.getStatisticsConfig().getHistograms());
    }
    if (null != trainingDatasetDTO.getStatisticsConfig().getCorrelations()) {
        storedConfig.setCorrelations(trainingDatasetDTO.getStatisticsConfig().getCorrelations());
    }
    if (null != trainingDatasetDTO.getStatisticsConfig().getExactUniqueness()) {
        storedConfig.setExactUniqueness(trainingDatasetDTO.getStatisticsConfig().getExactUniqueness());
    }

    // Validate the requested columns against the schema held in the database: the client is not
    // required to send the schema along with an update request.
    statisticColumnController.verifyStatisticColumnsExist(trainingDatasetDTO, storedDataset);

    TrainingDataset updatedDataset = trainingDatasetFacade.update(storedDataset);
    statisticColumnController.persistStatisticColumns(updatedDataset, trainingDatasetDTO.getStatisticsConfig().getColumns());

    // Re-read the training dataset so the result includes the persisted columns.
    // This extra trip to the database could be saved.
    TrainingDataset refreshedDataset = getTrainingDatasetById(featurestore, trainingDatasetDTO.getId());
    return convertTrainingDatasetToDTO(user, project, refreshedDataset);
}
Also used : TrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset) HopsfsTrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.hopsfs.HopsfsTrainingDataset) ExternalTrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.external.ExternalTrainingDataset)

Example 2 with TrainingDataset

use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset in project hopsworks by logicalclocks.

the class TrainingDatasetController method getWithNameAndFeaturestore.

/**
 * Finds all training datasets with the given name in a feature store and converts them to DTOs.
 *
 * @param user         passed through to the DTO conversion
 * @param project      passed through to the DTO conversion
 * @param featurestore feature store to search in
 * @param name         training dataset name to look up
 * @return one DTO per matching training dataset, in the order returned by the facade
 * @throws FeaturestoreException with TRAINING_DATASET_NOT_FOUND when the lookup yields null or an
 *                               empty list
 * @throws ServiceException      declared by the signature; raised by the DTO conversion
 */
public List<TrainingDatasetDTO> getWithNameAndFeaturestore(Users user, Project project, Featurestore featurestore, String name) throws FeaturestoreException, ServiceException {
    List<TrainingDataset> matches = trainingDatasetFacade.findByNameAndFeaturestore(name, featurestore);

    boolean anyMatch = matches != null && !matches.isEmpty();
    if (!anyMatch) {
        throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_NOT_FOUND, Level.FINE, "training dataset name : " + name);
    }

    // A plain loop is used because the conversion declares checked exceptions.
    List<TrainingDatasetDTO> converted = new ArrayList<>();
    for (TrainingDataset match : matches) {
        TrainingDatasetDTO dto = convertTrainingDatasetToDTO(user, project, match);
        converted.add(dto);
    }
    return converted;
}
Also used : TrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset) HopsfsTrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.hopsfs.HopsfsTrainingDataset) ExternalTrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.external.ExternalTrainingDataset) ArrayList(java.util.ArrayList) FeaturestoreException(io.hops.hopsworks.exceptions.FeaturestoreException)

Example 3 with TrainingDataset

use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset in project hopsworks by logicalclocks.

the class TrainingDatasetController method delete.

/**
 * Deletes a training dataset: verifies the caller's role, removes its statistics and then removes
 * its directory from the file system on behalf of the user.
 *
 * Removal of the directory is best-effort: an {@link IOException} from the file system is logged
 * but not propagated, so the method still returns the dataset name in that case.
 *
 * @param user              user requesting the deletion
 * @param project           project the user is acting in
 * @param featurestore      feature store in which the training dataset is looked up
 * @param trainingDatasetId id of the training dataset to delete
 * @return the name of the training dataset
 * @throws FeaturestoreException with TRAINING_DATASET_NOT_FOUND when no training dataset with that
 *                               id exists in the feature store, or as raised by the helpers called
 */
public String delete(Users user, Project project, Featurestore featurestore, Integer trainingDatasetId) throws FeaturestoreException {
    TrainingDataset trainingDataset = trainingDatasetFacade.findByIdAndFeaturestore(trainingDatasetId, featurestore).orElseThrow(() -> new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_NOT_FOUND, Level.FINE, "training dataset id:" + trainingDatasetId));
    featurestoreUtils.verifyUserRole(trainingDataset, featurestore, user, project);
    statisticsController.deleteStatistics(project, user, trainingDataset);
    String dsPath = getTrainingDatasetInodePath(trainingDataset);
    String username = hdfsUsersBean.getHdfsUserName(project, user);
    // we rely on the foreign keys to cascade from inode -> external/hopsfs td -> training dataset
    DistributedFileSystemOps udfso = dfs.getDfsOps(username);
    try {
        // TODO(Fabio): if Data owner *In project* do operation as superuser
        udfso.rm(dsPath, true);
    } catch (IOException e) {
        // Keep the deletion best-effort (callers are not handed a new failure mode), but do not
        // hide the failure: without this log entry a left-over directory would go unnoticed.
        java.util.logging.Logger.getLogger(getClass().getName()).log(Level.WARNING, "Could not remove training dataset directory: " + dsPath, e);
    } finally {
        if (udfso != null) {
            dfs.closeDfsClient(udfso);
        }
    }
    return trainingDataset.getName();
}
Also used : TrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset) HopsfsTrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.hopsfs.HopsfsTrainingDataset) ExternalTrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.external.ExternalTrainingDataset) DistributedFileSystemOps(io.hops.hopsworks.common.hdfs.DistributedFileSystemOps) IOException(java.io.IOException) FeaturestoreException(io.hops.hopsworks.exceptions.FeaturestoreException)

Example 4 with TrainingDataset

use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset in project hopsworks by logicalclocks.

the class TrainingDatasetController method collectFeatures.

// The list of training dataset joins is passed in so that the aliases can be rebuilt and so that
// the case of a feature group joined with itself is handled correctly.
//
// Walks the query and, recursively, the right-hand query of each of its joins, creating one
// TrainingDatasetFeature per query feature. Features are attached to the training dataset when
// one is given, otherwise to the feature view. featureIndex numbers the features in the order
// they are collected; joinIndex selects the entry of tdJoins that belongs to the current query.
public List<TrainingDatasetFeature> collectFeatures(Query query, List<TrainingDatasetFeatureDTO> featureDTOs, TrainingDataset trainingDataset, FeatureView featureView, int featureIndex, List<TrainingDatasetJoin> tdJoins, int joinIndex) throws FeaturestoreException {
    List<TrainingDatasetFeature> collected = new ArrayList<>();
    // Both values keep their previous state when no feature DTOs are supplied.
    boolean label = false;
    TransformationFunction function = null;

    for (Feature feature : query.getFeatures()) {
        if (!(featureDTOs == null || featureDTOs.isEmpty())) {
            // a feature is a label when a DTO with the same name is flagged as label
            label = featureDTOs.stream().anyMatch(dto -> feature.getName().equals(dto.getName()) && dto.getLabel());
            // look up the transformation function attached to this feature
            function = getTransformationFunction(feature, featureDTOs);
        }

        TrainingDatasetFeature tdFeature;
        if (trainingDataset != null) {
            tdFeature = new TrainingDatasetFeature(trainingDataset, tdJoins.get(joinIndex), query.getFeaturegroup(), feature.getName(), feature.getType(), featureIndex++, label, function);
        } else {
            tdFeature = new TrainingDatasetFeature(featureView, tdJoins.get(joinIndex), query.getFeaturegroup(), feature.getName(), feature.getType(), featureIndex++, label, function);
        }
        collected.add(tdFeature);
    }

    if (query.getJoins() == null) {
        return collected;
    }

    for (Join join : query.getJoins()) {
        // every join moves on to the next entry of tdJoins
        joinIndex += 1;
        List<TrainingDatasetFeature> fromJoin = collectFeatures(join.getRightQuery(), featureDTOs, trainingDataset, featureView, featureIndex, tdJoins, joinIndex);
        collected.addAll(fromJoin);
        // continue numbering after the features contributed by the joined query
        featureIndex += fromJoin.size();
    }
    return collected;
}
Also used : TrainingDatasetFilter(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFilter) FeaturegroupType(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.FeaturegroupType) Date(java.util.Date) Feature(io.hops.hopsworks.common.featurestore.query.Feature) HopsfsTrainingDatasetController(io.hops.hopsworks.common.featurestore.trainingdatasets.hopsfs.HopsfsTrainingDatasetController) HopsFSProvenanceController(io.hops.hopsworks.common.provenance.core.HopsFSProvenanceController) Settings(io.hops.hopsworks.common.util.Settings) TransactionAttributeType(javax.ejb.TransactionAttributeType) Map(java.util.Map) FilterValue(io.hops.hopsworks.common.featurestore.query.filter.FilterValue) FeatureView(io.hops.hopsworks.persistence.entity.featurestore.featureview.FeatureView) TrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset) TrainingDatasetSplit(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.split.TrainingDatasetSplit) Utils(io.hops.hopsworks.common.hdfs.Utils) StatisticsConfig(io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticsConfig) JoinType(org.apache.calcite.sql.JoinType) Stateless(javax.ejb.Stateless) TransformationFunctionFacade(io.hops.hopsworks.common.featurestore.transformationFunction.TransformationFunctionFacade) HopsfsTrainingDatasetFacade(io.hops.hopsworks.common.featurestore.trainingdatasets.hopsfs.HopsfsTrainingDatasetFacade) Collection(java.util.Collection) TrainingDatasetFeatureDTO(io.hops.hopsworks.common.featurestore.feature.TrainingDatasetFeatureDTO) Featuregroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup) RESTCodes(io.hops.hopsworks.restutils.RESTCodes) Join(io.hops.hopsworks.common.featurestore.query.join.Join) StatisticColumnController(io.hops.hopsworks.common.featurestore.statistics.columns.StatisticColumnController) 
FeaturestoreConnectorFacade(io.hops.hopsworks.common.featurestore.storageconnectors.FeaturestoreConnectorFacade) Collectors(java.util.stream.Collectors) QueryController(io.hops.hopsworks.common.featurestore.query.QueryController) SqlFilterLogic(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlFilterLogic) TransformationFunction(io.hops.hopsworks.persistence.entity.featurestore.transformationFunction.TransformationFunction) List(java.util.List) FeaturestoreFacade(io.hops.hopsworks.common.featurestore.FeaturestoreFacade) ExternalTrainingDatasetController(io.hops.hopsworks.common.featurestore.trainingdatasets.external.ExternalTrainingDatasetController) FeaturestoreUtils(io.hops.hopsworks.common.featurestore.utils.FeaturestoreUtils) Optional(java.util.Optional) FeaturestoreConnector(io.hops.hopsworks.persistence.entity.featurestore.storageconnector.FeaturestoreConnector) DistributedFsService(io.hops.hopsworks.common.hdfs.DistributedFsService) InodeController(io.hops.hopsworks.common.hdfs.inode.InodeController) DistributedFileSystemOps(io.hops.hopsworks.common.hdfs.DistributedFileSystemOps) TrainingDatasetJoinCondition(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetJoinCondition) FeaturestoreConnectorType(io.hops.hopsworks.persistence.entity.featurestore.storageconnector.FeaturestoreConnectorType) HashMap(java.util.HashMap) FeaturestoreActivityMeta(io.hops.hopsworks.persistence.entity.featurestore.activity.FeaturestoreActivityMeta) Streams(com.logicalclocks.shaded.com.google.common.collect.Streams) OnlineFeaturestoreController(io.hops.hopsworks.common.featurestore.online.OnlineFeaturestoreController) Project(io.hops.hopsworks.persistence.entity.project.Project) TrainingDatasetFilterCondition(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFilterCondition) ArrayList(java.util.ArrayList) Level(java.util.logging.Level) 
HopsfsTrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.hopsfs.HopsfsTrainingDataset) FeaturestoreException(io.hops.hopsworks.exceptions.FeaturestoreException) FeaturestoreActivityFacade(io.hops.hopsworks.common.featurestore.activity.FeaturestoreActivityFacade) PitJoinController(io.hops.hopsworks.common.featurestore.query.pit.PitJoinController) TransactionAttribute(javax.ejb.TransactionAttribute) HdfsUsersController(io.hops.hopsworks.common.hdfs.HdfsUsersController) OnlineFeaturegroupController(io.hops.hopsworks.common.featurestore.featuregroup.online.OnlineFeaturegroupController) Query(io.hops.hopsworks.common.featurestore.query.Query) Filter(io.hops.hopsworks.common.featurestore.query.filter.Filter) TrainingDatasetType(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetType) Inode(io.hops.hopsworks.persistence.entity.hdfs.inode.Inode) ProvenanceException(io.hops.hopsworks.exceptions.ProvenanceException) EJB(javax.ejb.EJB) TrainingDatasetFeature(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFeature) StatisticsController(io.hops.hopsworks.common.featurestore.statistics.StatisticsController) ExternalTrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.external.ExternalTrainingDataset) IOException(java.io.IOException) Featurestore(io.hops.hopsworks.persistence.entity.featurestore.Featurestore) FeaturegroupController(io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupController) ServiceException(io.hops.hopsworks.exceptions.ServiceException) TimeTravelFormat(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.TimeTravelFormat) TrainingDatasetJoin(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetJoin) QueryDTO(io.hops.hopsworks.common.featurestore.query.QueryDTO) StatisticColumn(io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticColumn) 
Dataset(io.hops.hopsworks.persistence.entity.dataset.Dataset) FilterLogic(io.hops.hopsworks.common.featurestore.query.filter.FilterLogic) Users(io.hops.hopsworks.persistence.entity.user.Users) Comparator(java.util.Comparator) FeaturegroupDTO(io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupDTO) TrainingDatasetFeature(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFeature) ArrayList(java.util.ArrayList) Join(io.hops.hopsworks.common.featurestore.query.join.Join) TrainingDatasetJoin(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetJoin) TransformationFunction(io.hops.hopsworks.persistence.entity.featurestore.transformationFunction.TransformationFunction) Feature(io.hops.hopsworks.common.featurestore.query.Feature) TrainingDatasetFeature(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFeature)

Example 5 with TrainingDataset

use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset in project hopsworks by logicalclocks.

the class TrainingDatasetController method createTrainingDatasetMetadata.

/**
 * Creates the metadata structure in DB for the training dataset.
 *
 * Builds the type-specific entity (HopsFS or external), populates a new TrainingDataset entity
 * from the DTO (including splits, statistics configuration and either the query or the explicit
 * feature list), stores it through the facade and logs a TD_CREATED activity.
 *
 * NOTE(review): this method is private; container-managed transaction attributes presumably only
 * take effect on calls that go through the bean's business interface - confirm the annotation is
 * doing what is intended here.
 *
 * @return DTO converted from the entity returned by the facade
 * @throws FeaturestoreException with ILLEGAL_TRAINING_DATASET_TYPE when the DTO's type is neither
 *                               HOPSFS_TRAINING_DATASET nor EXTERNAL_TRAINING_DATASET, or as
 *                               raised by the helpers called here
 * @throws ServiceException      declared by the signature; raised by the helpers called here
 */
@TransactionAttribute(TransactionAttributeType.REQUIRED)
private TrainingDatasetDTO createTrainingDatasetMetadata(Users user, Project project, Featurestore featurestore, TrainingDatasetDTO trainingDatasetDTO, Query query, FeaturestoreConnector featurestoreConnector, Inode inode) throws FeaturestoreException, ServiceException {
    // Exactly one of the two type-specific entities is created; the other stays null.
    TrainingDatasetType requestedType = trainingDatasetDTO.getTrainingDatasetType();
    HopsfsTrainingDataset hopsfsTrainingDataset = null;
    ExternalTrainingDataset externalTrainingDataset = null;
    switch(requestedType) {
        case HOPSFS_TRAINING_DATASET:
            hopsfsTrainingDataset = hopsfsTrainingDatasetFacade.createHopsfsTrainingDataset(featurestoreConnector, inode);
            break;
        case EXTERNAL_TRAINING_DATASET:
            externalTrainingDataset = externalTrainingDatasetController.create(featurestoreConnector, trainingDatasetDTO.getLocation(), inode);
            break;
        default:
            throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.ILLEGAL_TRAINING_DATASET_TYPE, Level.FINE, ", Recognized training dataset types are: " + TrainingDatasetType.HOPSFS_TRAINING_DATASET + ", and: " + TrainingDatasetType.EXTERNAL_TRAINING_DATASET + ". The provided training dataset type was not recognized: " + requestedType);
    }

    // Populate the training dataset entity from the DTO
    TrainingDataset trainingDataset = new TrainingDataset();
    trainingDataset.setName(trainingDatasetDTO.getName());
    trainingDataset.setHopsfsTrainingDataset(hopsfsTrainingDataset);
    trainingDataset.setExternalTrainingDataset(externalTrainingDataset);
    trainingDataset.setDataFormat(trainingDatasetDTO.getDataFormat());
    trainingDataset.setDescription(trainingDatasetDTO.getDescription());
    trainingDataset.setFeaturestore(featurestore);
    trainingDataset.setCreated(new Date());
    trainingDataset.setCreator(user);
    trainingDataset.setVersion(trainingDatasetDTO.getVersion());
    trainingDataset.setTrainingDatasetType(requestedType);
    trainingDataset.setSeed(trainingDatasetDTO.getSeed());

    // Each split entity points back at the training dataset it belongs to.
    List<TrainingDatasetSplit> splits = trainingDatasetDTO.getSplits().stream()
        .map(splitDTO -> new TrainingDatasetSplit(trainingDataset, splitDTO.getName(), splitDTO.getPercentage()))
        .collect(Collectors.toList());
    trainingDataset.setSplits(splits);

    // A missing coalesce flag is treated as false.
    boolean coalesce = Boolean.TRUE.equals(trainingDatasetDTO.getCoalesce());
    trainingDataset.setCoalesce(coalesce);

    // Statistics configuration and its columns
    StatisticsConfig statisticsConfig = new StatisticsConfig(trainingDatasetDTO.getStatisticsConfig().getEnabled(), trainingDatasetDTO.getStatisticsConfig().getCorrelations(), trainingDatasetDTO.getStatisticsConfig().getHistograms(), trainingDatasetDTO.getStatisticsConfig().getExactUniqueness());
    statisticsConfig.setTrainingDataset(trainingDataset);
    List<StatisticColumn> statisticColumns = trainingDatasetDTO.getStatisticsConfig().getColumns().stream()
        .map(columnName -> new StatisticColumn(statisticsConfig, columnName))
        .collect(Collectors.toList());
    statisticsConfig.setStatisticColumns(statisticColumns);
    trainingDataset.setStatisticsConfig(statisticsConfig);
    trainingDataset.setTrainSplit(trainingDatasetDTO.getTrainSplit());

    // The dataset is query-based when the DTO carries a query; otherwise its features come
    // straight from the DTO's feature list.
    trainingDataset.setQuery(trainingDatasetDTO.getQueryDTO() != null);
    if (!trainingDataset.isQuery()) {
        trainingDataset.setFeatures(getTrainingDatasetFeatures(trainingDatasetDTO.getFeatures(), trainingDataset));
    } else {
        setTrainingDatasetQuery(query, trainingDatasetDTO.getFeatures(), trainingDataset);
    }

    TrainingDataset persisted = trainingDatasetFacade.update(trainingDataset);
    // Log the metadata operation
    fsActivityFacade.logMetadataActivity(user, persisted, FeaturestoreActivityMeta.TD_CREATED);
    // Build the response from the entity returned by the facade
    return convertTrainingDatasetToDTO(user, project, persisted);
}
Also used : TrainingDatasetSplit(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.split.TrainingDatasetSplit) StatisticsConfig(io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticsConfig) HopsfsTrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.hopsfs.HopsfsTrainingDataset) TrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset) HopsfsTrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.hopsfs.HopsfsTrainingDataset) ExternalTrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.external.ExternalTrainingDataset) ExternalTrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.external.ExternalTrainingDataset) StatisticColumn(io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticColumn) FeaturestoreException(io.hops.hopsworks.exceptions.FeaturestoreException) Date(java.util.Date) TransactionAttribute(javax.ejb.TransactionAttribute)

Aggregations

TrainingDataset (io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset)29 Users (io.hops.hopsworks.persistence.entity.user.Users)12 ArrayList (java.util.ArrayList)11 Feature (io.hops.hopsworks.common.featurestore.query.Feature)10 Filter (io.hops.hopsworks.common.featurestore.query.filter.Filter)10 FilterLogic (io.hops.hopsworks.common.featurestore.query.filter.FilterLogic)10 Path (javax.ws.rs.Path)10 DatasetPath (io.hops.hopsworks.common.dataset.util.DatasetPath)9 JWTRequired (io.hops.hopsworks.jwt.annotation.JWTRequired)9 SqlFilterLogic (io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlFilterLogic)9 TrainingDatasetFilter (io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFilter)9 ApiOperation (io.swagger.annotations.ApiOperation)9 AllowedProjectRoles (io.hops.hopsworks.api.filter.AllowedProjectRoles)8 ApiKeyRequired (io.hops.hopsworks.api.filter.apiKey.ApiKeyRequired)8 ResourceRequest (io.hops.hopsworks.common.api.ResourceRequest)8 FeaturestoreException (io.hops.hopsworks.exceptions.FeaturestoreException)8 ExternalTrainingDataset (io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.external.ExternalTrainingDataset)8 HopsfsTrainingDataset (io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.hopsfs.HopsfsTrainingDataset)8 Produces (javax.ws.rs.Produces)8 Test (org.junit.Test)8