Search in sources:

Example 1 with StatisticsConfig

use of io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticsConfig in project hopsworks by logicalclocks.

the class TrainingDatasetController method createTrainingDatasetMetadata.

/**
 * Builds the {@link TrainingDataset} entity described by the DTO, hands it to the training
 * dataset facade, logs the creation as a feature store activity and returns the stored
 * entity converted back to a DTO.
 *
 * @param user the user creating the training dataset; recorded as creator of the entity
 * @param project forwarded to the entity-to-DTO conversion of the result
 * @param featurestore the feature store the new entity is attached to
 * @param trainingDatasetDTO DTO carrying the requested training dataset settings
 * @param query passed to {@code setTrainingDatasetQuery} when the DTO carries a query DTO
 * @param featurestoreConnector connector passed to the type-specific dataset creation
 * @param inode inode passed to the type-specific dataset creation
 * @return DTO converted from the entity returned by the facade
 * @throws FeaturestoreException if the training dataset type in the DTO is not one of the two
 *                               handled types (may also be propagated from the helpers called here)
 * @throws ServiceException declared by this method; presumably propagated from a called helper
 */
@TransactionAttribute(TransactionAttributeType.REQUIRED)
private TrainingDatasetDTO createTrainingDatasetMetadata(Users user, Project project, Featurestore featurestore,
    TrainingDatasetDTO trainingDatasetDTO, Query query, FeaturestoreConnector featurestoreConnector, Inode inode)
    throws FeaturestoreException, ServiceException {
    // Only the entity matching the requested type is created; the other one stays null
    HopsfsTrainingDataset hopsfsEntity = null;
    ExternalTrainingDataset externalEntity = null;
    switch (trainingDatasetDTO.getTrainingDatasetType()) {
        case EXTERNAL_TRAINING_DATASET:
            externalEntity = externalTrainingDatasetController.create(
                featurestoreConnector, trainingDatasetDTO.getLocation(), inode);
            break;
        case HOPSFS_TRAINING_DATASET:
            hopsfsEntity = hopsfsTrainingDatasetFacade.createHopsfsTrainingDataset(featurestoreConnector, inode);
            break;
        default:
            String unrecognizedTypeMsg = ", Recognized training dataset types are: "
                + TrainingDatasetType.HOPSFS_TRAINING_DATASET
                + ", and: " + TrainingDatasetType.EXTERNAL_TRAINING_DATASET
                + ". The provided training dataset type was not recognized: "
                + trainingDatasetDTO.getTrainingDatasetType();
            throw new FeaturestoreException(
                RESTCodes.FeaturestoreErrorCode.ILLEGAL_TRAINING_DATASET_TYPE, Level.FINE, unrecognizedTypeMsg);
    }

    // Copy the plain attributes from the DTO onto a fresh entity
    TrainingDataset entity = new TrainingDataset();
    entity.setName(trainingDatasetDTO.getName());
    entity.setHopsfsTrainingDataset(hopsfsEntity);
    entity.setExternalTrainingDataset(externalEntity);
    entity.setDataFormat(trainingDatasetDTO.getDataFormat());
    entity.setDescription(trainingDatasetDTO.getDescription());
    entity.setFeaturestore(featurestore);
    entity.setCreated(new Date());
    entity.setCreator(user);
    entity.setVersion(trainingDatasetDTO.getVersion());
    entity.setTrainingDatasetType(trainingDatasetDTO.getTrainingDatasetType());
    entity.setSeed(trainingDatasetDTO.getSeed());

    // Every split entity points back at the training dataset it belongs to
    entity.setSplits(
        trainingDatasetDTO.getSplits().stream()
            .map(splitDTO -> new TrainingDatasetSplit(entity, splitDTO.getName(), splitDTO.getPercentage()))
            .collect(Collectors.toList()));

    // A coalesce flag missing from the DTO is stored as false
    if (trainingDatasetDTO.getCoalesce() != null) {
        entity.setCoalesce(trainingDatasetDTO.getCoalesce());
    } else {
        entity.setCoalesce(false);
    }

    // Statistics configuration, linked in both directions with the entity
    StatisticsConfig statsConfig = new StatisticsConfig(
        trainingDatasetDTO.getStatisticsConfig().getEnabled(),
        trainingDatasetDTO.getStatisticsConfig().getCorrelations(),
        trainingDatasetDTO.getStatisticsConfig().getHistograms(),
        trainingDatasetDTO.getStatisticsConfig().getExactUniqueness());
    statsConfig.setTrainingDataset(entity);
    statsConfig.setStatisticColumns(
        trainingDatasetDTO.getStatisticsConfig().getColumns().stream()
            .map(columnName -> new StatisticColumn(statsConfig, columnName))
            .collect(Collectors.toList()));
    entity.setStatisticsConfig(statsConfig);
    entity.setTrainSplit(trainingDatasetDTO.getTrainSplit());

    // The dataset is query based exactly when the DTO carries a query DTO
    entity.setQuery(trainingDatasetDTO.getQueryDTO() != null);
    if (!entity.isQuery()) {
        entity.setFeatures(getTrainingDatasetFeatures(trainingDatasetDTO.getFeatures(), entity));
    } else {
        setTrainingDatasetQuery(query, trainingDatasetDTO.getFeatures(), entity);
    }

    TrainingDataset storedEntity = trainingDatasetFacade.update(entity);
    // Record the creation in the feature store activity log
    fsActivityFacade.logMetadataActivity(user, storedEntity, FeaturestoreActivityMeta.TD_CREATED);
    // The returned DTO is built from the entity handed back by the facade
    return convertTrainingDatasetToDTO(user, project, storedEntity);
}
Also used : TrainingDatasetSplit(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.split.TrainingDatasetSplit) StatisticsConfig(io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticsConfig) HopsfsTrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.hopsfs.HopsfsTrainingDataset) TrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset) ExternalTrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.external.ExternalTrainingDataset) StatisticColumn(io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticColumn) FeaturestoreException(io.hops.hopsworks.exceptions.FeaturestoreException) Date(java.util.Date) TransactionAttribute(javax.ejb.TransactionAttribute)

Example 2 with StatisticsConfig

use of io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticsConfig in project hopsworks by logicalclocks.

the class TestStatisticColumnController method testIsColumnExists.

/**
 * Checks that {@code isColumnExists} reports a column name contained in the list
 * and rejects a name that is not in it.
 */
@Test
public void testIsColumnExists() {
    StatisticsConfig config = new StatisticsConfig();
    StatisticColumn firstColumn = new StatisticColumn(config, "ft1");
    StatisticColumn secondColumn = new StatisticColumn(config, "ft2");
    List<StatisticColumn> columns = Arrays.asList(firstColumn, secondColumn);

    // "ft1" is one of the listed columns
    boolean listedColumnFound = statisticColumnController.isColumnExists(columns, "ft1");
    Assert.assertTrue(listedColumnFound);

    // "ft3" is not among the listed columns
    boolean unlistedColumnFound = statisticColumnController.isColumnExists(columns, "ft3");
    Assert.assertFalse(unlistedColumnFound);
}
Also used : StatisticsConfig(io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticsConfig) StatisticColumn(io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticColumn) Test(org.junit.Test)

Example 3 with StatisticsConfig

use of io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticsConfig in project hopsworks by logicalclocks.

the class FeaturegroupController method persistFeaturegroupMetadata.

/**
 * Assembles a new {@link Featuregroup} entity from the DTO and the already created
 * type-specific entities, then persists it through the feature group facade.
 *
 * @param featurestore the featurestore of the feature group
 * @param user the Hopsworks user making the request; stored as creator
 * @param featuregroupDTO DTO of the feature group
 * @param cachedFeaturegroup the cached feature group that the feature group is linked to (if any)
 * @param onDemandFeaturegroup the on-demand feature group that the feature group is linked to (if any)
 * @return the created entity
 * @throws FeaturestoreException declared by this method; presumably propagated from the expectation lookup
 */
private Featuregroup persistFeaturegroupMetadata(Featurestore featurestore, Users user,
    FeaturegroupDTO featuregroupDTO, CachedFeaturegroup cachedFeaturegroup,
    OnDemandFeaturegroup onDemandFeaturegroup) throws FeaturestoreException {
    Featuregroup entity = new Featuregroup();
    entity.setName(featuregroupDTO.getName());
    entity.setFeaturestore(featurestore);
    entity.setCreated(new Date());
    entity.setCreator(user);
    entity.setVersion(featuregroupDTO.getVersion());
    // The validation type is only written when the DTO provides one
    if (featuregroupDTO.getValidationType() != null) {
        entity.setValidationType(featuregroupDTO.getValidationType());
    }
    // The feature group type follows from the concrete DTO class
    if (featuregroupDTO instanceof CachedFeaturegroupDTO) {
        entity.setFeaturegroupType(FeaturegroupType.CACHED_FEATURE_GROUP);
    } else {
        entity.setFeaturegroupType(FeaturegroupType.ON_DEMAND_FEATURE_GROUP);
    }
    entity.setCachedFeaturegroup(cachedFeaturegroup);
    entity.setOnDemandFeaturegroup(onDemandFeaturegroup);
    entity.setEventTime(featuregroupDTO.getEventTime());

    // Statistics configuration, linked in both directions with the entity
    StatisticsConfig statsConfig = new StatisticsConfig(
        featuregroupDTO.getStatisticsConfig().getEnabled(),
        featuregroupDTO.getStatisticsConfig().getCorrelations(),
        featuregroupDTO.getStatisticsConfig().getHistograms(),
        featuregroupDTO.getStatisticsConfig().getExactUniqueness());
    statsConfig.setFeaturegroup(entity);
    statsConfig.setStatisticColumns(
        featuregroupDTO.getStatisticsConfig().getColumns().stream()
            .map(columnName -> new StatisticColumn(statsConfig, columnName))
            .collect(Collectors.toList()));
    entity.setStatisticsConfig(statsConfig);

    if (featuregroupDTO.getExpectationsNames() != null) {
        List<FeatureGroupExpectation> attachedExpectations = new ArrayList<>();
        for (String expectationName : featuregroupDTO.getExpectationsNames()) {
            FeatureStoreExpectation storeExpectation =
                featureGroupValidationsController.getFeatureStoreExpectation(entity.getFeaturestore(), expectationName);
            // Reuse the link returned by the facade; build a new one only when none is found
            FeatureGroupExpectation link = featureGroupExpectationFacade
                .findByFeaturegroupAndExpectation(entity, storeExpectation)
                .orElseGet(() -> {
                    FeatureGroupExpectation freshLink = new FeatureGroupExpectation();
                    freshLink.setFeaturegroup(entity);
                    freshLink.setFeatureStoreExpectation(storeExpectation);
                    return freshLink;
                });
            attachedExpectations.add(link);
        }
        entity.setExpectations(attachedExpectations);
    }

    featuregroupFacade.persist(entity);
    return entity;
}
Also used : StatisticsConfig(io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticsConfig) FeatureGroupExpectation(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.datavalidation.FeatureGroupExpectation) Featuregroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup) OnDemandFeaturegroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.ondemand.OnDemandFeaturegroup) CachedFeaturegroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeaturegroup) ArrayList(java.util.ArrayList) CachedFeaturegroupDTO(io.hops.hopsworks.common.featurestore.featuregroup.cached.CachedFeaturegroupDTO) FeatureStoreExpectation(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.datavalidation.FeatureStoreExpectation) StatisticColumn(io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticColumn) Date(java.util.Date)

Example 4 with StatisticsConfig

use of io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticsConfig in project hopsworks by logicalclocks.

the class TestStatisticColumnController method testIsEntityToBeDropped.

/**
 * Checks {@code isEntityToBeDropped} against a column list that contains "ft2" and "ft3":
 * the "ft1" column is expected to be dropped, the "ft2" column is expected to be kept.
 */
@Test
public void testIsEntityToBeDropped() {
    StatisticsConfig config = new StatisticsConfig();
    StatisticColumn absentFromList = new StatisticColumn(config, "ft1");
    StatisticColumn presentInList = new StatisticColumn(config, "ft2");
    List<String> requestedColumns = Arrays.asList("ft2", "ft3");

    // "ft1" does not appear in the requested columns
    boolean absentIsDropped = statisticColumnController.isEntityToBeDropped(absentFromList, requestedColumns);
    Assert.assertTrue(absentIsDropped);

    // "ft2" appears in the requested columns
    boolean presentIsDropped = statisticColumnController.isEntityToBeDropped(presentInList, requestedColumns);
    Assert.assertFalse(presentIsDropped);
}
Also used : StatisticsConfig(io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticsConfig) StatisticColumn(io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticColumn) Test(org.junit.Test)

Aggregations

StatisticColumn (io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticColumn)4 StatisticsConfig (io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticsConfig)4 Date (java.util.Date)2 Test (org.junit.Test)2 CachedFeaturegroupDTO (io.hops.hopsworks.common.featurestore.featuregroup.cached.CachedFeaturegroupDTO)1 FeaturestoreException (io.hops.hopsworks.exceptions.FeaturestoreException)1 Featuregroup (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup)1 CachedFeaturegroup (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeaturegroup)1 FeatureGroupExpectation (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.datavalidation.FeatureGroupExpectation)1 FeatureStoreExpectation (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.datavalidation.FeatureStoreExpectation)1 OnDemandFeaturegroup (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.ondemand.OnDemandFeaturegroup)1 TrainingDataset (io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset)1 ExternalTrainingDataset (io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.external.ExternalTrainingDataset)1 HopsfsTrainingDataset (io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.hopsfs.HopsfsTrainingDataset)1 TrainingDatasetSplit (io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.split.TrainingDatasetSplit)1 ArrayList (java.util.ArrayList)1 TransactionAttribute (javax.ejb.TransactionAttribute)1