use of io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticColumn in project hopsworks by logicalclocks.
the class TrainingDatasetController method createTrainingDatasetMetadata.
/**
 * Builds the training dataset entity described by the DTO, hands it to the training dataset facade,
 * logs the creation as a feature store metadata activity and returns the stored entity as a DTO.
 *
 * @param user the user creating the training dataset; set as creator and used for the activity log
 * @param project forwarded to the entity-to-DTO conversion of the stored training dataset
 * @param featurestore the feature store the training dataset is attached to
 * @param trainingDatasetDTO the description of the training dataset to create
 * @param query forwarded to setTrainingDatasetQuery when the DTO carries a query DTO
 * @param featurestoreConnector connector used to create the type-specific (HopsFS or external) entity
 * @param inode inode used to create the type-specific (HopsFS or external) entity
 * @return the DTO built from the entity returned by the facade
 * @throws FeaturestoreException if the training dataset type of the DTO is neither
 *                               HOPSFS_TRAINING_DATASET nor EXTERNAL_TRAINING_DATASET
 *                               (helpers called here may presumably raise it as well)
 * @throws ServiceException declared by this method; presumably raised by one of the helpers it calls
 */
@TransactionAttribute(TransactionAttributeType.REQUIRED)
private TrainingDatasetDTO createTrainingDatasetMetadata(Users user, Project project, Featurestore featurestore,
    TrainingDatasetDTO trainingDatasetDTO, Query query, FeaturestoreConnector featurestoreConnector, Inode inode)
    throws FeaturestoreException, ServiceException {
  // Exactly one of the two type-specific entities gets created; the other one stays null.
  HopsfsTrainingDataset hopsfsEntity = null;
  ExternalTrainingDataset externalEntity = null;
  switch (trainingDatasetDTO.getTrainingDatasetType()) {
    case HOPSFS_TRAINING_DATASET:
      hopsfsEntity = hopsfsTrainingDatasetFacade.createHopsfsTrainingDataset(featurestoreConnector, inode);
      break;
    case EXTERNAL_TRAINING_DATASET:
      externalEntity = externalTrainingDatasetController.create(featurestoreConnector,
          trainingDatasetDTO.getLocation(), inode);
      break;
    default:
      String details = ", Recognized training dataset types are: " + TrainingDatasetType.HOPSFS_TRAINING_DATASET
          + ", and: " + TrainingDatasetType.EXTERNAL_TRAINING_DATASET
          + ". The provided training dataset type was not recognized: "
          + trainingDatasetDTO.getTrainingDatasetType();
      throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.ILLEGAL_TRAINING_DATASET_TYPE, Level.FINE,
          details);
  }

  // Plain attributes copied from the request
  TrainingDataset entity = new TrainingDataset();
  entity.setName(trainingDatasetDTO.getName());
  entity.setDescription(trainingDatasetDTO.getDescription());
  entity.setVersion(trainingDatasetDTO.getVersion());
  entity.setDataFormat(trainingDatasetDTO.getDataFormat());
  entity.setTrainingDatasetType(trainingDatasetDTO.getTrainingDatasetType());
  entity.setSeed(trainingDatasetDTO.getSeed());

  // Ownership and links to the surrounding entities
  entity.setFeaturestore(featurestore);
  entity.setCreator(user);
  entity.setCreated(new Date());
  entity.setHopsfsTrainingDataset(hopsfsEntity);
  entity.setExternalTrainingDataset(externalEntity);

  // Every split entity points back to the training dataset being built
  entity.setSplits(trainingDatasetDTO.getSplits().stream()
      .map(split -> new TrainingDatasetSplit(entity, split.getName(), split.getPercentage()))
      .collect(Collectors.toList()));

  // A missing coalesce flag is treated as false
  entity.setCoalesce(Boolean.TRUE.equals(trainingDatasetDTO.getCoalesce()));

  // Statistics configuration and its columns
  StatisticsConfig statisticsConfig = new StatisticsConfig(
      trainingDatasetDTO.getStatisticsConfig().getEnabled(),
      trainingDatasetDTO.getStatisticsConfig().getCorrelations(),
      trainingDatasetDTO.getStatisticsConfig().getHistograms(),
      trainingDatasetDTO.getStatisticsConfig().getExactUniqueness());
  statisticsConfig.setTrainingDataset(entity);
  statisticsConfig.setStatisticColumns(trainingDatasetDTO.getStatisticsConfig().getColumns().stream()
      .map(columnName -> new StatisticColumn(statisticsConfig, columnName))
      .collect(Collectors.toList()));
  entity.setStatisticsConfig(statisticsConfig);
  entity.setTrainSplit(trainingDatasetDTO.getTrainSplit());

  // The dataset is query based exactly when the DTO carries a query DTO;
  // otherwise its features are taken directly from the DTO.
  entity.setQuery(trainingDatasetDTO.getQueryDTO() != null);
  if (!entity.isQuery()) {
    entity.setFeatures(getTrainingDatasetFeatures(trainingDatasetDTO.getFeatures(), entity));
  } else {
    setTrainingDatasetQuery(query, trainingDatasetDTO.getFeatures(), entity);
  }

  // From here on only the entity returned by the facade is used
  TrainingDataset storedEntity = trainingDatasetFacade.update(entity);
  fsActivityFacade.logMetadataActivity(user, storedEntity, FeaturestoreActivityMeta.TD_CREATED);
  return convertTrainingDatasetToDTO(user, project, storedEntity);
}
use of io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticColumn in project hopsworks by logicalclocks.
the class TestStatisticColumnController method testIsColumnExists.
/**
 * isColumnExists must report a name carried by one of the given columns and reject a name
 * that none of them carries.
 */
@Test
public void testIsColumnExists() {
  StatisticsConfig config = new StatisticsConfig();
  StatisticColumn firstColumn = new StatisticColumn(config, "ft1");
  StatisticColumn secondColumn = new StatisticColumn(config, "ft2");
  List<StatisticColumn> knownColumns = Arrays.asList(firstColumn, secondColumn);

  // "ft1" is one of the two columns in the list
  boolean listedNameFound = statisticColumnController.isColumnExists(knownColumns, "ft1");
  Assert.assertTrue(listedNameFound);

  // "ft3" is not in the list
  boolean unlistedNameFound = statisticColumnController.isColumnExists(knownColumns, "ft3");
  Assert.assertFalse(unlistedNameFound);
}
use of io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticColumn in project hopsworks by logicalclocks.
the class FeaturegroupController method persistFeaturegroupMetadata.
/**
 * Builds a new feature group entity from the DTO and persists it through the feature group facade.
 *
 * @param featurestore the featurestore the feature group belongs to
 * @param user the Hopsworks user making the request; set as creator of the feature group
 * @param featuregroupDTO DTO describing the feature group
 * @param cachedFeaturegroup the cached feature group that the feature group is linked to (if any)
 * @param onDemandFeaturegroup the on-demand feature group that the feature group is linked to (if any)
 * @return the entity that was handed to the facade for persisting
 * @throws FeaturestoreException declared by this method; presumably raised while looking up a
 *                               feature store expectation by name
 */
private Featuregroup persistFeaturegroupMetadata(Featurestore featurestore, Users user,
    FeaturegroupDTO featuregroupDTO, CachedFeaturegroup cachedFeaturegroup,
    OnDemandFeaturegroup onDemandFeaturegroup) throws FeaturestoreException {
  Featuregroup entity = new Featuregroup();
  entity.setName(featuregroupDTO.getName());
  entity.setFeaturestore(featurestore);
  entity.setCreated(new Date());
  entity.setCreator(user);
  entity.setVersion(featuregroupDTO.getVersion());

  // The validation type is only set when the DTO provides one
  if (featuregroupDTO.getValidationType() != null) {
    entity.setValidationType(featuregroupDTO.getValidationType());
  }

  // The concrete DTO class decides the feature group type
  if (featuregroupDTO instanceof CachedFeaturegroupDTO) {
    entity.setFeaturegroupType(FeaturegroupType.CACHED_FEATURE_GROUP);
  } else {
    entity.setFeaturegroupType(FeaturegroupType.ON_DEMAND_FEATURE_GROUP);
  }
  entity.setCachedFeaturegroup(cachedFeaturegroup);
  entity.setOnDemandFeaturegroup(onDemandFeaturegroup);
  entity.setEventTime(featuregroupDTO.getEventTime());

  // Statistics configuration and its columns
  StatisticsConfig statisticsConfig = new StatisticsConfig(
      featuregroupDTO.getStatisticsConfig().getEnabled(),
      featuregroupDTO.getStatisticsConfig().getCorrelations(),
      featuregroupDTO.getStatisticsConfig().getHistograms(),
      featuregroupDTO.getStatisticsConfig().getExactUniqueness());
  statisticsConfig.setFeaturegroup(entity);
  statisticsConfig.setStatisticColumns(featuregroupDTO.getStatisticsConfig().getColumns().stream()
      .map(columnName -> new StatisticColumn(statisticsConfig, columnName))
      .collect(Collectors.toList()));
  entity.setStatisticsConfig(statisticsConfig);

  // Attach one expectation link per requested name, reusing a link the facade already knows
  if (featuregroupDTO.getExpectationsNames() != null) {
    List<FeatureGroupExpectation> expectationLinks = new ArrayList<>();
    for (String expectationName : featuregroupDTO.getExpectationsNames()) {
      FeatureStoreExpectation storeExpectation =
          featureGroupValidationsController.getFeatureStoreExpectation(entity.getFeaturestore(), expectationName);
      FeatureGroupExpectation link = featureGroupExpectationFacade
          .findByFeaturegroupAndExpectation(entity, storeExpectation)
          .orElseGet(() -> {
            FeatureGroupExpectation created = new FeatureGroupExpectation();
            created.setFeaturegroup(entity);
            created.setFeatureStoreExpectation(storeExpectation);
            return created;
          });
      expectationLinks.add(link);
    }
    entity.setExpectations(expectationLinks);
  }

  featuregroupFacade.persist(entity);
  return entity;
}
use of io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticColumn in project hopsworks by logicalclocks.
the class TestStatisticColumnController method testIsEntityToBeDropped.
/**
 * isEntityToBeDropped must flag a column entity whose name is missing from the given column names
 * and must not flag one whose name is among them.
 */
@Test
public void testIsEntityToBeDropped() {
  StatisticsConfig config = new StatisticsConfig();
  List<String> requestedColumns = Arrays.asList("ft2", "ft3");

  // "ft1" is not among the requested column names
  StatisticColumn staleColumn = new StatisticColumn(config, "ft1");
  // "ft2" is among the requested column names
  StatisticColumn retainedColumn = new StatisticColumn(config, "ft2");

  boolean staleIsDropped = statisticColumnController.isEntityToBeDropped(staleColumn, requestedColumns);
  Assert.assertTrue(staleIsDropped);

  boolean retainedIsDropped = statisticColumnController.isEntityToBeDropped(retainedColumn, requestedColumns);
  Assert.assertFalse(retainedIsDropped);
}
use of io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticColumn in project hopsworks by logicalclocks.
the class StatisticColumnController method persistStatisticColumns.
/**
 * Brings the statistic column entities of a statistics config in line with a list of column names:
 * names without a matching entity (according to isColumnExists) are persisted as new entities linked
 * to the config, and entities flagged by isEntityToBeDropped for the given names are removed.
 * Nothing happens when the list of names is null.
 *
 * @param statisticsConfig the statistics config to link the columns to
 * @param statisticColumns the column names to insert; may be null
 */
public void persistStatisticColumns(StatisticsConfig statisticsConfig, List<String> statisticColumns) {
  if (statisticColumns == null) {
    return;
  }
  // Parameterized cast instead of a raw (List) cast so the compiler keeps checking element types.
  // NOTE(review): this still assumes the entity exposes its columns as a List - confirm against
  // the StatisticsConfig mapping.
  List<StatisticColumn> columnEntities = (List<StatisticColumn>) statisticsConfig.getStatisticColumns();

  // insert the columns that have no entity yet
  for (String columnName : statisticColumns) {
    if (!isColumnExists(columnEntities, columnName)) {
      StatisticColumn sc = new StatisticColumn();
      sc.setStatisticsConfig(statisticsConfig);
      sc.setName(columnName);
      statisticColumnFacade.persist(sc);
    }
  }

  // drop all entities which are not in the list anymore
  for (StatisticColumn columnEntity : columnEntities) {
    if (isEntityToBeDropped(columnEntity, statisticColumns)) {
      statisticColumnFacade.remove(columnEntity);
    }
  }
}
Aggregations