Usage of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset in the hopsworks project by logicalclocks.
Class TrainingDatasetController, method updateTrainingDatasetStatsConfig:
/**
 * Updates the statistics configuration of an existing training dataset.
 * Only flags that are non-null in the incoming DTO overwrite the stored values;
 * a null flag means "keep the current setting".
 *
 * @return the training dataset DTO rebuilt from the persisted state
 * @throws FeaturestoreException if the training dataset does not exist or a
 *         requested statistic column is not part of the dataset schema
 */
public TrainingDatasetDTO updateTrainingDatasetStatsConfig(Users user, Project project, Featurestore featurestore, TrainingDatasetDTO trainingDatasetDTO) throws FeaturestoreException, ServiceException {
  TrainingDataset td = getTrainingDatasetById(featurestore, trainingDatasetDTO.getId());
  Boolean enabled = trainingDatasetDTO.getStatisticsConfig().getEnabled();
  if (enabled != null) {
    td.getStatisticsConfig().setDescriptive(enabled);
  }
  Boolean histograms = trainingDatasetDTO.getStatisticsConfig().getHistograms();
  if (histograms != null) {
    td.getStatisticsConfig().setHistograms(histograms);
  }
  Boolean correlations = trainingDatasetDTO.getStatisticsConfig().getCorrelations();
  if (correlations != null) {
    td.getStatisticsConfig().setCorrelations(correlations);
  }
  Boolean exactUniqueness = trainingDatasetDTO.getStatisticsConfig().getExactUniqueness();
  if (exactUniqueness != null) {
    td.getStatisticsConfig().setExactUniqueness(exactUniqueness);
  }
  // Validate the requested statistic columns against the schema already stored in
  // the database, so the client does not have to resend the schema on update.
  statisticColumnController.verifyStatisticColumnsExist(trainingDatasetDTO, td);
  td = trainingDatasetFacade.update(td);
  statisticColumnController.persistStatisticColumns(td, trainingDatasetDTO.getStatisticsConfig().getColumns());
  // Re-read so the returned DTO reflects the freshly persisted statistic columns
  // (this extra round trip to the database could be avoided).
  td = getTrainingDatasetById(featurestore, trainingDatasetDTO.getId());
  return convertTrainingDatasetToDTO(user, project, td);
}
Usage of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset in the hopsworks project by logicalclocks.
Class TrainingDatasetController, method getWithNameAndFeaturestore:
/**
 * Retrieves every version of the training dataset with the given name in the
 * feature store and converts each one to a DTO.
 *
 * @throws FeaturestoreException if no training dataset with that name exists
 */
public List<TrainingDatasetDTO> getWithNameAndFeaturestore(Users user, Project project, Featurestore featurestore, String name) throws FeaturestoreException, ServiceException {
  List<TrainingDataset> matches = trainingDatasetFacade.findByNameAndFeaturestore(name, featurestore);
  if (matches == null || matches.isEmpty()) {
    throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_NOT_FOUND, Level.FINE, "training dataset name : " + name);
  }
  List<TrainingDatasetDTO> dtos = new ArrayList<>(matches.size());
  for (TrainingDataset match : matches) {
    dtos.add(convertTrainingDatasetToDTO(user, project, match));
  }
  return dtos;
}
Usage of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset in the hopsworks project by logicalclocks.
Class TrainingDatasetController, method delete:
/**
 * Deletes a training dataset: verifies the user's role, removes the dataset's
 * statistics, then removes its files on HDFS. The database rows are removed by
 * the foreign-key cascade triggered by the inode deletion.
 *
 * @return the name of the deleted training dataset
 * @throws FeaturestoreException if the dataset does not exist, the user lacks
 *         the required role, or the HDFS delete fails
 */
public String delete(Users user, Project project, Featurestore featurestore, Integer trainingDatasetId) throws FeaturestoreException {
  TrainingDataset trainingDataset = trainingDatasetFacade.findByIdAndFeaturestore(trainingDatasetId, featurestore).orElseThrow(() -> new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_NOT_FOUND, Level.FINE, "training dataset id:" + trainingDatasetId));
  featurestoreUtils.verifyUserRole(trainingDataset, featurestore, user, project);
  statisticsController.deleteStatistics(project, user, trainingDataset);
  String dsPath = getTrainingDatasetInodePath(trainingDataset);
  String username = hdfsUsersBean.getHdfsUserName(project, user);
  // we rely on the foreign keys to cascade from inode -> external/hopsfs td -> training dataset
  DistributedFileSystemOps udfso = dfs.getDfsOps(username);
  try {
    // TODO(Fabio): if Data owner *In project* do operation as superuser
    udfso.rm(dsPath, true);
  } catch (IOException e) {
    // Previously this exception was silently swallowed, letting callers believe
    // the files were removed when they were not. Surface the failure instead,
    // preserving the cause.
    // NOTE(review): confirm COULD_NOT_DELETE_TRAINING_DATASET is the intended error code.
    throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.COULD_NOT_DELETE_TRAINING_DATASET, Level.SEVERE, "training dataset id:" + trainingDatasetId, e.getMessage(), e);
  } finally {
    if (udfso != null) {
      dfs.closeDfsClient(udfso);
    }
  }
  return trainingDataset.getName();
}
Usage of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset in the hopsworks project by logicalclocks.
Class TrainingDatasetController, method collectFeatures:
/**
 * Recursively collects the features of a query and of all its joins as
 * TrainingDatasetFeature entities, attached either to a training dataset or to a
 * feature view (whichever of the two is non-null).
 *
 * The list of training dataset joins is passed in so that the aliases can be
 * rebuilt, which also correctly handles the case in which a feature group is
 * joined with itself.
 *
 * @param featureIndex index of the next feature in the overall feature ordering;
 *        advanced once per feature emitted
 * @param joinIndex index into tdJoins for the join the current query belongs to
 */
public List<TrainingDatasetFeature> collectFeatures(Query query, List<TrainingDatasetFeatureDTO> featureDTOs, TrainingDataset trainingDataset, FeatureView featureView, int featureIndex, List<TrainingDatasetJoin> tdJoins, int joinIndex) throws FeaturestoreException {
List<TrainingDatasetFeature> features = new ArrayList<>();
boolean isLabel = false;
TransformationFunction transformationFunction = null;
for (Feature f : query.getFeatures()) {
if (featureDTOs != null && !featureDTOs.isEmpty()) {
// identify if feature is label
isLabel = featureDTOs.stream().anyMatch(dto -> f.getName().equals(dto.getName()) && dto.getLabel());
// get transformation function for this feature
transformationFunction = getTransformationFunction(f, featureDTOs);
}
// Only one branch of the ternary is evaluated, so featureIndex++ advances exactly
// once per feature regardless of whether the parent is a dataset or a feature view.
features.add(trainingDataset != null ? new TrainingDatasetFeature(trainingDataset, tdJoins.get(joinIndex), query.getFeaturegroup(), f.getName(), f.getType(), featureIndex++, isLabel, transformationFunction) : new TrainingDatasetFeature(featureView, tdJoins.get(joinIndex), query.getFeaturegroup(), f.getName(), f.getType(), featureIndex++, isLabel, transformationFunction));
}
if (query.getJoins() != null) {
for (Join join : query.getJoins()) {
// NOTE(review): the recursive call advances its own copy of joinIndex for any
// joins nested inside the right query; those increments do not propagate back
// here. This assumes right queries are flat (no nested joins) — confirm.
joinIndex++;
List<TrainingDatasetFeature> joinFeatures = collectFeatures(join.getRightQuery(), featureDTOs, trainingDataset, featureView, featureIndex, tdJoins, joinIndex);
features.addAll(joinFeatures);
// Advance past the features emitted by the subtree so indices stay unique.
featureIndex += joinFeatures.size();
}
}
return features;
}
Usage of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset in the hopsworks project by logicalclocks.
Class TrainingDatasetController, method createTrainingDatasetMetadata:
/**
 * Creates the metadata structure in the database for a new training dataset:
 * the storage-specific sub-entity (HopsFS or external), the training dataset
 * entity itself, its statistics configuration, splits, and features/query.
 * Logs the creation as a feature store activity and returns the persisted
 * entity converted to a DTO.
 */
@TransactionAttribute(TransactionAttributeType.REQUIRED)
private TrainingDatasetDTO createTrainingDatasetMetadata(Users user, Project project, Featurestore featurestore, TrainingDatasetDTO trainingDatasetDTO, Query query, FeaturestoreConnector featurestoreConnector, Inode inode) throws FeaturestoreException, ServiceException {
  // Create the storage-specific sub-entity first; exactly one of the two stays non-null.
  HopsfsTrainingDataset hopsfsTd = null;
  ExternalTrainingDataset externalTd = null;
  switch (trainingDatasetDTO.getTrainingDatasetType()) {
    case HOPSFS_TRAINING_DATASET:
      hopsfsTd = hopsfsTrainingDatasetFacade.createHopsfsTrainingDataset(featurestoreConnector, inode);
      break;
    case EXTERNAL_TRAINING_DATASET:
      externalTd = externalTrainingDatasetController.create(featurestoreConnector, trainingDatasetDTO.getLocation(), inode);
      break;
    default:
      throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.ILLEGAL_TRAINING_DATASET_TYPE, Level.FINE, ", Recognized training dataset types are: " + TrainingDatasetType.HOPSFS_TRAINING_DATASET + ", and: " + TrainingDatasetType.EXTERNAL_TRAINING_DATASET + ". The provided training dataset type was not recognized: " + trainingDatasetDTO.getTrainingDatasetType());
  }
  // Assemble the Hopsworks-side training dataset entity.
  final TrainingDataset td = new TrainingDataset();
  td.setName(trainingDatasetDTO.getName());
  td.setHopsfsTrainingDataset(hopsfsTd);
  td.setExternalTrainingDataset(externalTd);
  td.setDataFormat(trainingDatasetDTO.getDataFormat());
  td.setDescription(trainingDatasetDTO.getDescription());
  td.setFeaturestore(featurestore);
  td.setCreated(new Date());
  td.setCreator(user);
  td.setVersion(trainingDatasetDTO.getVersion());
  td.setTrainingDatasetType(trainingDatasetDTO.getTrainingDatasetType());
  td.setSeed(trainingDatasetDTO.getSeed());
  td.setSplits(trainingDatasetDTO.getSplits().stream()
      .map(splitDTO -> new TrainingDatasetSplit(td, splitDTO.getName(), splitDTO.getPercentage()))
      .collect(Collectors.toList()));
  // Coalesce defaults to false when the client did not send it.
  Boolean coalesce = trainingDatasetDTO.getCoalesce();
  td.setCoalesce(coalesce == null ? false : coalesce);
  // Build the statistics configuration and wire the bidirectional references.
  final StatisticsConfig statsConfig = new StatisticsConfig(
      trainingDatasetDTO.getStatisticsConfig().getEnabled(),
      trainingDatasetDTO.getStatisticsConfig().getCorrelations(),
      trainingDatasetDTO.getStatisticsConfig().getHistograms(),
      trainingDatasetDTO.getStatisticsConfig().getExactUniqueness());
  statsConfig.setTrainingDataset(td);
  statsConfig.setStatisticColumns(trainingDatasetDTO.getStatisticsConfig().getColumns().stream()
      .map(col -> new StatisticColumn(statsConfig, col))
      .collect(Collectors.toList()));
  td.setStatisticsConfig(statsConfig);
  td.setTrainSplit(trainingDatasetDTO.getTrainSplit());
  // A dataset is query-backed when the client supplied a query DTO; otherwise the
  // feature list is taken verbatim from the DTO.
  boolean queryBacked = trainingDatasetDTO.getQueryDTO() != null;
  td.setQuery(queryBacked);
  if (queryBacked) {
    setTrainingDatasetQuery(query, trainingDatasetDTO.getFeatures(), td);
  } else {
    td.setFeatures(getTrainingDatasetFeatures(trainingDatasetDTO.getFeatures(), td));
  }
  TrainingDataset persisted = trainingDatasetFacade.update(td);
  // Log the metadata operation
  fsActivityFacade.logMetadataActivity(user, persisted, FeaturestoreActivityMeta.TD_CREATED);
  // Convert the final entity as read back from the database.
  return convertTrainingDatasetToDTO(user, project, persisted);
}
Aggregations