Use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.hopsfs.HopsfsTrainingDataset in the project hopsworks by logicalclocks.
From the class TrainingDatasetController, method createTrainingDatasetMetadata.
/**
 * Builds the {@link TrainingDataset} entity described by the given DTO, stores it through the
 * training dataset facade, logs the creation and returns the stored entity converted to a DTO.
 *
 * @param user set as creator of the training dataset and passed to the activity log
 * @param project passed on to the final entity-to-DTO conversion
 * @param featurestore feature store set on the new training dataset
 * @param trainingDatasetDTO description of the training dataset to create
 * @param query passed to {@code setTrainingDatasetQuery} when the DTO carries a query DTO
 * @param featurestoreConnector connector passed to the type-specific (HopsFS or external) entity
 * @param inode inode passed to the type-specific (HopsFS or external) entity
 * @return DTO built from the entity returned by the facade
 * @throws FeaturestoreException if the training dataset type is neither HopsFS nor external
 *                               (helpers called here may presumably raise it as well)
 * @throws ServiceException declared for the helpers called here (presumably the DTO conversion;
 *                          to be confirmed against their signatures)
 */
@TransactionAttribute(TransactionAttributeType.REQUIRED)
private TrainingDatasetDTO createTrainingDatasetMetadata(Users user, Project project, Featurestore featurestore,
    TrainingDatasetDTO trainingDatasetDTO, Query query, FeaturestoreConnector featurestoreConnector, Inode inode)
    throws FeaturestoreException, ServiceException {
  // Only the entity matching the requested type is created; the other one stays null.
  HopsfsTrainingDataset hopsfsEntity = null;
  ExternalTrainingDataset externalEntity = null;
  switch (trainingDatasetDTO.getTrainingDatasetType()) {
    case HOPSFS_TRAINING_DATASET:
      hopsfsEntity = hopsfsTrainingDatasetFacade.createHopsfsTrainingDataset(featurestoreConnector, inode);
      break;
    case EXTERNAL_TRAINING_DATASET:
      externalEntity = externalTrainingDatasetController.create(featurestoreConnector,
          trainingDatasetDTO.getLocation(), inode);
      break;
    default:
      String unrecognizedTypeDetails = ", Recognized training dataset types are: "
          + TrainingDatasetType.HOPSFS_TRAINING_DATASET + ", and: " + TrainingDatasetType.EXTERNAL_TRAINING_DATASET
          + ". The provided training dataset type was not recognized: "
          + trainingDatasetDTO.getTrainingDatasetType();
      throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.ILLEGAL_TRAINING_DATASET_TYPE, Level.FINE,
          unrecognizedTypeDetails);
  }

  // Identity and ownership of the new training dataset
  TrainingDataset tdEntity = new TrainingDataset();
  tdEntity.setName(trainingDatasetDTO.getName());
  tdEntity.setVersion(trainingDatasetDTO.getVersion());
  tdEntity.setDescription(trainingDatasetDTO.getDescription());
  tdEntity.setFeaturestore(featurestore);
  tdEntity.setCreator(user);
  tdEntity.setCreated(new Date());

  // Storage type and format
  tdEntity.setTrainingDatasetType(trainingDatasetDTO.getTrainingDatasetType());
  tdEntity.setHopsfsTrainingDataset(hopsfsEntity);
  tdEntity.setExternalTrainingDataset(externalEntity);
  tdEntity.setDataFormat(trainingDatasetDTO.getDataFormat());

  // Splitting and write options; a missing coalesce flag is treated as false
  tdEntity.setSeed(trainingDatasetDTO.getSeed());
  tdEntity.setSplits(trainingDatasetDTO.getSplits().stream()
      .map(split -> new TrainingDatasetSplit(tdEntity, split.getName(), split.getPercentage()))
      .collect(Collectors.toList()));
  tdEntity.setCoalesce(Boolean.TRUE.equals(trainingDatasetDTO.getCoalesce()));

  // Statistics configuration, linked in both directions with the training dataset
  StatisticsConfig statsConfig = new StatisticsConfig(
      trainingDatasetDTO.getStatisticsConfig().getEnabled(),
      trainingDatasetDTO.getStatisticsConfig().getCorrelations(),
      trainingDatasetDTO.getStatisticsConfig().getHistograms(),
      trainingDatasetDTO.getStatisticsConfig().getExactUniqueness());
  statsConfig.setTrainingDataset(tdEntity);
  statsConfig.setStatisticColumns(trainingDatasetDTO.getStatisticsConfig().getColumns().stream()
      .map(column -> new StatisticColumn(statsConfig, column))
      .collect(Collectors.toList()));
  tdEntity.setStatisticsConfig(statsConfig);
  tdEntity.setTrainSplit(trainingDatasetDTO.getTrainSplit());

  // The training dataset is query based exactly when the DTO carries a query DTO;
  // otherwise its features are taken directly from the DTO's feature list.
  tdEntity.setQuery(trainingDatasetDTO.getQueryDTO() != null);
  if (!tdEntity.isQuery()) {
    tdEntity.setFeatures(getTrainingDatasetFeatures(trainingDatasetDTO.getFeatures(), tdEntity));
  } else {
    setTrainingDatasetQuery(query, trainingDatasetDTO.getFeatures(), tdEntity);
  }

  // Store the entity, record the creation, and answer with the stored state
  TrainingDataset storedEntity = trainingDatasetFacade.update(tdEntity);
  fsActivityFacade.logMetadataActivity(user, storedEntity, FeaturestoreActivityMeta.TD_CREATED);
  return convertTrainingDatasetToDTO(user, project, storedEntity);
}
Use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.hopsfs.HopsfsTrainingDataset in the project hopsworks by logicalclocks.
From the class HopsfsTrainingDatasetController, method convertHopsfsTrainingDatasetToDTO.
/**
 * Fills the HopsFS-specific fields of a training dataset DTO from the corresponding entity.
 *
 * <p>The location is a path composed of the HopsFS scheme, the address and port of a namenode
 * RPC service obtained through service discovery, and the path of the training dataset's inode.
 *
 * @param trainingDatasetDTO the DTO to populate; it is modified in place
 * @param trainingDataset the entity whose HopsFS training dataset supplies the inode and connector
 * @return the same {@code trainingDatasetDTO} instance with location, inode id and storage
 *         connector set
 * @throws ServiceException if the namenode service cannot be found through service discovery
 */
public TrainingDatasetDTO convertHopsfsTrainingDatasetToDTO(TrainingDatasetDTO trainingDatasetDTO,
    TrainingDataset trainingDataset) throws ServiceException {
  // Resolve the namenode first: without it no location can be built.
  final Service namenode;
  try {
    namenode = serviceDiscoveryController.getAnyAddressOfServiceWithDNS(
        ServiceDiscoveryController.HopsworksService.RPC_NAMENODE);
  } catch (ServiceDiscoveryException discoveryFailure) {
    throw new ServiceException(RESTCodes.ServiceErrorCode.SERVICE_NOT_FOUND, Level.SEVERE,
        "Could not find namenode service", discoveryFailure.getMessage(), discoveryFailure);
  }

  HopsfsTrainingDataset hopsfsPart = trainingDataset.getHopsfsTrainingDataset();

  // Location = <scheme>://<namenode address>:<port>/<inode path>
  String authority = namenode.getAddress() + ":" + namenode.getPort();
  String inodePath = inodeController.getPath(hopsfsPart.getInode());
  Path location = new Path(DistributedFileSystemOps.HOPSFS_SCHEME, authority, inodePath);
  trainingDatasetDTO.setLocation(location.toString());

  trainingDatasetDTO.setInodeId(hopsfsPart.getInode().getId());
  trainingDatasetDTO.setStorageConnector(
      new FeaturestoreHopsfsConnectorDTO(hopsfsPart.getFeaturestoreConnector()));
  return trainingDatasetDTO;
}
Use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.hopsfs.HopsfsTrainingDataset in the project hopsworks by logicalclocks.
From the class HopsfsTrainingDatasetFacade, method createHopsfsTrainingDataset.
/**
 * Creates and persists a HopsFS training dataset.
 *
 * @param connector the feature store connector to set on the new entity
 * @param inode the inode to set on the new entity
 * @return the newly created entity, after it has been persisted and flushed
 */
public HopsfsTrainingDataset createHopsfsTrainingDataset(FeaturestoreConnector connector, Inode inode) {
  HopsfsTrainingDataset created = new HopsfsTrainingDataset();
  created.setFeaturestoreConnector(connector);
  created.setInode(inode);

  // Persist and flush right away rather than waiting for the end of the unit of work.
  em.persist(created);
  em.flush();
  return created;
}
Aggregations