Search in sources :

Example 1 with HopsfsTrainingDataset

use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.hopsfs.HopsfsTrainingDataset in project hopsworks by logicalclocks.

the class TrainingDatasetController method createTrainingDatasetMetadata.

/**
 * Builds the database metadata for a new training dataset and returns it as a DTO.
 *
 * <p>The type-specific entity (HopsFS or external) is created first. The generic
 * {@code TrainingDataset} entity is then populated from the DTO, stored through
 * {@code trainingDatasetFacade.update}, and the creation is logged as a feature store activity.
 *
 * @param user the user recorded as creator of the training dataset and of the activity entry
 * @param project the project forwarded to the entity-to-DTO conversion
 * @param featurestore the feature store the training dataset is attached to
 * @param trainingDatasetDTO the DTO carrying the user-supplied settings
 * @param query the query forwarded to {@code setTrainingDatasetQuery} when the DTO carries a query DTO
 * @param featurestoreConnector the connector handed to the type-specific entity creation
 * @param inode the inode handed to the type-specific entity creation
 * @return the DTO produced by {@code convertTrainingDatasetToDTO} for the stored entity
 * @throws FeaturestoreException if the training dataset type is not one of the recognized types
 *     (helper calls may presumably raise it as well)
 * @throws ServiceException declared by the signature; presumably raised by helper calls
 */
@TransactionAttribute(TransactionAttributeType.REQUIRED)
private TrainingDatasetDTO createTrainingDatasetMetadata(Users user, Project project, Featurestore featurestore, TrainingDatasetDTO trainingDatasetDTO, Query query, FeaturestoreConnector featurestoreConnector, Inode inode) throws FeaturestoreException, ServiceException {
    // At most one of these is populated, depending on the requested dataset type
    HopsfsTrainingDataset hopsfsEntity = null;
    ExternalTrainingDataset externalEntity = null;
    switch (trainingDatasetDTO.getTrainingDatasetType()) {
        case EXTERNAL_TRAINING_DATASET:
            externalEntity = externalTrainingDatasetController.create(featurestoreConnector, trainingDatasetDTO.getLocation(), inode);
            break;
        case HOPSFS_TRAINING_DATASET:
            hopsfsEntity = hopsfsTrainingDatasetFacade.createHopsfsTrainingDataset(featurestoreConnector, inode);
            break;
        default: {
            String details = ", Recognized training dataset types are: " + TrainingDatasetType.HOPSFS_TRAINING_DATASET
                + ", and: " + TrainingDatasetType.EXTERNAL_TRAINING_DATASET
                + ". The provided training dataset type was not recognized: " + trainingDatasetDTO.getTrainingDatasetType();
            throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.ILLEGAL_TRAINING_DATASET_TYPE, Level.FINE, details);
        }
    }

    TrainingDataset entity = new TrainingDataset();

    // Identity and ownership
    entity.setName(trainingDatasetDTO.getName());
    entity.setVersion(trainingDatasetDTO.getVersion());
    entity.setDescription(trainingDatasetDTO.getDescription());
    entity.setFeaturestore(featurestore);
    entity.setCreator(user);
    entity.setCreated(new Date());

    // Storage type and format
    entity.setTrainingDatasetType(trainingDatasetDTO.getTrainingDatasetType());
    entity.setHopsfsTrainingDataset(hopsfsEntity);
    entity.setExternalTrainingDataset(externalEntity);
    entity.setDataFormat(trainingDatasetDTO.getDataFormat());
    // A missing coalesce flag in the DTO is treated as false
    entity.setCoalesce(Boolean.TRUE.equals(trainingDatasetDTO.getCoalesce()));

    // Splits
    entity.setSeed(trainingDatasetDTO.getSeed());
    entity.setSplits(
        trainingDatasetDTO.getSplits().stream()
            .map(split -> new TrainingDatasetSplit(entity, split.getName(), split.getPercentage()))
            .collect(Collectors.toList()));
    entity.setTrainSplit(trainingDatasetDTO.getTrainSplit());

    // Statistics configuration, linked in both directions with the training dataset
    StatisticsConfig statsConfig = new StatisticsConfig(
        trainingDatasetDTO.getStatisticsConfig().getEnabled(),
        trainingDatasetDTO.getStatisticsConfig().getCorrelations(),
        trainingDatasetDTO.getStatisticsConfig().getHistograms(),
        trainingDatasetDTO.getStatisticsConfig().getExactUniqueness());
    statsConfig.setTrainingDataset(entity);
    statsConfig.setStatisticColumns(
        trainingDatasetDTO.getStatisticsConfig().getColumns().stream()
            .map(column -> new StatisticColumn(statsConfig, column))
            .collect(Collectors.toList()));
    entity.setStatisticsConfig(statsConfig);

    // Features come either from an explicit feature list or from a query
    entity.setQuery(trainingDatasetDTO.getQueryDTO() != null);
    if (!entity.isQuery()) {
        entity.setFeatures(getTrainingDatasetFeatures(trainingDatasetDTO.getFeatures(), entity));
    } else {
        setTrainingDatasetQuery(query, trainingDatasetDTO.getFeatures(), entity);
    }

    // Store the entity, then record the creation as an activity
    TrainingDataset stored = trainingDatasetFacade.update(entity);
    fsActivityFacade.logMetadataActivity(user, stored, FeaturestoreActivityMeta.TD_CREATED);

    // Convert the stored entity back into its DTO form
    return convertTrainingDatasetToDTO(user, project, stored);
}
Also used : TrainingDatasetSplit(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.split.TrainingDatasetSplit) StatisticsConfig(io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticsConfig) HopsfsTrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.hopsfs.HopsfsTrainingDataset) TrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset) HopsfsTrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.hopsfs.HopsfsTrainingDataset) ExternalTrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.external.ExternalTrainingDataset) ExternalTrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.external.ExternalTrainingDataset) StatisticColumn(io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticColumn) FeaturestoreException(io.hops.hopsworks.exceptions.FeaturestoreException) Date(java.util.Date) TransactionAttribute(javax.ejb.TransactionAttribute)

Example 2 with HopsfsTrainingDataset

use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.hopsfs.HopsfsTrainingDataset in project hopsworks by logicalclocks.

the class HopsfsTrainingDatasetController method convertHopsfsTrainingDatasetToDTO.

/**
 * Fills in the HopsFS-specific fields of a training dataset DTO from its entity.
 *
 * <p>The location is assembled from the HopsFS scheme, the address and port of a namenode
 * obtained through service discovery, and the path of the dataset's inode.
 *
 * @param trainingDatasetDTO the DTO to populate; it is modified in place and returned
 * @param trainingDataset the entity whose HopsFS training dataset supplies the inode and connector
 * @return the same {@code trainingDatasetDTO} instance with location, inode id and storage
 *     connector set
 * @throws ServiceException if the namenode service cannot be found via service discovery
 */
public TrainingDatasetDTO convertHopsfsTrainingDatasetToDTO(TrainingDatasetDTO trainingDatasetDTO, TrainingDataset trainingDataset) throws ServiceException {
    // Look up a namenode first; the location cannot be built without it
    Service namenode;
    try {
        namenode = serviceDiscoveryController.getAnyAddressOfServiceWithDNS(ServiceDiscoveryController.HopsworksService.RPC_NAMENODE);
    } catch (ServiceDiscoveryException discoveryFailure) {
        throw new ServiceException(RESTCodes.ServiceErrorCode.SERVICE_NOT_FOUND, Level.SEVERE, "Could not find namenode service", discoveryFailure.getMessage(), discoveryFailure);
    }

    HopsfsTrainingDataset hopsfsEntity = trainingDataset.getHopsfsTrainingDataset();

    // Location has the form <scheme>://<address>:<port><inode path>
    String authority = namenode.getAddress() + ":" + namenode.getPort();
    String location = new Path(DistributedFileSystemOps.HOPSFS_SCHEME, authority, inodeController.getPath(hopsfsEntity.getInode())).toString();
    trainingDatasetDTO.setLocation(location);

    trainingDatasetDTO.setInodeId(hopsfsEntity.getInode().getId());
    trainingDatasetDTO.setStorageConnector(new FeaturestoreHopsfsConnectorDTO(hopsfsEntity.getFeaturestoreConnector()));
    return trainingDatasetDTO;
}
Also used : Path(org.apache.hadoop.fs.Path) FeaturestoreHopsfsConnectorDTO(io.hops.hopsworks.common.featurestore.storageconnectors.hopsfs.FeaturestoreHopsfsConnectorDTO) ServiceException(io.hops.hopsworks.exceptions.ServiceException) HopsfsTrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.hopsfs.HopsfsTrainingDataset) Service(com.logicalclocks.servicediscoverclient.service.Service) ServiceDiscoveryException(com.logicalclocks.servicediscoverclient.exceptions.ServiceDiscoveryException)

Example 3 with HopsfsTrainingDataset

use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.hopsfs.HopsfsTrainingDataset in project hopsworks by logicalclocks.

the class HopsfsTrainingDatasetFacade method createHopsfsTrainingDataset.

/**
 * Creates a HopsFS training dataset entity and persists it.
 *
 * <p>The entity manager is flushed right after {@code persist}, before the entity is returned.
 *
 * @param connector the feature store connector to set on the new entity
 * @param inode the inode to set on the new entity
 * @return the newly persisted entity
 */
public HopsfsTrainingDataset createHopsfsTrainingDataset(FeaturestoreConnector connector, Inode inode) {
    HopsfsTrainingDataset entity = new HopsfsTrainingDataset();
    entity.setFeaturestoreConnector(connector);
    entity.setInode(inode);

    em.persist(entity);
    em.flush();

    return entity;
}
Also used : HopsfsTrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.hopsfs.HopsfsTrainingDataset)

Aggregations

HopsfsTrainingDataset (io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.hopsfs.HopsfsTrainingDataset)3 ServiceDiscoveryException (com.logicalclocks.servicediscoverclient.exceptions.ServiceDiscoveryException)1 Service (com.logicalclocks.servicediscoverclient.service.Service)1 FeaturestoreHopsfsConnectorDTO (io.hops.hopsworks.common.featurestore.storageconnectors.hopsfs.FeaturestoreHopsfsConnectorDTO)1 FeaturestoreException (io.hops.hopsworks.exceptions.FeaturestoreException)1 ServiceException (io.hops.hopsworks.exceptions.ServiceException)1 StatisticColumn (io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticColumn)1 StatisticsConfig (io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticsConfig)1 TrainingDataset (io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset)1 ExternalTrainingDataset (io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.external.ExternalTrainingDataset)1 TrainingDatasetSplit (io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.split.TrainingDatasetSplit)1 Date (java.util.Date)1 TransactionAttribute (javax.ejb.TransactionAttribute)1 Path (org.apache.hadoop.fs.Path)1