Search in sources:

Example 1 with OnDemandFeaturegroup

use of io.hops.hopsworks.persistence.entity.featurestore.featuregroup.ondemand.OnDemandFeaturegroup in project hopsworks by logicalclocks.

In the class FeaturegroupController, the method createFeaturegroupNoValidation.

/**
 * Creates a feature group from the given DTO and returns the DTO of the persisted entity.
 *
 * Steps, in order: persist the type-specific metadata (cached or on-demand), persist the basic
 * feature group metadata, optionally set up the online feature group, attach extended attributes
 * to the feature group path, and log an FG_CREATED activity.
 *
 * @param featurestore the featurestore the feature group is created in
 * @param featuregroupDTO the user input describing the feature group; a CachedFeaturegroupDTO is
 *                        handled as a cached feature group, anything else is cast to
 *                        OnDemandFeaturegroupDTO
 * @param project the project passed on to the underlying controllers
 * @param user the user making the request
 * @return the DTO built from the persisted feature group
 */
public FeaturegroupDTO createFeaturegroupNoValidation(Featurestore featurestore, FeaturegroupDTO featuregroupDTO, Project project, Users user) throws FeaturestoreException, SQLException, ProvenanceException, ServiceException, KafkaException, SchemaException, ProjectException, UserException, IOException, HopsSecurityException {
    final boolean cachedRequested = featuregroupDTO instanceof CachedFeaturegroupDTO;

    // Exactly one of the two type-specific entities is populated below; the other stays null.
    CachedFeaturegroup cachedEntity = null;
    OnDemandFeaturegroup onDemandEntity = null;
    List<FeatureGroupFeatureDTO> schemaSnapshot = null;
    if (!cachedRequested) {
        onDemandEntity = onDemandFeaturegroupController.createOnDemandFeaturegroup(featurestore, (OnDemandFeaturegroupDTO) featuregroupDTO, project, user);
    } else {
        // Copy the feature list before the cached feature group is created, so the online setup
        // below works on the schema without hudi columns.
        schemaSnapshot = new ArrayList<>(featuregroupDTO.getFeatures());
        cachedEntity = cachedFeaturegroupController.createCachedFeaturegroup(featurestore, (CachedFeaturegroupDTO) featuregroupDTO, project, user);
    }

    // Basic metadata shared by both feature group types.
    Featuregroup persisted = persistFeaturegroupMetadata(featurestore, user, featuregroupDTO, cachedEntity, onDemandEntity);

    // The online feature group is set up only after the metadata is persisted, because the
    // feature group id is needed.
    if (cachedRequested && settings.isOnlineFeaturestore()) {
        if (persisted.getCachedFeaturegroup().isOnlineEnabled()) {
            onlineFeaturegroupController.setupOnlineFeatureGroup(featurestore, persisted, schemaSnapshot, project, user);
        }
    }

    FeaturegroupDTO resultDTO = convertFeaturegrouptoDTO(persisted, project, user);

    // Attach the metadata as extended attributes, acting as the HDFS user of the requester.
    String hdfsUser = hdfsUsersController.getHdfsUserName(project, user);
    DistributedFileSystemOps userDfso = dfs.getDfsOps(hdfsUser);
    try {
        String featuregroupPath = Utils.getFeaturestorePath(featurestore.getProject(), settings) + "/" + Utils.getFeaturegroupName(persisted);
        fsController.featuregroupAttachXAttrs(featuregroupPath, resultDTO, userDfso);
    } finally {
        // Always hand the client back, even if attaching the attributes failed.
        dfs.closeDfsClient(userDfso);
    }

    // Record the creation in the feature store activity log.
    fsActivityFacade.logMetadataActivity(user, persisted, FeaturestoreActivityMeta.FG_CREATED, null);
    return resultDTO;
}
Also used : OnDemandFeaturegroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.ondemand.OnDemandFeaturegroup) FeatureGroupFeatureDTO(io.hops.hopsworks.common.featurestore.feature.FeatureGroupFeatureDTO) Featuregroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup) OnDemandFeaturegroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.ondemand.OnDemandFeaturegroup) CachedFeaturegroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeaturegroup) DistributedFileSystemOps(io.hops.hopsworks.common.hdfs.DistributedFileSystemOps) CachedFeaturegroupDTO(io.hops.hopsworks.common.featurestore.featuregroup.cached.CachedFeaturegroupDTO) OnDemandFeaturegroupDTO(io.hops.hopsworks.common.featurestore.featuregroup.ondemand.OnDemandFeaturegroupDTO) OnDemandFeaturegroupDTO(io.hops.hopsworks.common.featurestore.featuregroup.ondemand.OnDemandFeaturegroupDTO) CachedFeaturegroupDTO(io.hops.hopsworks.common.featurestore.featuregroup.cached.CachedFeaturegroupDTO) CachedFeaturegroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeaturegroup)

Example 2 with OnDemandFeaturegroup

use of io.hops.hopsworks.persistence.entity.featurestore.featuregroup.ondemand.OnDemandFeaturegroup in project hopsworks by logicalclocks.

In the class FeaturegroupController, the method persistFeaturegroupMetadata.

/**
 * Builds a new Featuregroup entity from the DTO and persists it through the feature group facade.
 *
 * Besides the basic fields, the entity receives a statistics configuration (with its statistic
 * columns) and, when the DTO lists expectation names, the matching feature group expectations.
 *
 * @param featurestore the featurestore of the feature group
 * @param user the Hopsworks user making the request; stored as the creator
 * @param featuregroupDTO DTO of the feature group
 * @param cachedFeaturegroup the cached feature group that the feature group is linked to (if any)
 * @param onDemandFeaturegroup the on-demand feature group that the feature group is linked to (if any)
 * @return the created entity
 */
private Featuregroup persistFeaturegroupMetadata(Featurestore featurestore, Users user, FeaturegroupDTO featuregroupDTO, CachedFeaturegroup cachedFeaturegroup, OnDemandFeaturegroup onDemandFeaturegroup) throws FeaturestoreException {
    Featuregroup entity = new Featuregroup();

    // Basic identity and ownership fields
    entity.setName(featuregroupDTO.getName());
    entity.setFeaturestore(featurestore);
    entity.setCreated(new Date());
    entity.setCreator(user);
    entity.setVersion(featuregroupDTO.getVersion());
    // The validation type is only set when the DTO supplies one.
    if (featuregroupDTO.getValidationType() != null) {
        entity.setValidationType(featuregroupDTO.getValidationType());
    }

    // The type is derived from the concrete DTO class.
    if (featuregroupDTO instanceof CachedFeaturegroupDTO) {
        entity.setFeaturegroupType(FeaturegroupType.CACHED_FEATURE_GROUP);
    } else {
        entity.setFeaturegroupType(FeaturegroupType.ON_DEMAND_FEATURE_GROUP);
    }
    entity.setCachedFeaturegroup(cachedFeaturegroup);
    entity.setOnDemandFeaturegroup(onDemandFeaturegroup);
    entity.setEventTime(featuregroupDTO.getEventTime());

    // Statistics configuration, linked in both directions with the feature group
    StatisticsConfig statsEntity = new StatisticsConfig(
        featuregroupDTO.getStatisticsConfig().getEnabled(),
        featuregroupDTO.getStatisticsConfig().getCorrelations(),
        featuregroupDTO.getStatisticsConfig().getHistograms(),
        featuregroupDTO.getStatisticsConfig().getExactUniqueness());
    statsEntity.setFeaturegroup(entity);
    statsEntity.setStatisticColumns(
        featuregroupDTO.getStatisticsConfig().getColumns().stream()
            .map(column -> new StatisticColumn(statsEntity, column))
            .collect(Collectors.toList()));
    entity.setStatisticsConfig(statsEntity);

    // Expectations are attached only when the DTO names some.
    if (featuregroupDTO.getExpectationsNames() != null) {
        List<FeatureGroupExpectation> attached = new ArrayList<>();
        for (String expectationName : featuregroupDTO.getExpectationsNames()) {
            FeatureStoreExpectation storeExpectation = featureGroupValidationsController.getFeatureStoreExpectation(entity.getFeaturestore(), expectationName);
            Optional<FeatureGroupExpectation> existing = featureGroupExpectationFacade.findByFeaturegroupAndExpectation(entity, storeExpectation);
            FeatureGroupExpectation link;
            if (existing.isPresent()) {
                // Reuse the link the facade already knows about.
                link = existing.get();
            } else {
                link = new FeatureGroupExpectation();
                link.setFeaturegroup(entity);
                link.setFeatureStoreExpectation(storeExpectation);
            }
            attached.add(link);
        }
        entity.setExpectations(attached);
    }

    featuregroupFacade.persist(entity);
    return entity;
}
Also used : StatisticsConfig(io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticsConfig) FeatureGroupExpectation(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.datavalidation.FeatureGroupExpectation) Featuregroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup) OnDemandFeaturegroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.ondemand.OnDemandFeaturegroup) CachedFeaturegroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeaturegroup) ArrayList(java.util.ArrayList) CachedFeaturegroupDTO(io.hops.hopsworks.common.featurestore.featuregroup.cached.CachedFeaturegroupDTO) FeatureStoreExpectation(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.datavalidation.FeatureStoreExpectation) StatisticColumn(io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticColumn) Date(java.util.Date)

Example 3 with OnDemandFeaturegroup

use of io.hops.hopsworks.persistence.entity.featurestore.featuregroup.ondemand.OnDemandFeaturegroup in project hopsworks by logicalclocks.

In the class OnDemandFeaturegroupController, the method createOnDemandFeaturegroup.

/**
 * Validates the user input for an on demand feature group and persists it.
 *
 * Validation rules, checked in this order:
 * <ul>
 *   <li>a storage connector must be provided in the DTO;</li>
 *   <li>KAFKA connectors are rejected;</li>
 *   <li>JDBC, REDSHIFT and SNOWFLAKE connectors require a non-empty query;</li>
 *   <li>all other connectors must not specify a query and must specify a data format.</li>
 * </ul>
 *
 * @param featurestore the featurestore passed on when creating the backing file
 * @param onDemandFeaturegroupDTO the user input data to use when creating the feature group
 * @param project the project passed on when creating the backing file
 * @param user the user passed on when creating the backing file
 * @return the created entity
 * @throws FeaturestoreException if the storage connector is missing or unsupported, or if the
 *                               query / data format does not match the connector type
 */
public OnDemandFeaturegroup createOnDemandFeaturegroup(Featurestore featurestore, OnDemandFeaturegroupDTO onDemandFeaturegroupDTO, Project project, Users user) throws FeaturestoreException {
    // Verify User Input specific for on demand feature groups
    // A missing connector is reported as an input error instead of surfacing as a NullPointerException.
    if (onDemandFeaturegroupDTO.getStorageConnector() == null) {
        throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.COULD_NOT_CREATE_ON_DEMAND_FEATUREGROUP, Level.FINE, "Storage connector must be provided when creating an on demand feature group");
    }
    FeaturestoreConnector connector = getStorageConnector(onDemandFeaturegroupDTO.getStorageConnector().getId());
    FeaturestoreConnectorType connectorType = connector.getConnectorType();

    // We allow users to read an entire S3 bucket for instance and they don't need to provide us with a query
    // However if you are running against a JDBC database, you need to provide a query
    boolean isJDBCType = connectorType == FeaturestoreConnectorType.JDBC
        || connectorType == FeaturestoreConnectorType.REDSHIFT
        || connectorType == FeaturestoreConnectorType.SNOWFLAKE;
    boolean hasQuery = !Strings.isNullOrEmpty(onDemandFeaturegroupDTO.getQuery());

    if (connectorType == FeaturestoreConnectorType.KAFKA) {
        throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.COULD_NOT_CREATE_ON_DEMAND_FEATUREGROUP, Level.FINE, connectorType + " storage connectors are not supported as source for on demand " + "feature groups");
    }
    if (!hasQuery && isJDBCType) {
        throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.INVALID_SQL_QUERY, Level.FINE, "SQL Query cannot be empty");
    }
    if (hasQuery && !isJDBCType) {
        throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.INVALID_SQL_QUERY, Level.FINE, "SQL query not supported when specifying " + connectorType + " storage connectors");
    }
    if (onDemandFeaturegroupDTO.getDataFormat() == null && !isJDBCType) {
        throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.ILLEGAL_ON_DEMAND_DATA_FORMAT, Level.FINE, "Data format required when specifying " + connectorType + " storage connectors");
    }

    // Persist on-demand featuregroup
    OnDemandFeaturegroup onDemandFeaturegroup = new OnDemandFeaturegroup();
    onDemandFeaturegroup.setDescription(onDemandFeaturegroupDTO.getDescription());
    onDemandFeaturegroup.setFeaturestoreConnector(connector);
    onDemandFeaturegroup.setQuery(onDemandFeaturegroupDTO.getQuery());
    onDemandFeaturegroup.setFeatures(convertOnDemandFeatures(onDemandFeaturegroupDTO, onDemandFeaturegroup));
    onDemandFeaturegroup.setInode(createFile(project, user, featurestore, onDemandFeaturegroupDTO));
    onDemandFeaturegroup.setDataFormat(onDemandFeaturegroupDTO.getDataFormat());
    onDemandFeaturegroup.setPath(onDemandFeaturegroupDTO.getPath());
    // Options are optional; each one is linked back to the new entity.
    if (onDemandFeaturegroupDTO.getOptions() != null) {
        onDemandFeaturegroup.setOptions(onDemandFeaturegroupDTO.getOptions().stream().map(o -> new OnDemandOption(onDemandFeaturegroup, o.getName(), o.getValue())).collect(Collectors.toList()));
    }
    onDemandFeaturegroupFacade.persist(onDemandFeaturegroup);
    return onDemandFeaturegroup;
}
Also used : FeaturestoreConnector(io.hops.hopsworks.persistence.entity.featurestore.storageconnector.FeaturestoreConnector) OnDemandFeaturegroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.ondemand.OnDemandFeaturegroup) OnDemandOption(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.ondemand.OnDemandOption) FeaturestoreException(io.hops.hopsworks.exceptions.FeaturestoreException)

Aggregations

OnDemandFeaturegroup (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.ondemand.OnDemandFeaturegroup)3 CachedFeaturegroupDTO (io.hops.hopsworks.common.featurestore.featuregroup.cached.CachedFeaturegroupDTO)2 Featuregroup (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup)2 CachedFeaturegroup (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeaturegroup)2 FeatureGroupFeatureDTO (io.hops.hopsworks.common.featurestore.feature.FeatureGroupFeatureDTO)1 OnDemandFeaturegroupDTO (io.hops.hopsworks.common.featurestore.featuregroup.ondemand.OnDemandFeaturegroupDTO)1 DistributedFileSystemOps (io.hops.hopsworks.common.hdfs.DistributedFileSystemOps)1 FeaturestoreException (io.hops.hopsworks.exceptions.FeaturestoreException)1 FeatureGroupExpectation (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.datavalidation.FeatureGroupExpectation)1 FeatureStoreExpectation (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.datavalidation.FeatureStoreExpectation)1 OnDemandOption (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.ondemand.OnDemandOption)1 StatisticColumn (io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticColumn)1 StatisticsConfig (io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticsConfig)1 FeaturestoreConnector (io.hops.hopsworks.persistence.entity.featurestore.storageconnector.FeaturestoreConnector)1 ArrayList (java.util.ArrayList)1 Date (java.util.Date)1