Search in sources :

Example 1 with FeatureGroupFeatureDTO

use of io.hops.hopsworks.common.featurestore.feature.FeatureGroupFeatureDTO in project hopsworks by logicalclocks.

In the class PreparedStatementBuilder, the method getServingStatements:

/**
 * Builds one serving prepared statement per join (feature group) of a query-based training dataset.
 *
 * Joins are processed in the order returned by {@code trainingDatasetController.getJoinsSorted}. A join
 * whose training dataset features are all labels contributes no statement.
 *
 * @param trainingDataset the training dataset to build serving statements for
 * @param project the project, forwarded when fetching the features of each feature group
 * @param user the user, forwarded when fetching the features of each feature group
 * @param batch forwarded unchanged to {@code buildDTO}
 * @return the prepared statement DTOs, in join order
 * @throws FeaturestoreException if the training dataset was not generated by a query, if a training dataset
 *         feature has no feature group, if a joined feature group is not online enabled, or if a joined
 *         feature group has no primary key
 */
private List<ServingPreparedStatementDTO> getServingStatements(TrainingDataset trainingDataset, Project project, Users user, boolean batch) throws FeaturestoreException {
    if (!trainingDataset.isQuery()) {
        throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_NO_QUERY, Level.FINE, "Inference vector is only available for datasets generated by queries");
    }
    List<ServingPreparedStatementDTO> statements = new ArrayList<>();
    List<TrainingDatasetJoin> sortedJoins = trainingDatasetController.getJoinsSorted(trainingDataset);
    // Every training dataset feature must still point at a feature group; otherwise fail with a clear error.
    boolean allFeatureGroupsPresent = trainingDataset.getFeatures().stream().noneMatch(tdf -> tdf.getFeatureGroup() == null);
    if (!allFeatureGroupsPresent) {
        throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_QUERY_FG_DELETED, Level.FINE);
    }
    // One join corresponds to one feature group.
    for (TrainingDatasetJoin join : sortedJoins) {
        Featuregroup featuregroup = join.getFeatureGroup();
        if (!featuregroup.getCachedFeaturegroup().isOnlineEnabled()) {
            throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.FEATURESTORE_ONLINE_NOT_ENABLED, Level.FINE, "Inference vector is only available for training datasets generated by online enabled " + "feature groups");
        }
        // Index all features of the feature group by name, wrapped as query features carrying the join prefix.
        Map<String, Feature> featuresByName = featuregroupController.getFeatures(featuregroup, project, user).stream()
            .collect(Collectors.toMap(
                FeatureGroupFeatureDTO::getName,
                dto -> new Feature(dto.getName(), ALIAS, dto.getType(), dto.getPrimary(), dto.getDefaultValue(), join.getPrefix())));
        // Primary keys are taken from the feature group itself, since they may not be part of the query
        // that generated the training dataset.
        List<Feature> primaryKeys = featuresByName.values().stream()
            .filter(Feature::isPrimary)
            .collect(Collectors.toList());
        if (primaryKeys.isEmpty()) {
            throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.PRIMARY_KEY_REQUIRED, Level.FINE, "Inference vector is only available for training datasets generated by feature groups with " + "at least 1 primary key");
        }
        // Non-label training dataset features of this join, ordered by their index.
        List<Feature> selectFeatures = join.getFeatures().stream()
            .filter(tdf -> !tdf.isLabel())
            .sorted(Comparator.comparing(TrainingDatasetFeature::getIndex))
            .map(tdf -> featuresByName.get(tdf.getName()))
            .collect(Collectors.toList());
        // If only label(s) are used from this feature group there is nothing to select, so no
        // prepared statement is produced for it.
        if (selectFeatures.isEmpty()) {
            continue;
        }
        // Build the query for this feature group and wrap it in a ServingPreparedStatementDTO.
        Project featuregroupProject = featuregroup.getFeaturestore().getProject();
        Query query = new Query(
            featurestoreController.getOfflineFeaturestoreDbName(featuregroupProject),
            onlineFeaturestoreController.getOnlineFeaturestoreDbName(featuregroupProject),
            featuregroup,
            ALIAS,
            selectFeatures);
        statements.add(buildDTO(query, primaryKeys, featuregroup.getId(), join.getIndex(), batch));
    }
    return statements;
}
Also used : Arrays(java.util.Arrays) Feature(io.hops.hopsworks.common.featurestore.query.Feature) TrainingDatasetController(io.hops.hopsworks.common.featurestore.trainingdatasets.TrainingDatasetController) OnlineFeaturestoreController(io.hops.hopsworks.common.featurestore.online.OnlineFeaturestoreController) Project(io.hops.hopsworks.persistence.entity.project.Project) ArrayList(java.util.ArrayList) Level(java.util.logging.Level) FeaturestoreException(io.hops.hopsworks.exceptions.FeaturestoreException) TransactionAttributeType(javax.ejb.TransactionAttributeType) FeaturestoreController(io.hops.hopsworks.common.featurestore.FeaturestoreController) TransactionAttribute(javax.ejb.TransactionAttribute) Map(java.util.Map) ResourceRequest(io.hops.hopsworks.common.api.ResourceRequest) TrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset) Query(io.hops.hopsworks.common.featurestore.query.Query) Filter(io.hops.hopsworks.common.featurestore.query.filter.Filter) URI(java.net.URI) FeatureGroupFeatureDTO(io.hops.hopsworks.common.featurestore.feature.FeatureGroupFeatureDTO) EJB(javax.ejb.EJB) Stateless(javax.ejb.Stateless) SqlCondition(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlCondition) ServingPreparedStatementDTO(io.hops.hopsworks.common.featurestore.query.ServingPreparedStatementDTO) TrainingDatasetFeature(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFeature) Featuregroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup) RESTCodes(io.hops.hopsworks.restutils.RESTCodes) ConstructorController(io.hops.hopsworks.common.featurestore.query.ConstructorController) Featurestore(io.hops.hopsworks.persistence.entity.featurestore.Featurestore) Collectors(java.util.stream.Collectors) SqlDialect(org.apache.calcite.sql.SqlDialect) FeaturegroupController(io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupController) 
TrainingDatasetJoin(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetJoin) List(java.util.List) FilterController(io.hops.hopsworks.common.featurestore.query.filter.FilterController) FilterLogic(io.hops.hopsworks.common.featurestore.query.filter.FilterLogic) PreparedStatementParameterDTO(io.hops.hopsworks.common.featurestore.query.PreparedStatementParameterDTO) MysqlSqlDialect(org.apache.calcite.sql.dialect.MysqlSqlDialect) UriInfo(javax.ws.rs.core.UriInfo) Users(io.hops.hopsworks.persistence.entity.user.Users) Comparator(java.util.Comparator) TrainingDatasetJoin(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetJoin) Query(io.hops.hopsworks.common.featurestore.query.Query) ServingPreparedStatementDTO(io.hops.hopsworks.common.featurestore.query.ServingPreparedStatementDTO) Featuregroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup) ArrayList(java.util.ArrayList) Feature(io.hops.hopsworks.common.featurestore.query.Feature) TrainingDatasetFeature(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFeature) FeaturestoreException(io.hops.hopsworks.exceptions.FeaturestoreException)

Example 2 with FeatureGroupFeatureDTO

use of io.hops.hopsworks.common.featurestore.feature.FeatureGroupFeatureDTO in project hopsworks by logicalclocks.

In the class CachedFeaturegroupController, the method enableFeaturegroupOnline:

/**
 * Enables online feature serving for a cached featuregroup.
 *
 * The online feature group is only set up when the cached featuregroup is not already online enabled;
 * the online-enabled flag is set and the metadata is persisted in either case.
 *
 * @param featurestore the featurestore where the featuregroup resides
 * @param featuregroup the featuregroup entity to update
 * @param project the project, forwarded when reading features and setting up the online feature group
 * @param user the user making the request
 * @return a DTO of the updated featuregroup
 * @throws FeaturestoreException if the online featurestore is not enabled for the cluster, or the online
 *         database of the featurestore's project does not exist
 * @throws SQLException
 * @throws ServiceException
 * @throws KafkaException
 * @throws SchemaException
 * @throws ProjectException
 * @throws UserException
 * @throws IOException
 * @throws HopsSecurityException
 */
public FeaturegroupDTO enableFeaturegroupOnline(Featurestore featurestore, Featuregroup featuregroup, Project project, Users user) throws FeaturestoreException, SQLException, ServiceException, KafkaException, SchemaException, ProjectException, UserException, IOException, HopsSecurityException {
    // Cluster-wide switch first.
    if (!settings.isOnlineFeaturestore()) {
        throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.FEATURESTORE_ONLINE_NOT_ENABLED, Level.FINE, "Online Featurestore is not enabled for this Hopsworks cluster.");
    }
    // Then the per-project online database.
    String onlineDbName = onlineFeaturestoreController.getOnlineFeaturestoreDbName(featurestore.getProject());
    if (!onlineFeaturestoreController.checkIfDatabaseExists(onlineDbName)) {
        throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.FEATURESTORE_ONLINE_NOT_ENABLED, Level.FINE, "Online Featurestore is not enabled for this project. To enable online feature store, talk to an " + "administrator.");
    }

    CachedFeaturegroup cachedFeaturegroup = featuregroup.getCachedFeaturegroup();
    List<FeatureGroupFeatureDTO> allFeatures = getFeaturesDTO(featuregroup, project, user);
    // For HUDI feature groups the feature list is first passed through dropHudiSpecFeatureGroupFeature.
    List<FeatureGroupFeatureDTO> features = cachedFeaturegroup.getTimeTravelFormat() == TimeTravelFormat.HUDI
        ? dropHudiSpecFeatureGroupFeature(allFeatures)
        : allFeatures;

    boolean alreadyOnline = cachedFeaturegroup.isOnlineEnabled();
    if (!alreadyOnline) {
        onlineFeaturegroupController.setupOnlineFeatureGroup(featurestore, featuregroup, features, project, user);
    }

    // Flag the cached feature group as online enabled and persist the change.
    cachedFeaturegroup.setOnlineEnabled(true);
    cachedFeatureGroupFacade.updateMetadata(cachedFeaturegroup);
    return convertCachedFeaturegroupToDTO(featuregroup, project, user);
}
Also used : FeatureGroupFeatureDTO(io.hops.hopsworks.common.featurestore.feature.FeatureGroupFeatureDTO) FeaturestoreException(io.hops.hopsworks.exceptions.FeaturestoreException) CachedFeaturegroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeaturegroup)

Example 3 with FeatureGroupFeatureDTO

use of io.hops.hopsworks.common.featurestore.feature.FeatureGroupFeatureDTO in project hopsworks by logicalclocks.

In the class CachedFeaturegroupController, the method buildFeatureExtraConstrains:

/**
 * Builds the extra-constraint entities (primary key / hudi precombine key) for a cached featuregroup.
 *
 * One constraint is created per primary key feature. For HUDI feature groups, a precombine key that is not
 * itself a primary key gets one additional constraint.
 *
 * @param featureGroupFeatureDTOS the features of the feature group
 * @param cachedFeaturegroup the cached featuregroup the constraints belong to
 * @return the list of constraints, primary keys first
 */
private List<CachedFeatureExtraConstraints> buildFeatureExtraConstrains(List<FeatureGroupFeatureDTO> featureGroupFeatureDTOS, CachedFeaturegroup cachedFeaturegroup) {
    List<String> primaryKeyNames = featureGroupFeatureDTOS.stream()
        .filter(FeatureGroupFeatureDTO::getPrimary)
        .map(FeatureGroupFeatureDTO::getName)
        .collect(Collectors.toList());
    // At most one feature is used as the hudi precombine key: the first one flagged as such, if any.
    String precombineKeyName = featureGroupFeatureDTOS.stream()
        .filter(FeatureGroupFeatureDTO::getHudiPrecombineKey)
        .map(FeatureGroupFeatureDTO::getName)
        .findFirst()
        .orElse(null);

    boolean hudi = cachedFeaturegroup.getTimeTravelFormat() == TimeTravelFormat.HUDI;
    boolean precombineIsPrimaryKey = false;
    if (hudi && precombineKeyName == null) {
        // No explicit precombine key: fall back to the first primary key.
        // NOTE(review): this throws if the feature group has no primary key - confirm callers guarantee one.
        precombineKeyName = primaryKeyNames.get(0);
        precombineIsPrimaryKey = true;
    } else if (hudi) {
        // The user may have chosen one of the primary keys as the precombine key.
        precombineIsPrimaryKey = primaryKeyNames.contains(precombineKeyName);
    }

    List<CachedFeatureExtraConstraints> constraints = new ArrayList<>();
    for (String primaryKeyName : primaryKeyNames) {
        boolean isPrecombineKey = primaryKeyName.equals(precombineKeyName);
        constraints.add(new CachedFeatureExtraConstraints(cachedFeaturegroup, primaryKeyName, true, isPrecombineKey));
    }
    if (hudi && !precombineIsPrimaryKey) {
        // Precombine key that is not a primary key needs its own entry.
        constraints.add(new CachedFeatureExtraConstraints(cachedFeaturegroup, precombineKeyName, false, true));
    }
    return constraints;
}
Also used : CachedFeatureExtraConstraints(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeatureExtraConstraints) FeatureGroupFeatureDTO(io.hops.hopsworks.common.featurestore.feature.FeatureGroupFeatureDTO) ArrayList(java.util.ArrayList)

Example 4 with FeatureGroupFeatureDTO

use of io.hops.hopsworks.common.featurestore.feature.FeatureGroupFeatureDTO in project hopsworks by logicalclocks.

In the class CachedFeaturegroupController, the method convertCachedFeaturegroupToDTO:

/**
 * Converts a featuregroup entity backed by a cached featuregroup into its DTO representation.
 *
 * When the online featurestore is enabled and the feature group is online enabled, the DTO also carries
 * the online topic name, and each feature whose name matches an online feature (ignoring case) gets its
 * online type set.
 *
 * @param featuregroup the entity to convert
 * @param project the project, used for the online topic name and forwarded when reading the features
 * @param user the user, forwarded when reading the features
 * @return the converted DTO representation
 * @throws FeaturestoreException
 * @throws ServiceException
 */
public CachedFeaturegroupDTO convertCachedFeaturegroupToDTO(Featuregroup featuregroup, Project project, Users user) throws FeaturestoreException, ServiceException {
    CachedFeaturegroupDTO dto = new CachedFeaturegroupDTO(featuregroup);
    List<FeatureGroupFeatureDTO> offlineFeatures = getFeaturesDTO(featuregroup, project, user);

    boolean servedOnline = settings.isOnlineFeaturestore() && featuregroup.getCachedFeaturegroup().isOnlineEnabled();
    if (servedOnline) {
        dto.setOnlineEnabled(true);
        dto.setOnlineTopicName(onlineFeaturegroupController.onlineFeatureGroupTopicName(project.getId(), featuregroup.getId(), Utils.getFeaturegroupName(featuregroup)));
        List<FeatureGroupFeatureDTO> onlineFeatures = onlineFeaturegroupController.getFeaturegroupFeatures(featuregroup);
        // Copy the online type onto every offline feature with a case-insensitively matching name.
        for (FeatureGroupFeatureDTO offlineFeature : offlineFeatures) {
            onlineFeatures.stream()
                .filter(onlineFeature -> offlineFeature.getName().equalsIgnoreCase(onlineFeature.getName()))
                .forEach(onlineFeature -> offlineFeature.setOnlineType(onlineFeature.getType()));
        }
    }

    // Plain attributes copied from the entity.
    dto.setFeatures(offlineFeatures);
    dto.setName(featuregroup.getName());
    dto.setValidationType(featuregroup.getValidationType());
    dto.setTimeTravelFormat(featuregroup.getCachedFeaturegroup().getTimeTravelFormat());

    // The description is the value of the Hive table parameter keyed "COMMENT"; empty when absent.
    dto.setDescription(featuregroup.getCachedFeaturegroup().getHiveTbls().getHiveTableParamsCollection().stream()
        .filter(param -> param.getHiveTableParamsPK().getParamKey().equalsIgnoreCase("COMMENT"))
        .map(HiveTableParams::getParamValue)
        .findFirst()
        .orElse(""));
    dto.setLocation(featurestoreUtils.resolveLocationURI(featuregroup.getCachedFeaturegroup().getHiveTbls().getSdId().getLocation()));
    return dto;
}
Also used : FeatureGroupFeatureDTO(io.hops.hopsworks.common.featurestore.feature.FeatureGroupFeatureDTO)

Example 5 with FeatureGroupFeatureDTO

use of io.hops.hopsworks.common.featurestore.feature.FeatureGroupFeatureDTO in project hopsworks by logicalclocks.

In the class CachedFeaturegroupController, the method getWhereCondition:

/**
 * Builds the SQL WHERE condition selecting a specific partition of a feature group.
 *
 * The partition is given as {@code column=VALUE} segments separated by {@code /}. Each segment becomes an
 * equality condition. Values of partition columns whose type is "string" (ignoring case) are emitted as
 * character literals; all other values are emitted as identifiers.
 *
 * @param partition the partition specification, e.g. {@code column=VALUE/column=VALUE}; may be null or empty
 * @param features the features of the feature group, used to look up the partition columns
 * @return null if no partition was requested; the node list itself if it holds exactly one condition;
 *         otherwise the result of {@code AND.createCall} over the node list
 * @throws FeaturestoreException if a segment is not of the form {@code column=VALUE}, if the partition
 *         contains no segments, or if a column is not one of the partition columns of the feature group
 */
public SqlNode getWhereCondition(String partition, List<FeatureGroupFeatureDTO> features) throws FeaturestoreException {
    if (Strings.isNullOrEmpty(partition)) {
        // user didn't ask for a specific partition
        return null;
    }
    // partition names are separated by /, column=VALUE/column=VALUE
    SqlNodeList whereClauses = new SqlNodeList(SqlParserPos.ZERO);
    String[] splits = partition.split("/");
    for (String split : splits) {
        int posEqual = split.indexOf("=");
        if (posEqual < 0) {
            // Without this guard substring(0, -1) would fail with an unchecked StringIndexOutOfBoundsException
            // on user supplied input; report it as a regular featurestore error instead.
            throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.ILLEGAL_FEATURE_NAME, Level.FINE, "The partition segment: " + split + " is malformed, expected the format column=VALUE.");
        }
        String column = split.substring(0, posEqual);
        FeatureGroupFeatureDTO partitionFeature = features.stream().filter(FeatureGroupFeatureDTO::getPartition).filter(feature -> feature.getName().equals(column)).findFirst().orElseThrow(() -> new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.ILLEGAL_FEATURE_NAME, Level.FINE, "The selected partition column: " + column + " was not found among the partition columns of the feature " + "group."));
        SqlNode value;
        if (partitionFeature.getType().equalsIgnoreCase("string")) {
            value = SqlLiteral.createCharString(split.substring(posEqual + 1), SqlParserPos.ZERO);
        } else {
            value = new SqlIdentifier(split.substring(posEqual + 1), SqlParserPos.ZERO);
        }
        whereClauses.add(SqlStdOperatorTable.EQUALS.createCall(SqlParserPos.ZERO, new SqlIdentifier("`" + column + "`", SqlParserPos.ZERO), value));
    }
    if (whereClauses.size() == 0) {
        // A partition made only of separators (e.g. "/") yields no segments at all.
        throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.ILLEGAL_FEATURE_NAME, Level.FINE, "The partition: " + partition + " is malformed, expected the format column=VALUE.");
    }
    if (whereClauses.size() == 1) {
        return whereClauses;
    }
    return SqlStdOperatorTable.AND.createCall(whereClauses);
}
Also used : FeatureGroupFeatureDTO(io.hops.hopsworks.common.featurestore.feature.FeatureGroupFeatureDTO) SqlNodeList(org.apache.calcite.sql.SqlNodeList) FeaturestoreException(io.hops.hopsworks.exceptions.FeaturestoreException) SqlIdentifier(org.apache.calcite.sql.SqlIdentifier) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) SqlNode(org.apache.calcite.sql.SqlNode)

Aggregations

FeatureGroupFeatureDTO (io.hops.hopsworks.common.featurestore.feature.FeatureGroupFeatureDTO)59 ArrayList (java.util.ArrayList)34 Test (org.junit.Test)33 Feature (io.hops.hopsworks.common.featurestore.query.Feature)8 Featuregroup (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup)8 FeaturestoreException (io.hops.hopsworks.exceptions.FeaturestoreException)7 CachedFeaturegroup (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeaturegroup)7 SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint)5 Before (org.junit.Before)5 FeaturegroupDTO (io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupDTO)4 OnlineFeaturestoreController (io.hops.hopsworks.common.featurestore.online.OnlineFeaturestoreController)4 FeaturegroupController (io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupController)3 CachedFeaturegroupDTO (io.hops.hopsworks.common.featurestore.featuregroup.cached.CachedFeaturegroupDTO)3 FilterController (io.hops.hopsworks.common.featurestore.query.filter.FilterController)3 Featurestore (io.hops.hopsworks.persistence.entity.featurestore.Featurestore)3 CachedFeature (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeature)3 CachedFeatureExtraConstraints (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeatureExtraConstraints)3 Project (io.hops.hopsworks.persistence.entity.project.Project)3 Users (io.hops.hopsworks.persistence.entity.user.Users)3 IOException (java.io.IOException)3