Search in sources :

Example 6 with FeatureGroupCommit

use of io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.FeatureGroupCommit in project hopsworks by logicalclocks.

In class QueryController, method convertQueryDTO.

/**
 * Recursively convert the QueryDTO into the internal query representation.
 *
 * <p>For feature groups whose cached time travel format is HUDI, the end commit (and, for queries
 * without joins, the start commit) is looked up through the commit controller and stored on the
 * resulting query.
 *
 * @param queryDTO the query DTO to convert; its joins are converted recursively via convertJoins
 * @param fgAliasLookup feature group id to the alias handed to the Query
 * @param fgLookup feature group id to feature group entity
 * @param availableFeatureLookup feature group id to the features available in that feature group
 * @param pitEnabled forwarded unchanged to convertJoins and removeDuplicateColumns
 * @return the internal Query built from the DTO
 * @throws FeaturestoreException propagated from the validation and conversion helpers
 * @throws IllegalArgumentException if the Hive engine is combined with an end time on a HUDI
 *         feature group, or if a start time is combined with joins on a HUDI feature group
 */
public Query convertQueryDTO(QueryDTO queryDTO, Map<Integer, String> fgAliasLookup, Map<Integer, Featuregroup> fgLookup, Map<Integer, List<Feature>> availableFeatureLookup, boolean pitEnabled) throws FeaturestoreException {
    Integer fgId = queryDTO.getLeftFeatureGroup().getId();
    Featuregroup fg = fgLookup.get(fgId);
    String featureStore = featurestoreFacade.getHiveDbName(fg.getFeaturestore().getHiveDbId());
    // used to build the online query - needs to respect the online db format name
    String projectName = onlineFeaturestoreController.getOnlineFeaturestoreDbName(fg.getFeaturestore().getProject());
    List<Feature> requestedFeatures = validateFeatures(fg, queryDTO.getLeftFeatures(), availableFeatureLookup.get(fgId));
    Query query = new Query(featureStore, projectName, fg, fgAliasLookup.get(fgId), requestedFeatures, availableFeatureLookup.get(fgId), queryDTO.getHiveEngine());
    // The joins list is optional on the DTO, so every access below goes through this guard.
    boolean hasJoins = queryDTO.getJoins() != null && !queryDTO.getJoins().isEmpty();
    if (fg.getCachedFeaturegroup() != null && fg.getCachedFeaturegroup().getTimeTravelFormat() == TimeTravelFormat.HUDI) {
        // With hudi and the hive engine only the latest snapshot can be read: reject any end time,
        // whether it is set on this query or on one of the joined queries.
        boolean joinHasEndTime = hasJoins && queryDTO.getJoins().stream().anyMatch(join -> join.getQuery().getLeftFeatureGroupEndTime() != null);
        if (queryDTO.getHiveEngine() && (queryDTO.getLeftFeatureGroupEndTime() != null || joinHasEndTime)) {
            throw new IllegalArgumentException("Hive engine on Python environments does not support incremental or " + "snapshot queries. Read feature group without timestamp to retrieve latest snapshot or switch to " + "environment with Spark Engine.");
        }
        // If the feature group is hudi, validate and configure start and end commit id/timestamp
        FeatureGroupCommit endCommit = featureGroupCommitCommitController.findCommitByDate(fg, queryDTO.getLeftFeatureGroupEndTime());
        query.setLeftFeatureGroupEndTimestamp(endCommit.getCommittedOn());
        query.setLeftFeatureGroupEndCommitId(endCommit.getFeatureGroupCommitPK().getCommitId());
        if (queryDTO.getLeftFeatureGroupStartTime() != null) {
            // A start time makes this an incremental query, which cannot be combined with joins.
            if (hasJoins) {
                throw new IllegalArgumentException("For incremental queries start time must be provided and " + "join statements are not allowed");
            }
            Long exactStartCommitTimestamp = featureGroupCommitCommitController.findCommitByDate(query.getFeaturegroup(), queryDTO.getLeftFeatureGroupStartTime()).getCommittedOn();
            query.setLeftFeatureGroupStartTimestamp(exactStartCommitTimestamp);
        }
    }
    // If there are any joins, recursively convert the Join's QueryDTO into the internal Query representation
    if (hasJoins) {
        query.setJoins(convertJoins(query, queryDTO.getJoins(), fgAliasLookup, fgLookup, availableFeatureLookup, pitEnabled));
        // remove duplicated join columns
        removeDuplicateColumns(query, pitEnabled);
    }
    // If there is a filter, convert its filter logic into the internal representation
    if (queryDTO.getFilter() != null) {
        query.setFilter(filterController.convertFilterLogic(queryDTO.getFilter(), fgLookup, availableFeatureLookup));
    }
    return query;
}
Also used : FeaturegroupFacade(io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupFacade) HashMap(java.util.HashMap) OnlineFeaturestoreController(io.hops.hopsworks.common.featurestore.online.OnlineFeaturestoreController) Project(io.hops.hopsworks.persistence.entity.project.Project) ArrayList(java.util.ArrayList) Level(java.util.logging.Level) Strings(com.google.common.base.Strings) FeaturestoreException(io.hops.hopsworks.exceptions.FeaturestoreException) FeatureGroupCommitController(io.hops.hopsworks.common.featurestore.featuregroup.cached.FeatureGroupCommitController) TransactionAttributeType(javax.ejb.TransactionAttributeType) TransactionAttribute(javax.ejb.TransactionAttribute) Map(java.util.Map) FeatureGroupFeatureDTO(io.hops.hopsworks.common.featurestore.feature.FeatureGroupFeatureDTO) EJB(javax.ejb.EJB) JoinDTO(io.hops.hopsworks.common.featurestore.query.join.JoinDTO) JoinType(org.apache.calcite.sql.JoinType) Stateless(javax.ejb.Stateless) SqlCondition(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlCondition) FeatureGroupCommit(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.FeatureGroupCommit) Featuregroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup) RESTCodes(io.hops.hopsworks.restutils.RESTCodes) Join(io.hops.hopsworks.common.featurestore.query.join.Join) Collectors(java.util.stream.Collectors) FeaturegroupController(io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupController) TimeTravelFormat(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.TimeTravelFormat) List(java.util.List) FeaturestoreFacade(io.hops.hopsworks.common.featurestore.FeaturestoreFacade) FilterController(io.hops.hopsworks.common.featurestore.query.filter.FilterController) Optional(java.util.Optional) Users(io.hops.hopsworks.persistence.entity.user.Users) FeaturegroupDTO(io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupDTO) 
Featuregroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup) FeatureGroupCommit(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.FeatureGroupCommit)

Example 7 with FeatureGroupCommit

use of io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.FeatureGroupCommit in project hopsworks by logicalclocks.

In class StatisticsController, method registerStatistics.

/**
 * Stores the given statistics content for a feature group and records it as a
 * FeaturestoreStatistic, linking it to a feature group commit when the feature group is a
 * cached feature group using the HUDI time travel format.
 *
 * @param project project passed on to the underlying registerStatistics overload
 * @param user user passed on to the underlying overload and to the activity log
 * @param statisticsCommitTimeStamp commit time for the statistics; may be replaced by the feature
 *        group commit time for HUDI feature groups (see the comment in the body)
 * @param fgCommitId value used to look up the feature group commit for HUDI feature groups
 * @param content statistics content, parsed with extractJsonFromContent
 * @param featuregroup feature group the statistics belong to
 * @return the FeaturestoreStatistic returned by the facade update
 */
public FeaturestoreStatistic registerStatistics(Project project, Users user, Long statisticsCommitTimeStamp, Long fgCommitId, String content, Featuregroup featuregroup) throws FeaturestoreException, DatasetException, HopsSecurityException, IOException {
    JSONObject parsedContent = extractJsonFromContent(content);
    boolean hudiFeatureGroup = featuregroup.getFeaturegroupType() == FeaturegroupType.CACHED_FEATURE_GROUP
        && featuregroup.getCachedFeaturegroup().getTimeTravelFormat() == TimeTravelFormat.HUDI;

    FeatureGroupCommit fgCommit = null;
    Long effectiveCommitTime = statisticsCommitTimeStamp;
    if (hudiFeatureGroup) {
        fgCommit = featureGroupCommitCommitController.findCommitByDate(featuregroup, fgCommitId);
        // When the client recomputes statistics for a particular commit (fgCommitId provided), the
        // statistics commit time is the system time sent by the client. When fgCommitId is null, either
        // 1) the client issued save/insert and the statistics commit time is the feature group commit, or
        // 2) statistics of an existing time travel enabled feature group are being recomputed: the latest
        //    feature group commit is used to read the data and the client system time is the commit time.
        // The lookup below yields null if statistics were never saved for this commit.
        FeaturestoreStatistic existingForCommit = featurestoreStatisticFacade
            .findFGStatisticsByCommitTime(featuregroup, fgCommit.getCommittedOn())
            .orElse(null);
        if (existingForCommit == null) {
            effectiveCommitTime = fgCommit.getCommittedOn();
        }
    }

    Inode storedInode = registerStatistics(project, user, effectiveCommitTime, parsedContent.toString(), featuregroup.getName(), "FeatureGroups", featuregroup.getVersion(), null, false);
    Timestamp commitTimestamp = new Timestamp(effectiveCommitTime);
    FeaturestoreStatistic statistic = new FeaturestoreStatistic(commitTimestamp, storedInode, featuregroup);
    if (hudiFeatureGroup) {
        statistic.setFeatureGroupCommit(fgCommit);
    }
    statistic = featurestoreStatisticFacade.update(statistic);

    // Log statistics activity
    Date activityDate = new Date(commitTimestamp.getTime());
    fsActivityFacade.logStatisticsActivity(user, featuregroup, activityDate, statistic);
    return statistic;
}
Also used : Inode(io.hops.hopsworks.persistence.entity.hdfs.inode.Inode) JSONObject(org.json.JSONObject) FeatureGroupCommit(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.FeatureGroupCommit) Timestamp(java.sql.Timestamp) FeaturestoreStatistic(io.hops.hopsworks.persistence.entity.featurestore.statistics.FeaturestoreStatistic) Date(java.util.Date)

Aggregations

FeatureGroupCommit (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.FeatureGroupCommit)7 Inode (io.hops.hopsworks.persistence.entity.hdfs.inode.Inode)3 Timestamp (java.sql.Timestamp)3 Users (io.hops.hopsworks.persistence.entity.user.Users)2 Query (javax.persistence.Query)2 Strings (com.google.common.base.Strings)1 AllowedProjectRoles (io.hops.hopsworks.api.filter.AllowedProjectRoles)1 ApiKeyRequired (io.hops.hopsworks.api.filter.apiKey.ApiKeyRequired)1 ResourceRequest (io.hops.hopsworks.common.api.ResourceRequest)1 FeaturestoreFacade (io.hops.hopsworks.common.featurestore.FeaturestoreFacade)1 FeatureGroupFeatureDTO (io.hops.hopsworks.common.featurestore.feature.FeatureGroupFeatureDTO)1 FeaturegroupController (io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupController)1 FeaturegroupDTO (io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupDTO)1 FeaturegroupFacade (io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupFacade)1 FeatureGroupCommitController (io.hops.hopsworks.common.featurestore.featuregroup.cached.FeatureGroupCommitController)1 OnlineFeaturestoreController (io.hops.hopsworks.common.featurestore.online.OnlineFeaturestoreController)1 FilterController (io.hops.hopsworks.common.featurestore.query.filter.FilterController)1 Join (io.hops.hopsworks.common.featurestore.query.join.Join)1 JoinDTO (io.hops.hopsworks.common.featurestore.query.join.JoinDTO)1 FeaturestoreException (io.hops.hopsworks.exceptions.FeaturestoreException)1