Use of io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.FeatureGroupCommit in project hopsworks by logicalclocks.
The class QueryController, method convertQueryDTO.
/**
 * Recursively convert the QueryDTO into the internal query representation.
 *
 * @param queryDTO the query DTO to convert
 * @param fgAliasLookup mapping from feature group id to the alias used in the generated query
 * @param fgLookup mapping from feature group id to the feature group metadata
 * @param availableFeatureLookup mapping from feature group id to the features available in it
 * @param pitEnabled whether point-in-time joins are enabled for this query
 * @return the internal Query representation
 * @throws FeaturestoreException if the query cannot be converted
 */
public Query convertQueryDTO(QueryDTO queryDTO, Map<Integer, String> fgAliasLookup,
    Map<Integer, Featuregroup> fgLookup, Map<Integer, List<Feature>> availableFeatureLookup,
    boolean pitEnabled) throws FeaturestoreException {
  Integer fgId = queryDTO.getLeftFeatureGroup().getId();
  Featuregroup fg = fgLookup.get(fgId);

  String featureStore = featurestoreFacade.getHiveDbName(fg.getFeaturestore().getHiveDbId());
  // used to build the online query - needs to respect the online db format name
  String projectName = onlineFeaturestoreController.getOnlineFeaturestoreDbName(fg.getFeaturestore().getProject());

  List<Feature> requestedFeatures = validateFeatures(fg, queryDTO.getLeftFeatures(), availableFeatureLookup.get(fgId));

  Query query = new Query(featureStore, projectName, fg, fgAliasLookup.get(fgId), requestedFeatures,
      availableFeatureLookup.get(fgId), queryDTO.getHiveEngine());
  if (fg.getCachedFeaturegroup() != null
      && fg.getCachedFeaturegroup().getTimeTravelFormat() == TimeTravelFormat.HUDI) {
    // if HUDI and the Hive engine, only the latest snapshot can be read, otherwise raise an exception
    if (queryDTO.getHiveEngine() && (queryDTO.getLeftFeatureGroupEndTime() != null
        || queryDTO.getJoins().stream().anyMatch(join -> join.getQuery().getLeftFeatureGroupEndTime() != null))) {
      throw new IllegalArgumentException("Hive engine on Python environments does not support incremental or "
          + "snapshot queries. Read feature group without timestamp to retrieve latest snapshot or switch to "
          + "environment with Spark Engine.");
    }
    // If the feature group is HUDI, validate and configure the start and end commit id/timestamp
    FeatureGroupCommit endCommit =
        featureGroupCommitCommitController.findCommitByDate(fg, queryDTO.getLeftFeatureGroupEndTime());
    query.setLeftFeatureGroupEndTimestamp(endCommit.getCommittedOn());
    query.setLeftFeatureGroupEndCommitId(endCommit.getFeatureGroupCommitPK().getCommitId());

    if ((queryDTO.getJoins() == null || queryDTO.getJoins().isEmpty())
        && queryDTO.getLeftFeatureGroupStartTime() != null) {
      Long exactStartCommitTimestamp = featureGroupCommitCommitController
          .findCommitByDate(query.getFeaturegroup(), queryDTO.getLeftFeatureGroupStartTime()).getCommittedOn();
      query.setLeftFeatureGroupStartTimestamp(exactStartCommitTimestamp);
    } else if (queryDTO.getJoins() != null && queryDTO.getLeftFeatureGroupStartTime() != null) {
      throw new IllegalArgumentException("For incremental queries start time must be provided and "
          + "join statements are not allowed");
    }
  }
  // If there are any joins, recursively convert the Join's QueryDTO into the internal Query representation
  if (queryDTO.getJoins() != null && !queryDTO.getJoins().isEmpty()) {
    query.setJoins(convertJoins(query, queryDTO.getJoins(), fgAliasLookup, fgLookup, availableFeatureLookup,
        pitEnabled));
    // remove duplicated join columns
    removeDuplicateColumns(query, pitEnabled);
  }
  // If there are any filters, convert the filter logic into the internal filter representation
  if (queryDTO.getFilter() != null) {
    query.setFilter(filterController.convertFilterLogic(queryDTO.getFilter(), fgLookup, availableFeatureLookup));
  }

  return query;
}
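To make the HUDI branch above easier to follow, here is a minimal, dependency-free sketch of the two rules convertQueryDTO enforces for HUDI feature groups: on the Hive engine only the latest snapshot may be read (no end time on the left query or on any join), and a start time (incremental read) is only accepted when the query has no joins. The QueryRequest record and validateHudiRequest method are simplified stand-ins written for illustration; they are not part of the Hopsworks API.

import java.util.List;

// Illustrative only: QueryRequest mirrors just the fields convertQueryDTO inspects.
public class HudiQueryValidationSketch {

  record QueryRequest(boolean hiveEngine, Long startTime, Long endTime, List<QueryRequest> joins) {}

  static void validateHudiRequest(QueryRequest request) {
    boolean anyEndTime = request.endTime() != null
        || request.joins().stream().anyMatch(join -> join.endTime() != null);

    // The Hive engine can only read the latest HUDI snapshot.
    if (request.hiveEngine() && anyEndTime) {
      throw new IllegalArgumentException("Hive engine does not support incremental or snapshot queries; "
          + "drop the timestamps or switch to an environment with the Spark engine.");
    }

    // Incremental reads (start time set) are only allowed when there are no joins.
    if (!request.joins().isEmpty() && request.startTime() != null) {
      throw new IllegalArgumentException("Incremental queries cannot be combined with join statements.");
    }
  }

  public static void main(String[] args) {
    // Latest-snapshot read on the Hive engine: passes validation.
    validateHudiRequest(new QueryRequest(true, null, null, List.of()));

    // Point-in-time read on the Hive engine: rejected, matching the guard in convertQueryDTO above.
    try {
      validateHudiRequest(new QueryRequest(true, null, 1650000000000L, List.of()));
    } catch (IllegalArgumentException e) {
      System.out.println("Rejected as expected: " + e.getMessage());
    }
  }
}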
Use of io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.FeatureGroupCommit in project hopsworks by logicalclocks.
The class StatisticsController, method registerStatistics.
public FeaturestoreStatistic registerStatistics(Project project, Users user, Long statisticsCommitTimeStamp,
    Long fgCommitId, String content, Featuregroup featuregroup)
    throws FeaturestoreException, DatasetException, HopsSecurityException, IOException {
  JSONObject statisticsJson = extractJsonFromContent(content);

  FeatureGroupCommit featureGroupCommit = null;
  if (featuregroup.getFeaturegroupType() == FeaturegroupType.CACHED_FEATURE_GROUP
      && featuregroup.getCachedFeaturegroup().getTimeTravelFormat() == TimeTravelFormat.HUDI) {
    featureGroupCommit = featureGroupCommitCommitController.findCommitByDate(featuregroup, fgCommitId);
    // The statistics commitTimeStamp is always the system time sent by the client when the user recomputes
    // statistics for a particular commit id (i.e. fgCommitId was provided). If fgCommitId is null, either
    // 1) the client issued a save or insert, in which case the statistics commitTimeStamp is the feature group
    //    commit timestamp, or 2) statistics are being recomputed for an existing time-travel enabled feature
    //    group, in which case the latest feature group commit is used to read the dataset and the client's
    //    system time is used as the statistics commit time.
    // If statistics were never saved for this commit, the lookup below returns null.
    FeaturestoreStatistic statisticsFgCommit = featurestoreStatisticFacade
        .findFGStatisticsByCommitTime(featuregroup, featureGroupCommit.getCommittedOn()).orElse(null);
    statisticsCommitTimeStamp =
        statisticsFgCommit == null ? featureGroupCommit.getCommittedOn() : statisticsCommitTimeStamp;
  }
  Inode statisticsInode = registerStatistics(project, user, statisticsCommitTimeStamp, statisticsJson.toString(),
      featuregroup.getName(), "FeatureGroups", featuregroup.getVersion(), null, false);
  Timestamp commitTime = new Timestamp(statisticsCommitTimeStamp);

  FeaturestoreStatistic featurestoreStatistic = new FeaturestoreStatistic(commitTime, statisticsInode, featuregroup);
  if (featuregroup.getFeaturegroupType() == FeaturegroupType.CACHED_FEATURE_GROUP
      && featuregroup.getCachedFeaturegroup().getTimeTravelFormat() == TimeTravelFormat.HUDI) {
    featurestoreStatistic.setFeatureGroupCommit(featureGroupCommit);
  }
  featurestoreStatistic = featurestoreStatisticFacade.update(featurestoreStatistic);

  // Log statistics activity
  fsActivityFacade.logStatisticsActivity(user, featuregroup, new Date(commitTime.getTime()), featurestoreStatistic);
  return featurestoreStatistic;
}
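The comment block inside the HUDI branch describes when the client-supplied timestamp is kept and when the commit's own timestamp takes over. The snippet below is a minimal, dependency-free restatement of that rule; ResolvedCommit and resolveStatisticsTimestamp are simplified stand-ins written for illustration and are not part of the Hopsworks codebase.

import java.util.Optional;

// Illustrative only: ResolvedCommit stands in for FeatureGroupCommit, and the Optional argument
// stands in for the result of findFGStatisticsByCommitTime.
public class StatisticsTimestampSketch {

  record ResolvedCommit(long commitId, long committedOn) {}

  /** Picks the timestamp under which statistics are registered for a HUDI feature group. */
  static long resolveStatisticsTimestamp(long clientTimestamp, ResolvedCommit commit,
      Optional<?> existingStatsForCommit) {
    // First statistics for this commit (save/insert path): use the commit's own timestamp.
    // Recomputations for a commit that already has statistics keep the client's system time.
    return existingStatsForCommit.isEmpty() ? commit.committedOn() : clientTimestamp;
  }

  public static void main(String[] args) {
    ResolvedCommit commit = new ResolvedCommit(42L, 1650000000000L);

    // No statistics stored yet for this commit: the commit timestamp is used.
    System.out.println(resolveStatisticsTimestamp(1660000000000L, commit, Optional.empty()));

    // Statistics already exist for this commit: the client timestamp is kept.
    System.out.println(resolveStatisticsTimestamp(1660000000000L, commit, Optional.of(new Object())));
  }
}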