Use of io.hops.hopsworks.common.featurestore.query.filter.Filter in the hopsworks project by logicalclocks.
Example from the class TrainingDatasetController, method convertToFilter.
/**
 * Converts a persisted training dataset filter condition back into a query {@link Filter}.
 *
 * @param condition the stored filter condition (feature, operator, and value)
 * @param features  lookup map from feature-group-scoped key to the resolved {@link Feature}
 * @return the reconstructed filter for the query
 */
private Filter convertToFilter(TrainingDatasetFilterCondition condition, Map<String, Feature> features) {
  FilterValue filterValue;
  if (condition.getValueFeatureGroupId() != null) {
    // The condition compares against another feature: resolve it so the filter value
    // carries that feature's feature group alias.
    Feature valueFeature =
        features.get(makeFeatureLookupKey(condition.getValueFeatureGroupId(), condition.getValue()));
    filterValue = new FilterValue(condition.getValueFeatureGroupId(), valueFeature.getFgAlias(),
        condition.getValue());
  } else {
    // Plain literal value.
    filterValue = new FilterValue(condition.getValue());
  }
  Feature filteredFeature =
      features.get(makeFeatureLookupKey(condition.getFeatureGroup().getId(), condition.getFeature()));
  return new Filter(filteredFeature, condition.getCondition(), filterValue);
}
Use of io.hops.hopsworks.common.featurestore.query.filter.Filter in the hopsworks project by logicalclocks.
Example from the class PitJoinController, method generateSQL.
/**
 * Generates the WITH-based SQL statement for a point-in-time (PIT) join query.
 * Every join of the original query becomes one ranked subquery (aliased x0, x1, ...)
 * in the WITH clause; the statement body then inner-joins the subqueries back together
 * on the left feature group's primary key and event-time columns.
 *
 * @param query             the PIT join query to translate
 * @param isTrainingDataset true when generating SQL for a training dataset, in which case all
 *                          features are already contained in the final select list and the
 *                          result columns are ordered by their training dataset index
 * @return the Calcite AST of the complete WITH statement
 */
public SqlNode generateSQL(Query query, boolean isTrainingDataset) {
  // make a copy of base query to replace joins
  Query baseQuery = new Query(query.getFeatureStore(), query.getProject(), query.getFeaturegroup(), query.getAs(),
      new ArrayList<>(query.getFeatures()), query.getAvailableFeatures(), query.getHiveEngine(), query.getFilter());
  // collect left outer most features
  List<Feature> finalSelectList = constructorController.collectFeatures(baseQuery);
  // generate subqueries for WITH
  List<SqlSelect> withSelects = wrapSubQueries(generateSubQueries(baseQuery, query, isTrainingDataset));
  finalSelectList.forEach(f -> f.setPitFgAlias(FG_SUBQUERY + "0"));
  // list for "x0 as ..."
  SqlNodeList selectAsses = new SqlNodeList(SqlParserPos.ZERO);
  // joins for the body of the WITH statement, bringing together the final result
  List<Join> newJoins = new ArrayList<>();
  // the join keys are derived from the left (base) feature group only and the available
  // features are not modified below, so the primary key list is loop-invariant: compute it once
  List<Feature> primaryKey =
      baseQuery.getAvailableFeatures().stream().filter(Feature::isPrimary).collect(Collectors.toList());
  // each sqlSelect represents one subquery corresponding to one join in the final WITH body
  for (int i = 0; i < withSelects.size(); i++) {
    selectAsses.add(SqlStdOperatorTable.AS.createCall(
        // mandatory when using "WITH xyz AS ()" therefore we need to add it manually as string here
        SqlNodeList.of(new SqlIdentifier(FG_SUBQUERY + i + HIVE_AS, SqlParserPos.ZERO), withSelects.get(i))));
    // each select corresponds to one join, collect features and update alias, drop event time features from "right"
    // feature groups
    String pitAlias = FG_SUBQUERY + i;
    if (isTrainingDataset) {
      // for training datasets all features are contained in final select list from beginning, set the correct
      // alias only for the features corresponding to the feature group in the current join
      int finalI = i;
      finalSelectList.stream()
          .filter(f -> f.getFeatureGroup() == query.getJoins().get(finalI).getRightQuery().getFeaturegroup())
          .forEach(f -> f.setPitFgAlias(pitAlias));
    } else {
      List<Feature> features = constructorController.collectFeatures(query.getJoins().get(i).getRightQuery());
      features.forEach(f -> f.setPitFgAlias(pitAlias));
      finalSelectList.addAll(features);
    }
    // add event time inequality join condition
    List<Feature> newLeftOn = addEventTimeOn(primaryKey, baseQuery.getFeaturegroup(), baseQuery.getAs());
    renameJoinFeatures(newLeftOn);
    // equivalent copy, but needed to be able to set different alias
    List<Feature> newRightOn = addEventTimeOn(primaryKey, baseQuery.getFeaturegroup(), baseQuery.getAs());
    renameJoinFeatures(newRightOn);
    List<SqlCondition> newJoinOperator =
        newLeftOn.stream().map(f -> SqlCondition.EQUALS).collect(Collectors.toList());
    newLeftOn.forEach(f -> f.setPitFgAlias(FG_SUBQUERY + "0"));
    newRightOn.forEach(f -> f.setPitFgAlias(pitAlias));
    newJoins.add(new Join(null, null, newLeftOn, newRightOn, JoinType.INNER, null, newJoinOperator));
  }
  // sort features in last select
  if (isTrainingDataset) {
    finalSelectList =
        finalSelectList.stream().sorted(Comparator.comparing(Feature::getIdx)).collect(Collectors.toList());
  }
  SqlNodeList selectList = new SqlNodeList(SqlParserPos.ZERO);
  for (Feature f : finalSelectList) {
    String featurePrefixed;
    if (!Strings.isNullOrEmpty(f.getPrefix())) {
      featurePrefixed = f.getPrefix() + f.getName();
    } else {
      featurePrefixed = f.getName();
    }
    selectList.add(
        new SqlIdentifier(Arrays.asList("`" + f.getFgAlias(true) + "`", "`" + featurePrefixed + "`"),
            SqlParserPos.ZERO));
  }
  SqlSelect body = new SqlSelect(SqlParserPos.ZERO, null, selectList,
      buildWithJoin(newJoins, newJoins.size() - 1), null, null, null, null, null, null, null, null);
  return new SqlWith(SqlParserPos.ZERO, selectAsses, body);
}
Use of io.hops.hopsworks.common.featurestore.query.filter.Filter in the hopsworks project by logicalclocks.
Example from the class PitJoinController, method wrapSubQueries.
/**
 * Wraps each ranked PIT subquery in an outer {@code SELECT * ... WHERE <rank> = 1},
 * keeping only the top-ranked row per join key.
 *
 * @param sqlSelects the ranked subqueries to wrap
 * @return one wrapping SELECT per input subquery, in the same order
 */
public List<SqlSelect> wrapSubQueries(List<SqlCall> sqlSelects) {
  List<SqlSelect> newSubQueries = new ArrayList<>(sqlSelects.size());
  // the rank filter is identical for every subquery: build the Filter once, but still
  // generate a fresh SqlNode per subquery so each resulting AST owns its own nodes
  Filter rankFilter =
      new Filter(Arrays.asList(new Feature(PIT_JOIN_RANK, null, "int", null, null)), SqlCondition.EQUALS, "1");
  for (SqlCall select : sqlSelects) {
    SqlNode whereRank = filterController.generateFilterNode(rankFilter, false);
    SqlNodeList selectList = SqlNodeList.of(new SqlIdentifier(ALL_FEATURES, SqlParserPos.ZERO));
    newSubQueries.add(new SqlSelect(SqlParserPos.ZERO, null, selectList, select, whereRank,
        null, null, null, null, null, null, null));
  }
  return newSubQueries;
}
Use of io.hops.hopsworks.common.featurestore.query.filter.Filter in the hopsworks project by logicalclocks.
Example from the class PreparedStatementBuilder, method getServingStatements.
/**
 * Builds one serving prepared-statement DTO per feature group (join) of a query-based
 * training dataset, used to fetch inference vectors from the online feature store.
 *
 * @param trainingDataset the training dataset to serve; must be query-generated
 * @param project         project used to resolve the feature group features
 * @param user            user on whose behalf features are resolved
 * @param batch           whether to build batch (IN-based) lookup statements
 * @return the prepared-statement DTOs, one per join that selects at least one non-label feature
 * @throws FeaturestoreException if the dataset is not query-generated, a referenced feature
 *         group was deleted, a feature group is not online-enabled, or a feature group has no
 *         primary key
 */
private List<ServingPreparedStatementDTO> getServingStatements(TrainingDataset trainingDataset, Project project,
    Users user, boolean batch) throws FeaturestoreException {
  if (!trainingDataset.isQuery()) {
    throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_NO_QUERY, Level.FINE,
        "Inference vector is only available for datasets generated by queries");
  }
  List<ServingPreparedStatementDTO> statements = new ArrayList<>();
  List<TrainingDatasetJoin> sortedJoins = trainingDatasetController.getJoinsSorted(trainingDataset);
  // Fail with a descriptive error if any feature group backing the query has since been deleted.
  if (trainingDataset.getFeatures().stream().anyMatch(tdFeature -> tdFeature.getFeatureGroup() == null)) {
    throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_QUERY_FG_DELETED, Level.FINE);
  }
  // Each join corresponds to one feature group; build one statement per join.
  for (TrainingDatasetJoin join : sortedJoins) {
    Featuregroup featuregroup = join.getFeatureGroup();
    if (!featuregroup.getCachedFeaturegroup().isOnlineEnabled()) {
      throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.FEATURESTORE_ONLINE_NOT_ENABLED, Level.FINE,
          "Inference vector is only available for training datasets generated by online enabled " + "feature groups");
    }
    // All features of the group, keyed by name, rebuilt as query Features carrying the join prefix.
    Map<String, Feature> featuresByName = featuregroupController.getFeatures(featuregroup, project, user).stream()
        .collect(Collectors.toMap(FeatureGroupFeatureDTO::getName,
            dto -> new Feature(dto.getName(), ALIAS, dto.getType(), dto.getPrimary(), dto.getDefaultValue(),
                join.getPrefix())));
    // Primary key features may not be part of the query that generated the training dataset,
    // but they are needed to look up the serving rows.
    List<Feature> primaryKeys =
        featuresByName.values().stream().filter(Feature::isPrimary).collect(Collectors.toList());
    if (primaryKeys.isEmpty()) {
      throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.PRIMARY_KEY_REQUIRED, Level.FINE,
          "Inference vector is only available for training datasets generated by feature groups with "
              + "at least 1 primary key");
    }
    // Training dataset features to serve: non-label features, in training dataset order.
    List<Feature> selectFeatures = join.getFeatures().stream()
        .filter(tdf -> !tdf.isLabel())
        .sorted(Comparator.comparing(TrainingDatasetFeature::getIndex))
        .map(tdf -> featuresByName.get(tdf.getName()))
        .collect(Collectors.toList());
    // Label-only joins contribute nothing to the prepared statement, so skip the query for them.
    if (!selectFeatures.isEmpty()) {
      // Construct the per-feature-group query and turn it into a prepared-statement DTO.
      Query query = new Query(
          featurestoreController.getOfflineFeaturestoreDbName(featuregroup.getFeaturestore().getProject()),
          onlineFeaturestoreController.getOnlineFeaturestoreDbName(featuregroup.getFeaturestore().getProject()),
          featuregroup, ALIAS, selectFeatures);
      statements.add(buildDTO(query, primaryKeys, featuregroup.getId(), join.getIndex(), batch));
    }
  }
  return statements;
}
Use of io.hops.hopsworks.common.featurestore.query.filter.Filter in the hopsworks project by logicalclocks.
Example from the class PreparedStatementBuilder, method buildDTO.
/**
 * Builds the serving prepared-statement DTO for a single feature group query.
 * Side effect: the type of every primary key feature is changed to
 * PREPARED_STATEMENT_TYPE so the query constructor does not add quotes around
 * the '?' placeholders.
 *
 * @param primaryKeys non-empty list of primary key features (guaranteed by the caller)
 * @param batch       true to build a single IN (?) filter over all keys with deterministic
 *                    ordering; false to build an AND chain of equality filters
 * @return the DTO carrying the parameter positions and the generated MySQL statement
 * @throws FeaturestoreException if SQL generation fails
 */
private ServingPreparedStatementDTO buildDTO(Query query, List<Feature> primaryKeys, Integer featureGroupId,
    Integer statementIndex, boolean batch) throws FeaturestoreException {
  // Avoid quoting of the '?' markers by the query constructor.
  primaryKeys.forEach(pk -> pk.setType(PREPARED_STATEMENT_TYPE));
  // Record the position of each primary key in the prepared statement; positions start
  // from 1, as prepared-statement parameters do, and follow the primary key order.
  List<PreparedStatementParameterDTO> stmtParameters = new ArrayList<>(primaryKeys.size());
  int position = 1;
  for (Feature pk : primaryKeys) {
    stmtParameters.add(new PreparedStatementParameterDTO(pk.getName(), position++));
  }
  FilterLogic filterLogic;
  if (batch) {
    // Batch lookup: one IN (?) filter covering all primary keys, ordered for stable results.
    filterLogic = new FilterLogic(new Filter(primaryKeys, SqlCondition.IN, "?"));
    query.setOrderByFeatures(primaryKeys);
  } else {
    // Single-row lookup: pk0 = ? AND pk1 = ? AND ...; the first condition seeds the logic.
    filterLogic = new FilterLogic(new Filter(Arrays.asList(primaryKeys.get(0)), SqlCondition.EQUALS, "?"));
    for (int i = 1; i < primaryKeys.size(); i++) {
      filterLogic = filterLogic.and(new Filter(Arrays.asList(primaryKeys.get(i)), SqlCondition.EQUALS, "?"));
    }
  }
  query.setFilter(filterLogic);
  // Render the query to MySQL syntax and package everything into the DTO.
  return new ServingPreparedStatementDTO(featureGroupId, statementIndex, stmtParameters,
      constructorController.generateSQL(query, true)
          .toSqlString(new MysqlSqlDialect(SqlDialect.EMPTY_CONTEXT)).getSql());
}
Aggregations