use of io.hops.hopsworks.common.featurestore.query.Feature in project hopsworks by logicalclocks.
the class TrainingDatasetController method convertToFilter.
/**
 * Converts a persisted training dataset filter condition into a query {@link Filter}.
 *
 * <p>If the condition carries no value feature group id, the filter value is built from the plain condition
 * value. Otherwise the condition value names a feature; that feature is looked up in {@code features} and the
 * filter value is built from the feature group id, the looked-up feature's fg alias and the condition value.
 * A missing lookup entry in that case surfaces as a {@link NullPointerException}.
 *
 * @param condition the stored filter condition to convert
 * @param features lookup of features keyed by the result of {@code makeFeatureLookupKey}
 * @return the filter on the condition's feature, using the condition's operator and the resolved value
 */
private Filter convertToFilter(TrainingDatasetFilterCondition condition, Map<String, Feature> features) {
  final FilterValue resolvedValue;
  if (condition.getValueFeatureGroupId() != null) {
    // the value refers to another feature: resolve it to pick up its feature group alias
    Feature referencedFeature =
        features.get(makeFeatureLookupKey(condition.getValueFeatureGroupId(), condition.getValue()));
    resolvedValue = new FilterValue(
        condition.getValueFeatureGroupId(),
        referencedFeature.getFgAlias(),
        condition.getValue());
  } else {
    // the value is used as-is
    resolvedValue = new FilterValue(condition.getValue());
  }

  Feature filteredFeature =
      features.get(makeFeatureLookupKey(condition.getFeatureGroup().getId(), condition.getFeature()));
  return new Filter(filteredFeature, condition.getCondition(), resolvedValue);
}
use of io.hops.hopsworks.common.featurestore.query.Feature in project hopsworks by logicalclocks.
the class TrainingDatasetController method collectFeatures.
// Here we need to pass the list of training dataset joins so that we can rebuild the aliases
// and correctly handle the case in which a feature group is joined with itself.
/**
 * Recursively turns the features of {@code query} and of its joined right queries into
 * {@link TrainingDatasetFeature} entities.
 *
 * <p>Each feature of {@code query} is bound to {@code tdJoins.get(joinIndex)} and receives consecutive indices
 * starting at {@code featureIndex}. The right query of every join is then processed with the next join index,
 * and its features are appended after the ones collected so far.
 *
 * <p>NOTE(review): {@code joinIndex} is passed by value, so increments made inside a recursive call are not
 * visible to the caller's loop. Confirm that right queries are not expected to carry joins of their own.
 *
 * @param query the query whose features are collected
 * @param featureDTOs optional feature descriptions used to flag labels and resolve transformation functions;
 *                    ignored when {@code null} or empty
 * @param trainingDataset owner of the created features; when {@code null}, {@code featureView} is used instead
 * @param featureView owner of the created features when {@code trainingDataset} is {@code null}
 * @param featureIndex index assigned to the first feature collected from {@code query}
 * @param tdJoins training dataset joins, indexed by {@code joinIndex}
 * @param joinIndex position in {@code tdJoins} of the join that {@code query} corresponds to
 * @return the collected features, those of {@code query} first, followed by those of its joins in order
 * @throws FeaturestoreException declared by the signature; the raising call is not visible in this block
 */
public List<TrainingDatasetFeature> collectFeatures(Query query, List<TrainingDatasetFeatureDTO> featureDTOs, TrainingDataset trainingDataset, FeatureView featureView, int featureIndex, List<TrainingDatasetJoin> tdJoins, int joinIndex) throws FeaturestoreException {
  final boolean hasFeatureDtos = featureDTOs != null && !featureDTOs.isEmpty();
  List<TrainingDatasetFeature> collected = new ArrayList<>();

  for (Feature feature : query.getFeatures()) {
    boolean label = false;
    TransformationFunction transformation = null;
    if (hasFeatureDtos) {
      // a feature is a label when a DTO with the same name is marked as label
      label = featureDTOs.stream()
          .anyMatch(dto -> feature.getName().equals(dto.getName()) && dto.getLabel());
      // look up the transformation function attached to this feature
      transformation = getTransformationFunction(feature, featureDTOs);
    }

    TrainingDatasetFeature tdFeature;
    if (trainingDataset == null) {
      tdFeature = new TrainingDatasetFeature(featureView, tdJoins.get(joinIndex), query.getFeaturegroup(),
          feature.getName(), feature.getType(), featureIndex, label, transformation);
    } else {
      tdFeature = new TrainingDatasetFeature(trainingDataset, tdJoins.get(joinIndex), query.getFeaturegroup(),
          feature.getName(), feature.getType(), featureIndex, label, transformation);
    }
    featureIndex++;
    collected.add(tdFeature);
  }

  List<Join> joins = query.getJoins();
  if (joins == null) {
    return collected;
  }

  for (Join join : joins) {
    joinIndex++;
    List<TrainingDatasetFeature> fromRightQuery = collectFeatures(join.getRightQuery(), featureDTOs,
        trainingDataset, featureView, featureIndex, tdJoins, joinIndex);
    collected.addAll(fromRightQuery);
    // keep feature indices contiguous across joins
    featureIndex += fromRightQuery.size();
  }
  return collected;
}
use of io.hops.hopsworks.common.featurestore.query.Feature in project hopsworks by logicalclocks.
the class TrainingDatasetController method getQueryJoin.
// Rebuild the query object so that the query constructor can build the string
/**
 * Rebuilds the {@link Join} between {@code leftQuery} and the feature group referenced by a stored
 * training dataset join.
 *
 * <p>The right-hand query is created without requested features, since they are all part of the left base
 * query. The join columns are taken from the stored join conditions, and the join type is resolved by using
 * {@code rightTdJoin.getType()} as an index into {@code JoinType.values()}.
 *
 * @param leftQuery the left side of the join
 * @param rightTdJoin the stored join describing the right side, its conditions, type and prefix
 * @param fgAliasLookup alias lookup keyed by training dataset join id
 * @param fsLookup feature store lookup keyed by feature store id
 * @param availableFeaturesLookup available features keyed by feature group id
 * @param isHiveEngine passed through unchanged to the right-hand {@link Query}
 * @return the join produced by {@code queryController.extractLeftRightOn}
 * @throws FeaturestoreException declared by the signature; the raising call is not visible in this block
 */
public Join getQueryJoin(Query leftQuery, TrainingDatasetJoin rightTdJoin, Map<Integer, String> fgAliasLookup, Map<Integer, String> fsLookup, Map<Integer, List<Feature>> availableFeaturesLookup, Boolean isHiveEngine) throws FeaturestoreException {
  String rightAlias = fgAliasLookup.get(rightTdJoin.getId());
  String rightFeatureStore = fsLookup.get(rightTdJoin.getFeatureGroup().getFeaturestore().getId());

  Query rightSide = new Query(
      rightFeatureStore,
      onlineFeaturestoreController.getOnlineFeaturestoreDbName(
          rightTdJoin.getFeatureGroup().getFeaturestore().getProject()),
      rightTdJoin.getFeatureGroup(),
      rightAlias,
      // no requested features as they are all in the left base query
      new ArrayList<>(),
      availableFeaturesLookup.get(rightTdJoin.getFeatureGroup().getId()),
      isHiveEngine);

  List<Feature> leftJoinFeatures = rightTdJoin.getConditions().stream()
      .map(condition -> new Feature(condition.getLeftFeature()))
      .collect(Collectors.toList());
  List<Feature> rightJoinFeatures = rightTdJoin.getConditions().stream()
      .map(condition -> new Feature(condition.getRightFeature()))
      .collect(Collectors.toList());

  // the stored type is used as a position in the JoinType enum
  JoinType resolvedType = JoinType.values()[rightTdJoin.getType()];

  return queryController.extractLeftRightOn(
      leftQuery, rightSide, leftJoinFeatures, rightJoinFeatures, resolvedType, rightTdJoin.getPrefix());
}
use of io.hops.hopsworks.common.featurestore.query.Feature in project hopsworks by logicalclocks.
the class PitJoinController method generateSQL.
/**
 * Generates the SQL tree for a point-in-time join as a {@code WITH} statement.
 *
 * <p>The sub-queries produced by {@code generateSubQueries} are wrapped by {@code wrapSubQueries} and each is
 * exposed under the alias {@code FG_SUBQUERY + i}. The body of the statement selects the final feature list
 * from those aliases and joins them with the joins assembled in the loop below.
 *
 * @param query the query to generate SQL for; {@code query.getJoins().get(i)} is read for the i-th sub-query
 * @param isTrainingDataset when {@code true}, only the aliases of the already collected features are updated
 *                          and the final select list is sorted by {@code Feature::getIdx}; when {@code false},
 *                          the features of each right query are collected and appended to the select list
 * @return a {@link SqlWith} node holding the aliased sub-queries and the final select body
 */
public SqlNode generateSQL(Query query, boolean isTrainingDataset) {
// make a copy of base query to replace joins
Query baseQuery = new Query(query.getFeatureStore(), query.getProject(), query.getFeaturegroup(), query.getAs(), new ArrayList<>(query.getFeatures()), query.getAvailableFeatures(), query.getHiveEngine(), query.getFilter());
// collect left outer most features
List<Feature> finalSelectList = constructorController.collectFeatures(baseQuery);
// generate subqueries for WITH
List<SqlSelect> withSelects = wrapSubQueries(generateSubQueries(baseQuery, query, isTrainingDataset));
// every feature collected so far is initially read from the first sub-query alias
finalSelectList.forEach(f -> f.setPitFgAlias(FG_SUBQUERY + "0"));
// list for "x0 as ..."
SqlNodeList selectAsses = new SqlNodeList(SqlParserPos.ZERO);
// joins for the body of the WITH statement, bringing together the final result
List<Join> newJoins = new ArrayList<>();
// each sqlSelect represents one subquery corresponding to one join in the final WITH body
for (int i = 0; i < withSelects.size(); i++) {
selectAsses.add(SqlStdOperatorTable.AS.createCall(// mandatory when using "WITH xyz AS ()" therefore we need to add it manually as string here
SqlNodeList.of(new SqlIdentifier(FG_SUBQUERY + i + HIVE_AS, SqlParserPos.ZERO), withSelects.get(i))));
// each select corresponds to one join, collect features and update alias, drop event time features from "right"
// feature groups
String pitAlias = FG_SUBQUERY + i;
if (isTrainingDataset) {
// for training datasets all features are contained in final select list from beginning, set the correct
// alias only for the features corresponding to the feature group in the current join
// (the lambda needs an effectively final copy of the loop index; feature groups are compared by reference)
int finalI = i;
finalSelectList.stream().filter(f -> f.getFeatureGroup() == query.getJoins().get(finalI).getRightQuery().getFeaturegroup()).forEach(f -> f.setPitFgAlias(pitAlias));
} else {
List<Feature> features = constructorController.collectFeatures(query.getJoins().get(i).getRightQuery());
features.forEach(f -> f.setPitFgAlias(pitAlias));
finalSelectList.addAll(features);
}
// add event time inequality join condition
// NOTE(review): every operator built below is SqlCondition.EQUALS; confirm "inequality" is still accurate
List<Feature> primaryKey = baseQuery.getAvailableFeatures().stream().filter(Feature::isPrimary).collect(Collectors.toList());
List<Feature> newLeftOn = addEventTimeOn(primaryKey, baseQuery.getFeaturegroup(), baseQuery.getAs());
renameJoinFeatures(newLeftOn);
// equivalent copy, but needed to be able to set different alias
List<Feature> newRightOn = addEventTimeOn(primaryKey, baseQuery.getFeaturegroup(), baseQuery.getAs());
renameJoinFeatures(newRightOn);
// one EQUALS operator per join feature
List<SqlCondition> newJoinOperator = newLeftOn.stream().map(f -> SqlCondition.EQUALS).collect(Collectors.toList());
// the left side always reads from the first sub-query, the right side from the current one
newLeftOn.forEach(f -> f.setPitFgAlias(FG_SUBQUERY + "0"));
newRightOn.forEach(f -> f.setPitFgAlias(pitAlias));
newJoins.add(new Join(null, null, newLeftOn, newRightOn, JoinType.INNER, null, newJoinOperator));
}
// sort features in last select
if (isTrainingDataset) {
finalSelectList = finalSelectList.stream().sorted(Comparator.comparing(Feature::getIdx)).collect(Collectors.toList());
}
// build the select list of the body: backtick-quoted "<alias>.<prefix + name>" identifiers
SqlNodeList selectList = new SqlNodeList(SqlParserPos.ZERO);
for (Feature f : finalSelectList) {
String featurePrefixed;
if (!Strings.isNullOrEmpty(f.getPrefix())) {
featurePrefixed = f.getPrefix() + f.getName();
} else {
featurePrefixed = f.getName();
}
selectList.add(new SqlIdentifier(Arrays.asList("`" + f.getFgAlias(true) + "`", "`" + featurePrefixed + "`"), SqlParserPos.ZERO));
}
// the FROM clause comes from buildWithJoin, called with the index of the last collected join
SqlSelect body = new SqlSelect(SqlParserPos.ZERO, null, selectList, buildWithJoin(newJoins, newJoins.size() - 1), null, null, null, null, null, null, null, null);
return new SqlWith(SqlParserPos.ZERO, selectAsses, body);
}
use of io.hops.hopsworks.common.featurestore.query.Feature in project hopsworks by logicalclocks.
the class PitJoinController method wrapSubQueries.
/**
 * Wraps each given sub-query in an outer select that keeps only the rows whose {@code PIT_JOIN_RANK}
 * column equals {@code 1}.
 *
 * <p>Every wrapper selects {@code ALL_FEATURES} from the sub-query and gets its own freshly generated
 * filter node.
 *
 * @param sqlSelects the sub-queries to wrap, in order
 * @return a new mutable list with one wrapping {@link SqlSelect} per input, in the same order
 */
public List<SqlSelect> wrapSubQueries(List<SqlCall> sqlSelects) {
  return sqlSelects.stream()
      .map(subQuery -> {
        // WHERE <rank> = 1
        SqlNode rankIsOne = filterController.generateFilterNode(
            new Filter(Arrays.asList(new Feature(PIT_JOIN_RANK, null, "int", null, null)),
                SqlCondition.EQUALS, "1"),
            false);
        SqlNodeList allColumns = SqlNodeList.of(new SqlIdentifier(ALL_FEATURES, SqlParserPos.ZERO));
        return new SqlSelect(SqlParserPos.ZERO, null, allColumns, subQuery, rankIsOne,
            null, null, null, null, null, null, null);
      })
      .collect(Collectors.toCollection(ArrayList::new));
}
Aggregations