Search in sources :

Example 1 with Filter

use of io.hops.hopsworks.common.featurestore.query.filter.Filter in project hopsworks by logicalclocks.

In the class TrainingDatasetController, method convertToFilter:

/**
 * Converts a persisted training dataset filter condition into a query {@link Filter}.
 *
 * <p>The right-hand side of the condition is either a plain value or, when the condition carries a
 * value feature group id, a reference to another feature looked up in {@code features}.
 *
 * @param condition the stored filter condition to convert
 * @param features lookup of available features, keyed by the result of {@code makeFeatureLookupKey}
 * @return the filter on the condition's feature, with the condition's operator and resolved value
 */
private Filter convertToFilter(TrainingDatasetFilterCondition condition, Map<String, Feature> features) {
    FilterValue comparedValue;
    if (condition.getValueFeatureGroupId() != null) {
        // The value refers to a feature: resolve it so that its feature group alias can be attached.
        Feature valueFeature = features.get(
            makeFeatureLookupKey(condition.getValueFeatureGroupId(), condition.getValue()));
        comparedValue = new FilterValue(
            condition.getValueFeatureGroupId(), valueFeature.getFgAlias(), condition.getValue());
    } else {
        // No value feature group id: the value is used as-is.
        comparedValue = new FilterValue(condition.getValue());
    }

    Feature filteredFeature = features.get(
        makeFeatureLookupKey(condition.getFeatureGroup().getId(), condition.getFeature()));
    return new Filter(filteredFeature, condition.getCondition(), comparedValue);
}
Also used : TrainingDatasetFilter(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFilter) Filter(io.hops.hopsworks.common.featurestore.query.filter.Filter) Feature(io.hops.hopsworks.common.featurestore.query.Feature) TrainingDatasetFeature(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFeature) FilterValue(io.hops.hopsworks.common.featurestore.query.filter.FilterValue)

Example 2 with Filter

use of io.hops.hopsworks.common.featurestore.query.filter.Filter in project hopsworks by logicalclocks.

In the class PitJoinController, method generateSQL:

/**
 * Generates the SQL node for a point-in-time join of the given query.
 *
 * <p>The result is a {@code WITH} statement: its list holds one aliased, wrapped subquery per entry
 * returned by {@code generateSubQueries}, and its body selects the final features from a join over
 * those subqueries.
 *
 * @param query the query to translate; its joins are read by index while iterating the subqueries
 * @param isTrainingDataset if true, the initial select list is only re-aliased per join and finally
 *     sorted by feature index; if false, the features of each join's right query are collected and
 *     appended to the select list
 * @return a {@link SqlWith} node combining the subqueries and the final select
 */
public SqlNode generateSQL(Query query, boolean isTrainingDataset) {
    // make a copy of base query to replace joins
    Query baseQuery = new Query(query.getFeatureStore(), query.getProject(), query.getFeaturegroup(), query.getAs(), new ArrayList<>(query.getFeatures()), query.getAvailableFeatures(), query.getHiveEngine(), query.getFilter());
    // collect left outer most features
    List<Feature> finalSelectList = constructorController.collectFeatures(baseQuery);
    // generate subqueries for WITH
    List<SqlSelect> withSelects = wrapSubQueries(generateSubQueries(baseQuery, query, isTrainingDataset));
    // by default every selected feature points at the first subquery; overridden per join below
    finalSelectList.forEach(f -> f.setPitFgAlias(FG_SUBQUERY + "0"));
    // list for "x0 as ..."
    SqlNodeList selectAsses = new SqlNodeList(SqlParserPos.ZERO);
    // joins for the body of the WITH statement, bringing together the final result
    List<Join> newJoins = new ArrayList<>();
    // each sqlSelect represents one subquery corresponding to one join in the final WITH body
    for (int i = 0; i < withSelects.size(); i++) {
        selectAsses.add(SqlStdOperatorTable.AS.createCall(// mandatory when using "WITH xyz AS ()" therefore we need to add it manually as string here
        SqlNodeList.of(new SqlIdentifier(FG_SUBQUERY + i + HIVE_AS, SqlParserPos.ZERO), withSelects.get(i))));
        // each select corresponds to one join, collect features and update alias, drop event time features from "right"
        // feature groups
        String pitAlias = FG_SUBQUERY + i;
        if (isTrainingDataset) {
            // for training datasets all features are contained in final select list from beginning, set the correct
            // alias only for the features corresponding to the feature group in the current join
            // (copy of the loop index because lambdas can only capture effectively final variables)
            int finalI = i;
            // NOTE(review): feature groups are compared by reference (==) here - confirm that is intended
            finalSelectList.stream().filter(f -> f.getFeatureGroup() == query.getJoins().get(finalI).getRightQuery().getFeaturegroup()).forEach(f -> f.setPitFgAlias(pitAlias));
        } else {
            // otherwise the right query's features are not in the select list yet: collect, alias and append them
            List<Feature> features = constructorController.collectFeatures(query.getJoins().get(i).getRightQuery());
            features.forEach(f -> f.setPitFgAlias(pitAlias));
            finalSelectList.addAll(features);
        }
        // add event time inequality join condition
        List<Feature> primaryKey = baseQuery.getAvailableFeatures().stream().filter(Feature::isPrimary).collect(Collectors.toList());
        List<Feature> newLeftOn = addEventTimeOn(primaryKey, baseQuery.getFeaturegroup(), baseQuery.getAs());
        renameJoinFeatures(newLeftOn);
        // equivalent copy, but needed to be able to set different alias
        List<Feature> newRightOn = addEventTimeOn(primaryKey, baseQuery.getFeaturegroup(), baseQuery.getAs());
        renameJoinFeatures(newRightOn);
        // one EQUALS operator per join feature
        List<SqlCondition> newJoinOperator = newLeftOn.stream().map(f -> SqlCondition.EQUALS).collect(Collectors.toList());
        // left side always refers to the first subquery, right side to the subquery of the current join
        newLeftOn.forEach(f -> f.setPitFgAlias(FG_SUBQUERY + "0"));
        newRightOn.forEach(f -> f.setPitFgAlias(pitAlias));
        newJoins.add(new Join(null, null, newLeftOn, newRightOn, JoinType.INNER, null, newJoinOperator));
    }
    // sort features in last select
    if (isTrainingDataset) {
        finalSelectList = finalSelectList.stream().sorted(Comparator.comparing(Feature::getIdx)).collect(Collectors.toList());
    }
    // build the final select list as `alias`.`[prefix]name` identifiers; backticks are added manually
    SqlNodeList selectList = new SqlNodeList(SqlParserPos.ZERO);
    for (Feature f : finalSelectList) {
        String featurePrefixed;
        if (!Strings.isNullOrEmpty(f.getPrefix())) {
            featurePrefixed = f.getPrefix() + f.getName();
        } else {
            featurePrefixed = f.getName();
        }
        selectList.add(new SqlIdentifier(Arrays.asList("`" + f.getFgAlias(true) + "`", "`" + featurePrefixed + "`"), SqlParserPos.ZERO));
    }
    // body of the WITH statement: select list plus the join tree built from newJoins, no other clauses
    SqlSelect body = new SqlSelect(SqlParserPos.ZERO, null, selectList, buildWithJoin(newJoins, newJoins.size() - 1), null, null, null, null, null, null, null, null);
    return new SqlWith(SqlParserPos.ZERO, selectAsses, body);
}
Also used : Arrays(java.util.Arrays) JoinConditionType(org.apache.calcite.sql.JoinConditionType) Feature(io.hops.hopsworks.common.featurestore.query.Feature) JoinController(io.hops.hopsworks.common.featurestore.query.join.JoinController) ArrayList(java.util.ArrayList) Strings(com.google.common.base.Strings) SqlCall(org.apache.calcite.sql.SqlCall) SqlLiteral(org.apache.calcite.sql.SqlLiteral) SqlNode(org.apache.calcite.sql.SqlNode) SqlWith(org.apache.calcite.sql.SqlWith) TransactionAttributeType(javax.ejb.TransactionAttributeType) TransactionAttribute(javax.ejb.TransactionAttribute) SqlIdentifier(org.apache.calcite.sql.SqlIdentifier) Query(io.hops.hopsworks.common.featurestore.query.Query) Filter(io.hops.hopsworks.common.featurestore.query.filter.Filter) SqlWindow(org.apache.calcite.sql.SqlWindow) SqlSelect(org.apache.calcite.sql.SqlSelect) EJB(javax.ejb.EJB) JoinType(org.apache.calcite.sql.JoinType) SqlParserPos(org.apache.calcite.sql.parser.SqlParserPos) Stateless(javax.ejb.Stateless) SqlCondition(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlCondition) Featuregroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup) Join(io.hops.hopsworks.common.featurestore.query.join.Join) ConstructorController(io.hops.hopsworks.common.featurestore.query.ConstructorController) Collectors(java.util.stream.Collectors) QueryDTO(io.hops.hopsworks.common.featurestore.query.QueryDTO) List(java.util.List) FilterController(io.hops.hopsworks.common.featurestore.query.filter.FilterController) SqlStdOperatorTable(org.apache.calcite.sql.fun.SqlStdOperatorTable) SqlJoin(org.apache.calcite.sql.SqlJoin) Comparator(java.util.Comparator) SqlNodeList(org.apache.calcite.sql.SqlNodeList) Collections(java.util.Collections) Query(io.hops.hopsworks.common.featurestore.query.Query) SqlWith(org.apache.calcite.sql.SqlWith) ArrayList(java.util.ArrayList) Join(io.hops.hopsworks.common.featurestore.query.join.Join) SqlJoin(org.apache.calcite.sql.SqlJoin) 
SqlIdentifier(org.apache.calcite.sql.SqlIdentifier) Feature(io.hops.hopsworks.common.featurestore.query.Feature) SqlCondition(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlCondition) SqlSelect(org.apache.calcite.sql.SqlSelect) SqlNodeList(org.apache.calcite.sql.SqlNodeList)

Example 3 with Filter

use of io.hops.hopsworks.common.featurestore.query.filter.Filter in project hopsworks by logicalclocks.

In the class PitJoinController, method wrapSubQueries:

/**
 * Wraps each given subquery in an outer {@code SELECT} that keeps only the rows whose
 * {@code PIT_JOIN_RANK} equals 1.
 *
 * @param sqlSelects the subqueries to wrap, each used as the source of its wrapping select
 * @return one wrapping select per input subquery, in input order
 */
public List<SqlSelect> wrapSubQueries(List<SqlCall> sqlSelects) {
    List<SqlSelect> wrapped = new ArrayList<>();
    sqlSelects.forEach(subQuery -> {
        // A fresh filter node is built per subquery so that no node is shared between selects.
        Feature rankFeature = new Feature(PIT_JOIN_RANK, null, "int", null, null);
        Filter rankIsOne = new Filter(Arrays.asList(rankFeature), SqlCondition.EQUALS, "1");
        SqlNode whereRank = filterController.generateFilterNode(rankIsOne, false);

        // The outer select projects the ALL_FEATURES identifier from the subquery.
        SqlNodeList projection = SqlNodeList.of(new SqlIdentifier(ALL_FEATURES, SqlParserPos.ZERO));
        wrapped.add(new SqlSelect(
            SqlParserPos.ZERO, null, projection, subQuery, whereRank, null, null, null, null, null, null, null));
    });
    return wrapped;
}
Also used : SqlSelect(org.apache.calcite.sql.SqlSelect) Filter(io.hops.hopsworks.common.featurestore.query.filter.Filter) SqlCall(org.apache.calcite.sql.SqlCall) ArrayList(java.util.ArrayList) SqlNodeList(org.apache.calcite.sql.SqlNodeList) SqlIdentifier(org.apache.calcite.sql.SqlIdentifier) Feature(io.hops.hopsworks.common.featurestore.query.Feature) SqlNode(org.apache.calcite.sql.SqlNode)

Example 4 with Filter

use of io.hops.hopsworks.common.featurestore.query.filter.Filter in project hopsworks by logicalclocks.

In the class PreparedStatementBuilder, method getServingStatements:

/**
 * Builds the serving prepared statements for a training dataset, at most one per join
 * (i.e. per feature group) of the query that generated it.
 *
 * @param trainingDataset the training dataset; must have been generated by a query
 * @param project project passed on when fetching the feature group features
 * @param user user passed on when fetching the feature group features
 * @param batch forwarded to {@code buildDTO} to choose between batch and single-row statements
 * @return the prepared statement DTOs, in the order of the sorted joins
 * @throws FeaturestoreException if the dataset was not generated by a query, a feature group of the
 *     dataset no longer exists, a feature group is not online enabled, or a feature group has no
 *     primary key
 */
private List<ServingPreparedStatementDTO> getServingStatements(TrainingDataset trainingDataset, Project project, Users user, boolean batch) throws FeaturestoreException {
    if (!trainingDataset.isQuery()) {
        throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_NO_QUERY, Level.FINE,
            "Inference vector is only available for datasets generated by queries");
    }

    List<TrainingDatasetJoin> sortedJoins = trainingDatasetController.getJoinsSorted(trainingDataset);

    // Fail with a meaningful error if any feature points at a feature group that no longer exists.
    boolean allFeatureGroupsPresent =
        trainingDataset.getFeatures().stream().allMatch(tdFeature -> tdFeature.getFeatureGroup() != null);
    if (!allFeatureGroupsPresent) {
        throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_QUERY_FG_DELETED, Level.FINE);
    }

    List<ServingPreparedStatementDTO> statements = new ArrayList<>();
    // Every join corresponds to one feature group.
    for (TrainingDatasetJoin join : sortedJoins) {
        Featuregroup featuregroup = join.getFeatureGroup();
        if (!featuregroup.getCachedFeaturegroup().isOnlineEnabled()) {
            throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.FEATURESTORE_ONLINE_NOT_ENABLED, Level.FINE,
                "Inference vector is only available for training datasets generated by online enabled " + "feature groups");
        }

        // All features of the feature group by name, carrying the prefix of this join.
        Map<String, Feature> featuresByName = featuregroupController.getFeatures(featuregroup, project, user)
            .stream()
            .collect(Collectors.toMap(
                FeatureGroupFeatureDTO::getName,
                dto -> new Feature(dto.getName(), ALIAS, dto.getType(), dto.getPrimary(), dto.getDefaultValue(), join.getPrefix())));

        // Primary keys are taken from the feature group itself, since they are not necessarily part of
        // the query that generated the training dataset.
        List<Feature> primaryKeys = featuresByName.values()
            .stream()
            .filter(Feature::isPrimary)
            .collect(Collectors.toList());
        if (primaryKeys.isEmpty()) {
            throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.PRIMARY_KEY_REQUIRED, Level.FINE,
                "Inference vector is only available for training datasets generated by feature groups with " + "at least 1 primary key");
        }

        // Non-label training dataset features of this join, ordered by their index.
        List<Feature> selectFeatures = join.getFeatures()
            .stream()
            .filter(tdFeature -> !tdFeature.isLabel())
            .sorted(Comparator.comparing(TrainingDatasetFeature::getIndex))
            .map(tdFeature -> featuresByName.get(tdFeature.getName()))
            .collect(Collectors.toList());

        // If only label features were used from this feature group there is nothing to select, so no
        // prepared statement is created for it.
        if (selectFeatures.isEmpty()) {
            continue;
        }

        // Build the query for this feature group and turn it into a prepared statement DTO.
        String offlineDbName =
            featurestoreController.getOfflineFeaturestoreDbName(featuregroup.getFeaturestore().getProject());
        String onlineDbName =
            onlineFeaturestoreController.getOnlineFeaturestoreDbName(featuregroup.getFeaturestore().getProject());
        Query query = new Query(offlineDbName, onlineDbName, featuregroup, ALIAS, selectFeatures);
        statements.add(buildDTO(query, primaryKeys, featuregroup.getId(), join.getIndex(), batch));
    }
    return statements;
}
Also used : Arrays(java.util.Arrays) Feature(io.hops.hopsworks.common.featurestore.query.Feature) TrainingDatasetController(io.hops.hopsworks.common.featurestore.trainingdatasets.TrainingDatasetController) OnlineFeaturestoreController(io.hops.hopsworks.common.featurestore.online.OnlineFeaturestoreController) Project(io.hops.hopsworks.persistence.entity.project.Project) ArrayList(java.util.ArrayList) Level(java.util.logging.Level) FeaturestoreException(io.hops.hopsworks.exceptions.FeaturestoreException) TransactionAttributeType(javax.ejb.TransactionAttributeType) FeaturestoreController(io.hops.hopsworks.common.featurestore.FeaturestoreController) TransactionAttribute(javax.ejb.TransactionAttribute) Map(java.util.Map) ResourceRequest(io.hops.hopsworks.common.api.ResourceRequest) TrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset) Query(io.hops.hopsworks.common.featurestore.query.Query) Filter(io.hops.hopsworks.common.featurestore.query.filter.Filter) URI(java.net.URI) FeatureGroupFeatureDTO(io.hops.hopsworks.common.featurestore.feature.FeatureGroupFeatureDTO) EJB(javax.ejb.EJB) Stateless(javax.ejb.Stateless) SqlCondition(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlCondition) ServingPreparedStatementDTO(io.hops.hopsworks.common.featurestore.query.ServingPreparedStatementDTO) TrainingDatasetFeature(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFeature) Featuregroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup) RESTCodes(io.hops.hopsworks.restutils.RESTCodes) ConstructorController(io.hops.hopsworks.common.featurestore.query.ConstructorController) Featurestore(io.hops.hopsworks.persistence.entity.featurestore.Featurestore) Collectors(java.util.stream.Collectors) SqlDialect(org.apache.calcite.sql.SqlDialect) FeaturegroupController(io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupController) 
TrainingDatasetJoin(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetJoin) List(java.util.List) FilterController(io.hops.hopsworks.common.featurestore.query.filter.FilterController) FilterLogic(io.hops.hopsworks.common.featurestore.query.filter.FilterLogic) PreparedStatementParameterDTO(io.hops.hopsworks.common.featurestore.query.PreparedStatementParameterDTO) MysqlSqlDialect(org.apache.calcite.sql.dialect.MysqlSqlDialect) UriInfo(javax.ws.rs.core.UriInfo) Users(io.hops.hopsworks.persistence.entity.user.Users) Comparator(java.util.Comparator) TrainingDatasetJoin(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetJoin) Query(io.hops.hopsworks.common.featurestore.query.Query) ServingPreparedStatementDTO(io.hops.hopsworks.common.featurestore.query.ServingPreparedStatementDTO) Featuregroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup) ArrayList(java.util.ArrayList) Feature(io.hops.hopsworks.common.featurestore.query.Feature) TrainingDatasetFeature(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFeature) FeaturestoreException(io.hops.hopsworks.exceptions.FeaturestoreException)

Example 5 with Filter

use of io.hops.hopsworks.common.featurestore.query.filter.Filter in project hopsworks by logicalclocks.

In the class PreparedStatementBuilder, method buildDTO:

/**
 * Builds the prepared statement DTO for one feature group query by filtering it on the primary keys
 * with {@code ?} placeholders.
 *
 * <p>In batch mode a single {@code IN} filter over all primary keys is used and the query is ordered
 * by the primary keys; otherwise one {@code EQUALS} filter per primary key is combined with AND.
 *
 * @param query the query to add the primary key filter to; it is modified in place
 * @param primaryKeys primary key features, expected to contain at least one element; their type is
 *     overwritten with {@code PREPARED_STATEMENT_TYPE}
 * @param featureGroupId id of the feature group the statement belongs to
 * @param statementIndex index of the statement
 * @param batch whether to build the batch variant of the statement
 * @return the DTO holding the parameters and the generated SQL string
 * @throws FeaturestoreException declared by this method; presumably raised by SQL generation
 */
private ServingPreparedStatementDTO buildDTO(Query query, List<Feature> primaryKeys, Integer featureGroupId, Integer statementIndex, boolean batch) throws FeaturestoreException {
    // Switching the primary keys to PREPARED_STATEMENT_TYPE stops the query constructor from putting
    // additional quotes around the "?" placeholder.
    for (Feature key : primaryKeys) {
        key.setType(PREPARED_STATEMENT_TYPE);
    }

    // The caller has already rejected feature groups without a primary key, so a first key exists.
    Feature leadingKey = primaryKeys.get(0);

    // Record the position of every primary key; prepared statement parameters are numbered from 1.
    List<PreparedStatementParameterDTO> stmtParameters = new ArrayList<>();
    int position = 1;
    for (Feature key : primaryKeys) {
        stmtParameters.add(new PreparedStatementParameterDTO(key.getName(), position));
        position++;
    }

    FilterLogic conditions;
    if (batch) {
        conditions = new FilterLogic(new Filter(primaryKeys, SqlCondition.IN, "?"));
        query.setOrderByFeatures(primaryKeys);
    } else {
        // The first condition stands alone; each remaining key is chained onto it with AND.
        conditions = new FilterLogic(new Filter(Arrays.asList(leadingKey), SqlCondition.EQUALS, "?"));
        for (Feature key : primaryKeys.subList(1, primaryKeys.size())) {
            conditions = conditions.and(new Filter(Arrays.asList(key), SqlCondition.EQUALS, "?"));
        }
    }
    query.setFilter(conditions);

    // Render the query with the MySQL dialect and hand it over together with the parameters.
    String sql = constructorController.generateSQL(query, true)
        .toSqlString(new MysqlSqlDialect(SqlDialect.EMPTY_CONTEXT))
        .getSql();
    return new ServingPreparedStatementDTO(featureGroupId, statementIndex, stmtParameters, sql);
}
Also used : Filter(io.hops.hopsworks.common.featurestore.query.filter.Filter) FilterLogic(io.hops.hopsworks.common.featurestore.query.filter.FilterLogic) ServingPreparedStatementDTO(io.hops.hopsworks.common.featurestore.query.ServingPreparedStatementDTO) ArrayList(java.util.ArrayList) MysqlSqlDialect(org.apache.calcite.sql.dialect.MysqlSqlDialect) Feature(io.hops.hopsworks.common.featurestore.query.Feature) TrainingDatasetFeature(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFeature) PreparedStatementParameterDTO(io.hops.hopsworks.common.featurestore.query.PreparedStatementParameterDTO)

Aggregations

Filter (io.hops.hopsworks.common.featurestore.query.filter.Filter)20 Feature (io.hops.hopsworks.common.featurestore.query.Feature)19 ArrayList (java.util.ArrayList)19 FilterLogic (io.hops.hopsworks.common.featurestore.query.filter.FilterLogic)16 SqlFilterLogic (io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlFilterLogic)13 TrainingDatasetFilter (io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFilter)13 TrainingDataset (io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset)9 Test (org.junit.Test)9 Featuregroup (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup)8 Query (io.hops.hopsworks.common.featurestore.query.Query)5 FilterValue (io.hops.hopsworks.common.featurestore.query.filter.FilterValue)5 Join (io.hops.hopsworks.common.featurestore.query.join.Join)5 Comparator (java.util.Comparator)4 List (java.util.List)4 Collectors (java.util.stream.Collectors)4 EJB (javax.ejb.EJB)4 Stateless (javax.ejb.Stateless)4 TransactionAttribute (javax.ejb.TransactionAttribute)4 TransactionAttributeType (javax.ejb.TransactionAttributeType)4 ConstructorController (io.hops.hopsworks.common.featurestore.query.ConstructorController)3