Search in sources :

Example 21 with Join

use of io.hops.hopsworks.common.featurestore.query.join.Join in project hopsworks by logicalclocks.

In class TestConstructorController, method testNoJoiningKeyMultipleDifferentSizes:

/**
 * Extracts a join from two queries without passing explicit join features (the last argument is {@code null})
 * and checks that two left-on features, two right-on features and two EQUALS operators are produced.
 * Both sides expose "ft1" and "ft2"; the left side additionally has "ft4" and the right side "ft3".
 */
@Test
public void testNoJoiningKeyMultipleDifferentSizes() throws Exception {
    ConstructorController constructorController = new ConstructorController();

    // NOTE(review): the boolean flag presumably marks the feature as a primary key - confirm against Feature
    List<Feature> leftFeatures = new ArrayList<>(Arrays.asList(
        new Feature("ft1", true),
        new Feature("ft2", true),
        new Feature("ft4", true)));
    List<Feature> rightFeatures = new ArrayList<>(Arrays.asList(
        new Feature("ft1", true),
        new Feature("ft2", true),
        new Feature("ft3", true)));

    // the same list instance is passed twice to each Query constructor
    Query left = new Query("fs1", "project_fs1", fg1, "fg1", leftFeatures, leftFeatures);
    Query right = new Query("fs1", "project_fs1", fg2, "fg1", rightFeatures, rightFeatures);

    List<SqlCondition> expectedOperators = Arrays.asList(SqlCondition.EQUALS, SqlCondition.EQUALS);

    Join extracted = queryController.extractPrimaryKeysJoin(left, right, JoinType.INNER, null);

    Assert.assertEquals(2, extracted.getLeftOn().size());
    Assert.assertEquals(2, extracted.getRightOn().size());
    Assert.assertEquals(expectedOperators, extracted.getJoinOperator());
}
Also used : ArrayList(java.util.ArrayList) Join(io.hops.hopsworks.common.featurestore.query.join.Join) SqlCondition(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlCondition) Test(org.junit.Test)

Example 22 with Join

use of io.hops.hopsworks.common.featurestore.query.join.Join in project hopsworks by logicalclocks.

In class TestConstructorController, method testSingleJoinOrderBySQLQueryOnline:

/**
 * Generates the SQL for a single inner join between two one-feature queries with an ORDER BY over both
 * features (second argument of {@code generateSQL} set to {@code true}) and compares the rendered
 * Spark SQL, with newlines replaced by spaces, against the expected statement.
 */
@Test
public void testSingleJoinOrderBySQLQueryOnline() throws Exception {
    List<Feature> leftFeatures = new ArrayList<>();
    leftFeatures.add(new Feature("ft1", "fg1", "Float", null, null));
    List<Feature> rightFeatures = new ArrayList<>();
    rightFeatures.add(new Feature("ft1", "fg2", "Float", null, null));

    Query left = new Query("fs1", "project_fs2", fg1, "fg1", leftFeatures, leftFeatures);
    Query right = new Query("fs1", "project_fs1", fg2, "fg2", rightFeatures, rightFeatures);

    // the left feature list is used for both sides of the join condition, exactly as in the original test
    Join singleJoin =
        new Join(left, right, leftFeatures, leftFeatures, JoinType.INNER, null, singleEqualsJoinOperator);
    left.setJoins(Arrays.asList(singleJoin));

    // order by the left feature first, then the right one
    List<Feature> ordering = new ArrayList<>(leftFeatures);
    ordering.addAll(rightFeatures);
    left.setOrderByFeatures(ordering);

    String rendered = target.generateSQL(left, true)
        .toSqlString(new SparkSqlDialect(SqlDialect.EMPTY_CONTEXT))
        .getSql();
    String query = rendered.replace("\n", " ");

    String expected = "SELECT `fg1`.`ft1`, `fg2`.`ft1` FROM `project_fs2`.`fg1_1` `fg1` INNER JOIN "
        + "`project_fs1`.`fg2_1` `fg2` ON `fg1`.`ft1` = `fg2`.`ft1` ORDER BY `fg1`.`ft1`, `fg2`.`ft1`";
    Assert.assertEquals(expected, query);
}
Also used : SparkSqlDialect(org.apache.calcite.sql.dialect.SparkSqlDialect) ArrayList(java.util.ArrayList) Join(io.hops.hopsworks.common.featurestore.query.join.Join) Test(org.junit.Test)

Example 23 with Join

use of io.hops.hopsworks.common.featurestore.query.join.Join in project hopsworks by logicalclocks.

In class PitJoinController, method generateSubQueries:

/**
 * Builds one aliased sub-query per join of {@code query}.
 *
 * <p>For every join, {@code baseQuery} is temporarily rewired to contain only that join (extended with an
 * event time condition), the SQL is generated, a rank-over-window column is appended to the select list
 * and the result is wrapped in an {@code AS} call using {@code HIVE_ALIAS_PLACEHOLDER} as alias.
 *
 * <p>Side effects visible in this method: {@code baseQuery}'s joins and features are overwritten on every
 * iteration; after each iteration its features are reset to a fresh copy of {@code query.getFeatures()},
 * while its joins keep the value set for the last processed join.
 *
 * @param baseQuery query object that is mutated and used to generate each sub-query
 * @param query query providing the joins, the available features and the feature list to restore
 * @param isTrainingDataset when {@code true}, the features of {@code baseQuery} are replaced by the result of
 *                          {@code dropIrrelevantSubqueryFeatures} before generating each sub-query
 * @return the generated sub-queries, in the iteration order of {@code query.getJoins()}
 */
public List<SqlCall> generateSubQueries(Query baseQuery, Query query, boolean isTrainingDataset) {
    // we always re-select all primary key columns of the "label group" in order to be able to perform final join
    List<Feature> joinKeyFeatures = query.getAvailableFeatures().stream()
        .filter(Feature::isPrimary)
        .map(pk -> new Feature(pk.getName(), pk.getFgAlias(), pk.getType(), pk.isPrimary(), pk.getDefaultValue(),
            PK_JOIN_PREFIX))
        .collect(Collectors.toList());
    // the event time column of the query's feature group is re-selected as well, under its own prefix
    joinKeyFeatures.add(
        new Feature(query.getFeaturegroup().getEventTime(), query.getAs(), (String) null, null, EVT_JOIN_PREFIX));
    for (Feature keyFeature : joinKeyFeatures) {
        keyFeature.setFeatureGroup(query.getFeaturegroup());
    }

    List<SqlCall> result = new ArrayList<>();
    for (Join originalJoin : query.getJoins()) {
        Query rightQuery = originalJoin.getRightQuery();

        // add event time inequality join condition
        List<Feature> leftOnWithEventTime =
            addEventTimeOn(originalJoin.getLeftOn(), baseQuery.getFeaturegroup(), baseQuery.getAs());
        List<Feature> rightOnWithEventTime =
            addEventTimeOn(originalJoin.getRightOn(), rightQuery.getFeaturegroup(), rightQuery.getAs());
        List<SqlCondition> operatorsWithEventTime =
            addEventTimeCondition(originalJoin.getJoinOperator(), SqlCondition.GREATER_THAN_OR_EQUAL);

        // the base query is joined with a single right feature group at a time
        Join pitJoin = new Join(baseQuery, rightQuery, leftOnWithEventTime, rightOnWithEventTime,
            originalJoin.getJoinType(), originalJoin.getPrefix(), operatorsWithEventTime);
        baseQuery.setJoins(Collections.singletonList(pitJoin));

        // features are only narrowed down for training datasets; regular queries skip this step
        if (isTrainingDataset) {
            baseQuery.setFeatures(dropIrrelevantSubqueryFeatures(query, rightQuery));
        }
        baseQuery.getFeatures().addAll(joinKeyFeatures);

        // first generate subquery and subsequently add rank over window
        SqlSelect generated = constructorController.generateSQL(baseQuery, false);
        Feature rightEventTime =
            new Feature(rightQuery.getFeaturegroup().getEventTime(), rightQuery.getAs(), false);
        generated.getSelectList().add(rankOverAs(leftOnWithEventTime, rightEventTime));

        SqlIdentifier placeholderAlias = new SqlIdentifier(HIVE_ALIAS_PLACEHOLDER, SqlParserPos.ZERO);
        result.add(SqlStdOperatorTable.AS.createCall(SqlParserPos.ZERO, generated, placeholderAlias));

        // restore the feature list for the next iteration, using a copy so later addAll calls do not leak
        baseQuery.setFeatures(new ArrayList<>(query.getFeatures()));
    }
    return result;
}
Also used : Arrays(java.util.Arrays) JoinConditionType(org.apache.calcite.sql.JoinConditionType) Feature(io.hops.hopsworks.common.featurestore.query.Feature) JoinController(io.hops.hopsworks.common.featurestore.query.join.JoinController) ArrayList(java.util.ArrayList) Strings(com.google.common.base.Strings) SqlCall(org.apache.calcite.sql.SqlCall) SqlLiteral(org.apache.calcite.sql.SqlLiteral) SqlNode(org.apache.calcite.sql.SqlNode) SqlWith(org.apache.calcite.sql.SqlWith) TransactionAttributeType(javax.ejb.TransactionAttributeType) TransactionAttribute(javax.ejb.TransactionAttribute) SqlIdentifier(org.apache.calcite.sql.SqlIdentifier) Query(io.hops.hopsworks.common.featurestore.query.Query) Filter(io.hops.hopsworks.common.featurestore.query.filter.Filter) SqlWindow(org.apache.calcite.sql.SqlWindow) SqlSelect(org.apache.calcite.sql.SqlSelect) EJB(javax.ejb.EJB) JoinType(org.apache.calcite.sql.JoinType) SqlParserPos(org.apache.calcite.sql.parser.SqlParserPos) Stateless(javax.ejb.Stateless) SqlCondition(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlCondition) Featuregroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup) Join(io.hops.hopsworks.common.featurestore.query.join.Join) ConstructorController(io.hops.hopsworks.common.featurestore.query.ConstructorController) Collectors(java.util.stream.Collectors) QueryDTO(io.hops.hopsworks.common.featurestore.query.QueryDTO) List(java.util.List) FilterController(io.hops.hopsworks.common.featurestore.query.filter.FilterController) SqlStdOperatorTable(org.apache.calcite.sql.fun.SqlStdOperatorTable) SqlJoin(org.apache.calcite.sql.SqlJoin) Comparator(java.util.Comparator) SqlNodeList(org.apache.calcite.sql.SqlNodeList) Collections(java.util.Collections) SqlCall(org.apache.calcite.sql.SqlCall) ArrayList(java.util.ArrayList) Join(io.hops.hopsworks.common.featurestore.query.join.Join) SqlJoin(org.apache.calcite.sql.SqlJoin) SqlIdentifier(org.apache.calcite.sql.SqlIdentifier) 
Feature(io.hops.hopsworks.common.featurestore.query.Feature) SqlCondition(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlCondition) SqlSelect(org.apache.calcite.sql.SqlSelect)

Example 24 with Join

use of io.hops.hopsworks.common.featurestore.query.join.Join in project hopsworks by logicalclocks.

In class QueryController, method convertQueryDTO:

/**
 * Recursively convert the QueryDTO into the internal query representation.
 *
 * <p>The left feature group of the DTO is resolved through the lookup maps, its requested features are
 * validated and a {@link Query} is created. If the feature group is a cached feature group using the HUDI
 * time travel format, the end commit (and, for join-free queries with a start time, the start commit) is
 * resolved and stored on the query. Joins and filters of the DTO are converted afterwards.
 *
 * @param queryDTO the DTO to convert; the id of its left feature group is the key into the lookup maps
 * @param fgAliasLookup feature group id to alias used for the feature group in the query
 * @param fgLookup feature group id to feature group entity
 * @param availableFeatureLookup feature group id to the features available in that feature group
 * @param pitEnabled forwarded to the join conversion and to the duplicate column removal
 * @return the internal query representation of {@code queryDTO}
 * @throws FeaturestoreException declared by this method; raised by the helpers it delegates to
 * @throws IllegalArgumentException if a time travel query is requested with the Hive engine, or if a start
 *                                  time is combined with joins on a HUDI feature group
 */
public Query convertQueryDTO(QueryDTO queryDTO, Map<Integer, String> fgAliasLookup, Map<Integer, Featuregroup> fgLookup, Map<Integer, List<Feature>> availableFeatureLookup, boolean pitEnabled) throws FeaturestoreException {
    Integer fgId = queryDTO.getLeftFeatureGroup().getId();
    Featuregroup fg = fgLookup.get(fgId);
    String featureStore = featurestoreFacade.getHiveDbName(fg.getFeaturestore().getHiveDbId());
    // used to build the online query - needs to respect the online db format name
    String projectName = onlineFeaturestoreController.getOnlineFeaturestoreDbName(fg.getFeaturestore().getProject());
    List<Feature> requestedFeatures = validateFeatures(fg, queryDTO.getLeftFeatures(), availableFeatureLookup.get(fgId));
    Query query = new Query(featureStore, projectName, fg, fgAliasLookup.get(fgId), requestedFeatures, availableFeatureLookup.get(fgId), queryDTO.getHiveEngine());
    if (fg.getCachedFeaturegroup() != null && fg.getCachedFeaturegroup().getTimeTravelFormat() == TimeTravelFormat.HUDI) {
        // if hudi and end hive engine, only possible to get latest snapshot else raise exception
        // getJoins() may be null (it is null-checked further down), so it must be guarded before streaming;
        // the guard stays inside the short-circuit expression so joins are only inspected when needed
        if (queryDTO.getHiveEngine() && (queryDTO.getLeftFeatureGroupEndTime() != null || (queryDTO.getJoins() != null && queryDTO.getJoins().stream().anyMatch(join -> join.getQuery().getLeftFeatureGroupEndTime() != null)))) {
            throw new IllegalArgumentException("Hive engine on Python environments does not support incremental or " + "snapshot queries. Read feature group without timestamp to retrieve latest snapshot or switch to " + "environment with Spark Engine.");
        }
        // If the feature group is hudi, validate and configure start and end commit id/timestamp
        FeatureGroupCommit endCommit = featureGroupCommitCommitController.findCommitByDate(fg, queryDTO.getLeftFeatureGroupEndTime());
        query.setLeftFeatureGroupEndTimestamp(endCommit.getCommittedOn());
        query.setLeftFeatureGroupEndCommitId(endCommit.getFeatureGroupCommitPK().getCommitId());
        if ((queryDTO.getJoins() == null || queryDTO.getJoins().isEmpty()) && queryDTO.getLeftFeatureGroupStartTime() != null) {
            // incremental query: no joins and a start time, resolve the commit matching the start time
            Long exactStartCommitTimestamp = featureGroupCommitCommitController.findCommitByDate(query.getFeaturegroup(), queryDTO.getLeftFeatureGroupStartTime()).getCommittedOn();
            query.setLeftFeatureGroupStartTimestamp(exactStartCommitTimestamp);
        } else if (queryDTO.getJoins() != null && queryDTO.getLeftFeatureGroupStartTime() != null) {
            // reached only when joins are present together with a start time
            throw new IllegalArgumentException("For incremental queries start time must be provided and " + "join statements are not allowed");
        }
    }
    // If there are any joins, recursively convert the Join's QueryDTO into the internal Query representation
    if (queryDTO.getJoins() != null && !queryDTO.getJoins().isEmpty()) {
        query.setJoins(convertJoins(query, queryDTO.getJoins(), fgAliasLookup, fgLookup, availableFeatureLookup, pitEnabled));
        // remove duplicated join columns
        removeDuplicateColumns(query, pitEnabled);
    }
    // If there is a filter, convert its filter logic into the internal representation
    if (queryDTO.getFilter() != null) {
        query.setFilter(filterController.convertFilterLogic(queryDTO.getFilter(), fgLookup, availableFeatureLookup));
    }
    return query;
}
Also used : FeaturegroupFacade(io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupFacade) HashMap(java.util.HashMap) OnlineFeaturestoreController(io.hops.hopsworks.common.featurestore.online.OnlineFeaturestoreController) Project(io.hops.hopsworks.persistence.entity.project.Project) ArrayList(java.util.ArrayList) Level(java.util.logging.Level) Strings(com.google.common.base.Strings) FeaturestoreException(io.hops.hopsworks.exceptions.FeaturestoreException) FeatureGroupCommitController(io.hops.hopsworks.common.featurestore.featuregroup.cached.FeatureGroupCommitController) TransactionAttributeType(javax.ejb.TransactionAttributeType) TransactionAttribute(javax.ejb.TransactionAttribute) Map(java.util.Map) FeatureGroupFeatureDTO(io.hops.hopsworks.common.featurestore.feature.FeatureGroupFeatureDTO) EJB(javax.ejb.EJB) JoinDTO(io.hops.hopsworks.common.featurestore.query.join.JoinDTO) JoinType(org.apache.calcite.sql.JoinType) Stateless(javax.ejb.Stateless) SqlCondition(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlCondition) FeatureGroupCommit(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.FeatureGroupCommit) Featuregroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup) RESTCodes(io.hops.hopsworks.restutils.RESTCodes) Join(io.hops.hopsworks.common.featurestore.query.join.Join) Collectors(java.util.stream.Collectors) FeaturegroupController(io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupController) TimeTravelFormat(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.TimeTravelFormat) List(java.util.List) FeaturestoreFacade(io.hops.hopsworks.common.featurestore.FeaturestoreFacade) FilterController(io.hops.hopsworks.common.featurestore.query.filter.FilterController) Optional(java.util.Optional) Users(io.hops.hopsworks.persistence.entity.user.Users) FeaturegroupDTO(io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupDTO) 
Featuregroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup) FeatureGroupCommit(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.FeatureGroupCommit)

Example 25 with Join

use of io.hops.hopsworks.common.featurestore.query.join.Join in project hopsworks by logicalclocks.

In class QueryController, method populateFgLookupTables:

/**
 * Fills the three lookup maps for the left feature group of {@code queryDTO} and, recursively, for the
 * feature groups of all its joins. Joined queries are processed first (depth first); the running id is
 * incremented after each join and finally used to generate the alias of this query's feature group.
 *
 * @param queryDTO the query whose feature groups should be registered
 * @param fgId running counter used to generate feature group aliases
 * @param fgAliasLookup receives feature group id to generated alias
 * @param fgLookup receives feature group id to validated feature group
 * @param availableFeatureLookup receives feature group id to the features available in that feature group
 * @param project passed to the feature lookup
 * @param user passed to the feature lookup
 * @param prefix prefix set on every feature created for this query's feature group
 * @return the counter value that was used for this query's feature group alias
 * @throws FeaturestoreException declared by this method; raised by the helpers it delegates to
 */
public int populateFgLookupTables(QueryDTO queryDTO, int fgId, Map<Integer, String> fgAliasLookup, Map<Integer, Featuregroup> fgLookup, Map<Integer, List<Feature>> availableFeatureLookup, Project project, Users user, String prefix) throws FeaturestoreException {
    int aliasCounter = fgId;

    // go into depth first
    if (queryDTO.getJoins() != null && !queryDTO.getJoins().isEmpty()) {
        for (JoinDTO joinDTO : queryDTO.getJoins()) {
            // each joined query is registered with the join's own prefix; the counter moves past the id it used
            aliasCounter = populateFgLookupTables(joinDTO.getQuery(), aliasCounter, fgAliasLookup, fgLookup,
                availableFeatureLookup, project, user, joinDTO.getPrefix()) + 1;
        }
    }

    Featuregroup fg = validateFeaturegroupDTO(queryDTO.getLeftFeatureGroup());
    fgLookup.put(fg.getId(), fg);
    fgAliasLookup.put(fg.getId(), generateAs(aliasCounter));

    // every feature carries the alias registered above, read back from the lookup map
    List<Feature> availableFeatures = featuregroupController.getFeatures(fg, project, user)
        .stream()
        .map(dto -> new Feature(dto.getName(), fgAliasLookup.get(fg.getId()), dto.getType(),
            dto.getDefaultValue(), dto.getPrimary(), fg, prefix))
        .collect(Collectors.toList());
    availableFeatureLookup.put(fg.getId(), availableFeatures);

    return aliasCounter;
}
Also used : FeaturegroupFacade(io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupFacade) HashMap(java.util.HashMap) OnlineFeaturestoreController(io.hops.hopsworks.common.featurestore.online.OnlineFeaturestoreController) Project(io.hops.hopsworks.persistence.entity.project.Project) ArrayList(java.util.ArrayList) Level(java.util.logging.Level) Strings(com.google.common.base.Strings) FeaturestoreException(io.hops.hopsworks.exceptions.FeaturestoreException) FeatureGroupCommitController(io.hops.hopsworks.common.featurestore.featuregroup.cached.FeatureGroupCommitController) TransactionAttributeType(javax.ejb.TransactionAttributeType) TransactionAttribute(javax.ejb.TransactionAttribute) Map(java.util.Map) FeatureGroupFeatureDTO(io.hops.hopsworks.common.featurestore.feature.FeatureGroupFeatureDTO) EJB(javax.ejb.EJB) JoinDTO(io.hops.hopsworks.common.featurestore.query.join.JoinDTO) JoinType(org.apache.calcite.sql.JoinType) Stateless(javax.ejb.Stateless) SqlCondition(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlCondition) FeatureGroupCommit(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.FeatureGroupCommit) Featuregroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup) RESTCodes(io.hops.hopsworks.restutils.RESTCodes) Join(io.hops.hopsworks.common.featurestore.query.join.Join) Collectors(java.util.stream.Collectors) FeaturegroupController(io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupController) TimeTravelFormat(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.TimeTravelFormat) List(java.util.List) FeaturestoreFacade(io.hops.hopsworks.common.featurestore.FeaturestoreFacade) FilterController(io.hops.hopsworks.common.featurestore.query.filter.FilterController) Optional(java.util.Optional) Users(io.hops.hopsworks.persistence.entity.user.Users) FeaturegroupDTO(io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupDTO) 
JoinDTO(io.hops.hopsworks.common.featurestore.query.join.JoinDTO) Featuregroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup)

Aggregations

Join (io.hops.hopsworks.common.featurestore.query.join.Join)45 ArrayList (java.util.ArrayList)39 Test (org.junit.Test)31 Query (io.hops.hopsworks.common.featurestore.query.Query)21 SparkSqlDialect (org.apache.calcite.sql.dialect.SparkSqlDialect)21 SqlCondition (io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlCondition)19 Feature (io.hops.hopsworks.common.featurestore.query.Feature)17 Featuregroup (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup)12 List (java.util.List)11 Collectors (java.util.stream.Collectors)11 EJB (javax.ejb.EJB)11 Stateless (javax.ejb.Stateless)11 TransactionAttribute (javax.ejb.TransactionAttribute)11 TransactionAttributeType (javax.ejb.TransactionAttributeType)11 FeaturestoreException (io.hops.hopsworks.exceptions.FeaturestoreException)9 RESTCodes (io.hops.hopsworks.restutils.RESTCodes)9 Level (java.util.logging.Level)9 JoinType (org.apache.calcite.sql.JoinType)9 FeaturegroupController (io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupController)8 OnlineFeaturestoreController (io.hops.hopsworks.common.featurestore.online.OnlineFeaturestoreController)8