Search in sources:

Example 11 with Featuregroup

use of io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup in project hopsworks by logicalclocks.

the class QueryController method convertJoins.

/**
 * Converts the given JoinDTOs into the internal {@link Join} representation.
 *
 * <p>For every DTO the right-hand sub-query is converted first via {@code convertQueryDTO}.
 * The joining keys are then selected with the following precedence:
 * <ol>
 *   <li>{@code on}: the same feature names are used on both sides of the join;</li>
 *   <li>{@code leftOn} / {@code rightOn}: explicit feature names for each side;</li>
 *   <li>otherwise the join is delegated to {@code extractPrimaryKeysJoin}.</li>
 * </ol>
 *
 * @param leftQuery the query on the left side of every join
 * @param joinDTOS the joins to convert; each one must carry a sub-query
 * @param fgAliasLookup passed through unchanged to {@code convertQueryDTO}
 * @param fgLookup passed through unchanged to {@code convertQueryDTO}
 * @param availableFeatureLookup passed through unchanged to {@code convertQueryDTO}
 * @param pitEnabled passed through unchanged to {@code convertQueryDTO}
 * @return one {@link Join} per DTO, in the order of {@code joinDTOS}
 * @throws FeaturestoreException propagated from the conversion helpers called here
 * @throws IllegalArgumentException if a DTO has no sub-query, or specifies {@code leftOn}
 *     without {@code rightOn}
 */
private List<Join> convertJoins(Query leftQuery, List<JoinDTO> joinDTOS, Map<Integer, String> fgAliasLookup, Map<Integer, Featuregroup> fgLookup, Map<Integer, List<Feature>> availableFeatureLookup, boolean pitEnabled) throws FeaturestoreException {
    List<Join> joins = new ArrayList<>();
    for (JoinDTO joinDTO : joinDTOS) {
        if (joinDTO.getQuery() == null) {
            throw new IllegalArgumentException("Subquery not specified");
        }
        // Recursively convert the QueryDTO. Currently we don't support Joins of Joins
        Query rightQuery = convertQueryDTO(joinDTO.getQuery(), fgAliasLookup, fgLookup, availableFeatureLookup, pitEnabled);
        if (joinDTO.getOn() != null && !joinDTO.getOn().isEmpty()) {
            // Same feature names on both sides; two independent lists are built on purpose so
            // that the left and right side never share Feature instances.
            List<Feature> leftOn = joinDTO.getOn().stream().map(f -> new Feature(f.getName())).collect(Collectors.toList());
            List<Feature> rightOn = joinDTO.getOn().stream().map(f -> new Feature(f.getName())).collect(Collectors.toList());
            joins.add(extractLeftRightOn(leftQuery, rightQuery, leftOn, rightOn, joinDTO.getType(), joinDTO.getPrefix()));
        } else if (joinDTO.getLeftOn() != null && !joinDTO.getLeftOn().isEmpty()) {
            // leftOn without rightOn used to fail with a bare NullPointerException below;
            // reject it explicitly, in the same way as a missing sub-query.
            if (joinDTO.getRightOn() == null) {
                throw new IllegalArgumentException("Right join keys not specified");
            }
            List<Feature> leftOn = joinDTO.getLeftOn().stream().map(f -> new Feature(f.getName())).collect(Collectors.toList());
            List<Feature> rightOn = joinDTO.getRightOn().stream().map(f -> new Feature(f.getName())).collect(Collectors.toList());
            joins.add(extractLeftRightOn(leftQuery, rightQuery, leftOn, rightOn, joinDTO.getType(), joinDTO.getPrefix()));
        } else {
            // No explicit keys given: let extractPrimaryKeysJoin derive the join condition
            joins.add(extractPrimaryKeysJoin(leftQuery, rightQuery, joinDTO.getType(), joinDTO.getPrefix()));
        }
    }
    return joins;
}
Also used : FeaturegroupFacade(io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupFacade) HashMap(java.util.HashMap) OnlineFeaturestoreController(io.hops.hopsworks.common.featurestore.online.OnlineFeaturestoreController) Project(io.hops.hopsworks.persistence.entity.project.Project) ArrayList(java.util.ArrayList) Level(java.util.logging.Level) Strings(com.google.common.base.Strings) FeaturestoreException(io.hops.hopsworks.exceptions.FeaturestoreException) FeatureGroupCommitController(io.hops.hopsworks.common.featurestore.featuregroup.cached.FeatureGroupCommitController) TransactionAttributeType(javax.ejb.TransactionAttributeType) TransactionAttribute(javax.ejb.TransactionAttribute) Map(java.util.Map) FeatureGroupFeatureDTO(io.hops.hopsworks.common.featurestore.feature.FeatureGroupFeatureDTO) EJB(javax.ejb.EJB) JoinDTO(io.hops.hopsworks.common.featurestore.query.join.JoinDTO) JoinType(org.apache.calcite.sql.JoinType) Stateless(javax.ejb.Stateless) SqlCondition(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlCondition) FeatureGroupCommit(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.FeatureGroupCommit) Featuregroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup) RESTCodes(io.hops.hopsworks.restutils.RESTCodes) Join(io.hops.hopsworks.common.featurestore.query.join.Join) Collectors(java.util.stream.Collectors) FeaturegroupController(io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupController) TimeTravelFormat(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.TimeTravelFormat) List(java.util.List) FeaturestoreFacade(io.hops.hopsworks.common.featurestore.FeaturestoreFacade) FilterController(io.hops.hopsworks.common.featurestore.query.filter.FilterController) Optional(java.util.Optional) Users(io.hops.hopsworks.persistence.entity.user.Users) FeaturegroupDTO(io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupDTO) 
JoinDTO(io.hops.hopsworks.common.featurestore.query.join.JoinDTO) ArrayList(java.util.ArrayList) Join(io.hops.hopsworks.common.featurestore.query.join.Join) ArrayList(java.util.ArrayList) List(java.util.List)

Example 12 with Featuregroup

use of io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup in project hopsworks by logicalclocks.

the class FeaturegroupController method createFeaturegroupNoValidation.

/**
 * Creates a feature group from the given DTO without running input validation in this method.
 *
 * <p>Steps, in order: create the type-specific part (cached or on-demand), persist the common
 * metadata, optionally set up the online feature group, attach extended attributes to the
 * feature group path and log the creation activity.
 *
 * @param featurestore the featurestore the feature group is created in
 * @param featuregroupDTO description of the feature group; a {@code CachedFeaturegroupDTO} is
 *     handled as cached, anything else is cast to {@code OnDemandFeaturegroupDTO}
 * @param project the project on whose behalf the operation runs
 * @param user the user on whose behalf the operation runs
 * @return DTO built from the persisted feature group
 */
public FeaturegroupDTO createFeaturegroupNoValidation(Featurestore featurestore, FeaturegroupDTO featuregroupDTO, Project project, Users user) throws FeaturestoreException, SQLException, ProvenanceException, ServiceException, KafkaException, SchemaException, ProjectException, UserException, IOException, HopsSecurityException {
    final boolean cachedRequested = featuregroupDTO instanceof CachedFeaturegroupDTO;

    // Type-specific metadata: exactly one of the two stays null
    CachedFeaturegroup cachedPart = null;
    OnDemandFeaturegroup onDemandPart = null;
    List<FeatureGroupFeatureDTO> schemaSnapshot = null;
    if (!cachedRequested) {
        onDemandPart = onDemandFeaturegroupController.createOnDemandFeaturegroup(featurestore, (OnDemandFeaturegroupDTO) featuregroupDTO, project, user);
    } else {
        // Snapshot the schema before creating the cached feature group, to keep a copy
        // without hudi columns (presumably creation adds them to the DTO; confirm).
        schemaSnapshot = new ArrayList<>(featuregroupDTO.getFeatures());
        cachedPart = cachedFeaturegroupController.createCachedFeaturegroup(featurestore, (CachedFeaturegroupDTO) featuregroupDTO, project, user);
    }

    // Common metadata shared by both feature group types
    Featuregroup persisted = persistFeaturegroupMetadata(featurestore, user, featuregroupDTO, cachedPart, onDemandPart);

    // The online feature group can only be set up once the metadata is persisted,
    // because the feature group id is needed
    if (cachedRequested && settings.isOnlineFeaturestore() && persisted.getCachedFeaturegroup().isOnlineEnabled()) {
        onlineFeaturegroupController.setupOnlineFeatureGroup(featurestore, persisted, schemaSnapshot, project, user);
    }

    FeaturegroupDTO result = convertFeaturegrouptoDTO(persisted, project, user);

    // Attach the metadata as extended attributes using a file system client of the HDFS user
    DistributedFileSystemOps udfso = dfs.getDfsOps(hdfsUsersController.getHdfsUserName(project, user));
    try {
        String featurestoreRoot = Utils.getFeaturestorePath(featurestore.getProject(), settings);
        String fgPath = featurestoreRoot + "/" + Utils.getFeaturegroupName(persisted);
        fsController.featuregroupAttachXAttrs(fgPath, result, udfso);
    } finally {
        dfs.closeDfsClient(udfso);
    }

    // Record the creation in the activity log
    fsActivityFacade.logMetadataActivity(user, persisted, FeaturestoreActivityMeta.FG_CREATED, null);
    return result;
}
Also used : OnDemandFeaturegroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.ondemand.OnDemandFeaturegroup) FeatureGroupFeatureDTO(io.hops.hopsworks.common.featurestore.feature.FeatureGroupFeatureDTO) Featuregroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup) OnDemandFeaturegroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.ondemand.OnDemandFeaturegroup) CachedFeaturegroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeaturegroup) DistributedFileSystemOps(io.hops.hopsworks.common.hdfs.DistributedFileSystemOps) CachedFeaturegroupDTO(io.hops.hopsworks.common.featurestore.featuregroup.cached.CachedFeaturegroupDTO) OnDemandFeaturegroupDTO(io.hops.hopsworks.common.featurestore.featuregroup.ondemand.OnDemandFeaturegroupDTO) OnDemandFeaturegroupDTO(io.hops.hopsworks.common.featurestore.featuregroup.ondemand.OnDemandFeaturegroupDTO) CachedFeaturegroupDTO(io.hops.hopsworks.common.featurestore.featuregroup.cached.CachedFeaturegroupDTO) CachedFeaturegroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeaturegroup)

Example 13 with Featuregroup

use of io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup in project hopsworks by logicalclocks.

the class FeaturegroupController method updateFeaturegroupMetadata.

/**
 * Updates the metadata of an existing feature group.
 *
 * <p>According to the original notes, the supported updates are: adding new features, the
 * feature group description and the feature descriptions.
 *
 * @param project         the project on whose behalf the operation runs
 * @param user            the user on whose behalf the operation runs
 * @param featurestore    the featurestore where the featuregroup resides
 * @param featuregroupDTO the updated featuregroup metadata; its id selects the feature group
 * @return DTO of the updated feature group, rebuilt from the re-read entity
 * @throws FeaturestoreException
 */
public FeaturegroupDTO updateFeaturegroupMetadata(Project project, Users user, Featurestore featurestore, FeaturegroupDTO featuregroupDTO) throws FeaturestoreException, SQLException, ProvenanceException, ServiceException, SchemaException, KafkaException {
    Featuregroup current = getFeaturegroupById(featurestore, featuregroupDTO.getId());

    // Validate the general, type-independent input first
    featurestoreInputValidation.verifyDescription(featuregroupDTO);
    featureGroupInputValidation.verifyFeatureGroupFeatureList(featuregroupDTO.getFeatures());

    // Dispatch to the controller matching the stored feature group type;
    // any other type is left untouched
    FeaturegroupType storedType = current.getFeaturegroupType();
    if (storedType == FeaturegroupType.CACHED_FEATURE_GROUP) {
        cachedFeaturegroupController.updateMetadata(project, user, current, (CachedFeaturegroupDTO) featuregroupDTO);
    } else if (storedType == FeaturegroupType.ON_DEMAND_FEATURE_GROUP) {
        onDemandFeaturegroupController.updateOnDemandFeaturegroupMetadata(current.getOnDemandFeaturegroup(), (OnDemandFeaturegroupDTO) featuregroupDTO);
    }

    // Re-read the feature group after the update (alter table) and rebuild the DTO from it
    Featuregroup refreshed = getFeaturegroupById(featurestore, featuregroupDTO.getId());
    FeaturegroupDTO refreshedDTO = convertFeaturegrouptoDTO(refreshed, project, user);

    // Re-attach the extended attributes using a file system client of the HDFS user
    DistributedFileSystemOps udfso = dfs.getDfsOps(hdfsUsersController.getHdfsUserName(project, user));
    try {
        String featurestoreRoot = Utils.getFeaturestorePath(featurestore.getProject(), settings);
        String fgPath = featurestoreRoot + "/" + Utils.getFeaturegroupName(refreshed);
        fsController.featuregroupAttachXAttrs(fgPath, refreshedDTO, udfso);
    } finally {
        dfs.closeDfsClient(udfso);
    }
    return refreshedDTO;
}
Also used : Featuregroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup) OnDemandFeaturegroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.ondemand.OnDemandFeaturegroup) CachedFeaturegroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeaturegroup) DistributedFileSystemOps(io.hops.hopsworks.common.hdfs.DistributedFileSystemOps) OnDemandFeaturegroupDTO(io.hops.hopsworks.common.featurestore.featuregroup.ondemand.OnDemandFeaturegroupDTO)

Example 14 with Featuregroup

use of io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup in project hopsworks by logicalclocks.

the class FeaturegroupController method getFeaturegroupsForFeaturestore.

/**
 * Lists the feature groups of a featurestore as DTOs.
 *
 * <p>When {@code expectationNames} is null or empty, all feature groups of the featurestore are
 * returned. Otherwise only the feature groups attached to the named expectations are returned,
 * each one at most once (compared by id) and in order of first appearance.
 *
 * @param featurestore featurestore to query featuregroups for
 * @param project the project passed on to the DTO conversion
 * @param user the user passed on to the DTO conversion
 * @param expectationNames optional expectation names to filter by; may be null
 * @return list of DTOs of the selected featuregroups
 * @throws FeaturestoreException if one of the named expectations cannot be found
 */
public List<FeaturegroupDTO> getFeaturegroupsForFeaturestore(Featurestore featurestore, Project project, Users user, Set<String> expectationNames) throws FeaturestoreException, ServiceException {
    List<Featuregroup> featuregroups;
    if (expectationNames == null || expectationNames.isEmpty()) {
        // No filter requested: take everything in the featurestore
        featuregroups = featuregroupFacade.findByFeaturestore(featurestore);
    } else {
        featuregroups = new ArrayList<>();
        for (String name : expectationNames) {
            for (FeatureGroupExpectation featureGroupExpectation : featureStoreExpectationFacade
                    .findByFeaturestoreAndName(featurestore, name)
                    .orElseThrow(() -> new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.FEATURE_STORE_EXPECTATION_NOT_FOUND, Level.FINE, name))
                    .getFeatureGroupExpectations()) {
                Featuregroup candidate = featureGroupExpectation.getFeaturegroup();
                // noneMatch is trivially true while the list is still empty, so the first
                // feature group is always added without any id comparison
                boolean notYetListed = featuregroups.stream().noneMatch(fg -> fg.getId().equals(candidate.getId()));
                if (notYetListed) {
                    featuregroups.add(candidate);
                }
            }
        }
    }

    // Plain loop on purpose: the conversion declares checked exceptions
    List<FeaturegroupDTO> featuregroupDTOS = new ArrayList<>();
    for (Featuregroup selected : featuregroups) {
        featuregroupDTOS.add(convertFeaturegrouptoDTO(selected, project, user));
    }
    return featuregroupDTOS;
}
Also used : FeatureGroupExpectation(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.datavalidation.FeatureGroupExpectation) Featuregroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup) OnDemandFeaturegroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.ondemand.OnDemandFeaturegroup) CachedFeaturegroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeaturegroup) ArrayList(java.util.ArrayList) FeaturestoreException(io.hops.hopsworks.exceptions.FeaturestoreException) OnDemandFeaturegroupDTO(io.hops.hopsworks.common.featurestore.featuregroup.ondemand.OnDemandFeaturegroupDTO) CachedFeaturegroupDTO(io.hops.hopsworks.common.featurestore.featuregroup.cached.CachedFeaturegroupDTO)

Example 15 with Featuregroup

use of io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup in project hopsworks by logicalclocks.

the class TestQueryController method setup.

/**
 * Builds the fixtures shared by the tests: a featurestore, four feature groups without time
 * travel, one HUDI feature group, their feature lists, mocked collaborators and the
 * {@code QueryController} under test.
 */
@Before
public void setup() {
    // Featurestore that every feature group below belongs to
    fs = new Featurestore();
    fs.setHiveDbId(1L);
    fs.setProject(new Project("test_proj"));

    // fg1..fg4 share one cached feature group without time travel
    cachedFeaturegroup = new CachedFeaturegroup();
    cachedFeaturegroup.setTimeTravelFormat(TimeTravelFormat.NONE);
    fg1 = newFeaturegroup(1, "fg1", cachedFeaturegroup);
    fg2 = newFeaturegroup(2, "fg2", cachedFeaturegroup);
    fg3 = newFeaturegroup(3, "fg3", cachedFeaturegroup);
    fg4 = newFeaturegroup(4, "fg4", cachedFeaturegroup);

    // fgHudi gets its own cached feature group with HUDI time travel
    hudiFeatureGroup = new CachedFeaturegroup();
    hudiFeatureGroup.setTimeTravelFormat(TimeTravelFormat.HUDI);
    fgHudi = newFeaturegroup(5, "fgHudi", hudiFeatureGroup);

    // fg1: features and the matching DTOs
    fg1Features = new ArrayList<>();
    fg1Features.add(new Feature("pr", "", true));
    fg1Features.add(new Feature("fg1_ft2", "", false));
    fg1FeaturesDTO = new ArrayList<>();
    fg1FeaturesDTO.add(new FeatureGroupFeatureDTO("pr", "Integer", "", true, false, "", null));
    fg1FeaturesDTO.add(new FeatureGroupFeatureDTO("fg1_ft2", "String", "", false, false, "", null));

    // fg2: features and the matching DTOs
    fg2Features = new ArrayList<>();
    fg2Features.add(new Feature("pr", "", true));
    fg2Features.add(new Feature("fg2_ft2", "", false));
    fg2FeaturesDTO = new ArrayList<>();
    fg2FeaturesDTO.add(new FeatureGroupFeatureDTO("pr", "Integer", "", true, false, "", null));
    fg2FeaturesDTO.add(new FeatureGroupFeatureDTO("fg2_ft2", "String", "", false, false, "", null));

    // fg3: features only
    fg3Features = new ArrayList<>();
    fg3Features.add(new Feature("fg3_ft1", "", true));
    fg3Features.add(new Feature("fg3_ft2", "", false));

    // fg4: prefixed features plus the _hoodie_* columns
    fg4Features = new ArrayList<>();
    fg4Features.add(new Feature("pr", "fg4", true));
    fg4Features.add(new Feature("fg4_ft4_1", "fg4", "Float", null, "prefix4_"));
    fg4Features.add(new Feature("fg4_ft4_2", "fg4", "Float", null, "prefix4_"));
    fg4Features.add(new Feature("_hoodie_record_key", "fg4", "String", null, null));
    fg4Features.add(new Feature("_hoodie_partition_path", "fg4", "String", null, null));
    fg4Features.add(new Feature("_hoodie_commit_time", "fg4", "String", null, null));
    fg4Features.add(new Feature("_hoodie_file_name", "fg4", "String", null, null));
    fg4Features.add(new Feature("_hoodie_commit_seqno", "fg4", "String", null, null));

    singleEqualsJoinOperator = Arrays.asList(SqlCondition.EQUALS);

    // Mocked collaborators
    featuregroupController = Mockito.mock(FeaturegroupController.class);
    featuregroupFacade = Mockito.mock(FeaturegroupFacade.class);
    featurestoreFacade = Mockito.mock(FeaturestoreFacade.class);
    onlineFeaturestoreController = Mockito.mock(OnlineFeaturestoreController.class);
    cachedFeaturegroupController = Mockito.mock(CachedFeaturegroupController.class);
    project = Mockito.mock(Project.class);
    user = Mockito.mock(Users.class);

    // Real filter controller and the controller under test
    filterController = new FilterController(new ConstructorController());
    target = new QueryController(featuregroupController, featuregroupFacade, filterController, featurestoreFacade, onlineFeaturestoreController);
    // NOTE(review): the instance created here is discarded; kept because the original does so
    new JoinController(new ConstructorController());
}

/**
 * Creates a version-1 feature group in {@code fs} with the given id, name and cached part.
 */
private Featuregroup newFeaturegroup(int id, String name, CachedFeaturegroup cachedPart) {
    Featuregroup featuregroup = new Featuregroup(id);
    featuregroup.setName(name);
    featuregroup.setVersion(1);
    featuregroup.setCachedFeaturegroup(cachedPart);
    featuregroup.setFeaturestore(fs);
    return featuregroup;
}
Also used : FeatureGroupFeatureDTO(io.hops.hopsworks.common.featurestore.feature.FeatureGroupFeatureDTO) FeaturegroupFacade(io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupFacade) Featuregroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup) CachedFeaturegroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeaturegroup) CachedFeaturegroupController(io.hops.hopsworks.common.featurestore.featuregroup.cached.CachedFeaturegroupController) Users(io.hops.hopsworks.persistence.entity.user.Users) FeaturestoreFacade(io.hops.hopsworks.common.featurestore.FeaturestoreFacade) Project(io.hops.hopsworks.persistence.entity.project.Project) Featurestore(io.hops.hopsworks.persistence.entity.featurestore.Featurestore) FilterController(io.hops.hopsworks.common.featurestore.query.filter.FilterController) JoinController(io.hops.hopsworks.common.featurestore.query.join.JoinController) OnlineFeaturestoreController(io.hops.hopsworks.common.featurestore.online.OnlineFeaturestoreController) CachedFeaturegroupController(io.hops.hopsworks.common.featurestore.featuregroup.cached.CachedFeaturegroupController) FeaturegroupController(io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupController) CachedFeaturegroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeaturegroup) Before(org.junit.Before)

Aggregations

Featuregroup (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup)48 CachedFeaturegroup (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeaturegroup)20 Users (io.hops.hopsworks.persistence.entity.user.Users)19 ArrayList (java.util.ArrayList)17 Project (io.hops.hopsworks.persistence.entity.project.Project)15 Path (javax.ws.rs.Path)13 DatasetPath (io.hops.hopsworks.common.dataset.util.DatasetPath)12 FeaturestoreException (io.hops.hopsworks.exceptions.FeaturestoreException)12 AllowedProjectRoles (io.hops.hopsworks.api.filter.AllowedProjectRoles)11 ApiKeyRequired (io.hops.hopsworks.api.filter.apiKey.ApiKeyRequired)11 FeatureGroupFeatureDTO (io.hops.hopsworks.common.featurestore.feature.FeatureGroupFeatureDTO)11 JWTRequired (io.hops.hopsworks.jwt.annotation.JWTRequired)11 Featurestore (io.hops.hopsworks.persistence.entity.featurestore.Featurestore)11 ApiOperation (io.swagger.annotations.ApiOperation)11 Produces (javax.ws.rs.Produces)11 FeaturegroupController (io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupController)9 Feature (io.hops.hopsworks.common.featurestore.query.Feature)9 FilterController (io.hops.hopsworks.common.featurestore.query.filter.FilterController)8 OnDemandFeaturegroup (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.ondemand.OnDemandFeaturegroup)8 HashMap (java.util.HashMap)8