Search in sources :

Example 11 with TrainingDataset

use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset in project hopsworks by logicalclocks.

In class PreparedStatementBuilder, method getServingStatements.

/**
 * Builds the serving prepared statements for a training dataset, one per feature group join
 * that contributes at least one non-label feature.
 *
 * <p>For every join returned by {@code trainingDatasetController.getJoinsSorted}, the join's
 * non-label training dataset features are selected (ordered by their index) and a query over the
 * join's feature group is handed to {@code buildDTO} together with the feature group's primary
 * key features.
 *
 * @param trainingDataset the training dataset; must have been generated by a query
 * @param project         project passed to the feature group feature lookup
 * @param user            user passed to the feature group feature lookup
 * @param batch           forwarded unchanged to {@code buildDTO}
 * @return the prepared statement DTOs, in the order the joins were returned
 * @throws FeaturestoreException if the training dataset was not generated by a query, if one of
 *                               its features no longer references a feature group, if a joined
 *                               feature group is not online enabled, or if a joined feature
 *                               group has no primary key
 */
private List<ServingPreparedStatementDTO> getServingStatements(TrainingDataset trainingDataset, Project project, Users user, boolean batch) throws FeaturestoreException {
    if (!trainingDataset.isQuery()) {
        throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_NO_QUERY, Level.FINE, "Inference vector is only available for datasets generated by queries");
    }
    List<ServingPreparedStatementDTO> servingPreparedStatementDTOS = new ArrayList<>();
    List<TrainingDatasetJoin> joins = trainingDatasetController.getJoinsSorted(trainingDataset);
    // Check that all the feature groups still exist, if not throw a reasonable error
    if (trainingDataset.getFeatures().stream().anyMatch(feature -> feature.getFeatureGroup() == null)) {
        throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_QUERY_FG_DELETED, Level.FINE);
    }
    // each join is a feature group, iterate over them.
    for (TrainingDatasetJoin join : joins) {
        Featuregroup featuregroup = join.getFeatureGroup();
        // A feature group without a cached feature group entity has no online-enabled flag to consult;
        // report it as not online enabled instead of failing with a NullPointerException.
        if (featuregroup.getCachedFeaturegroup() == null || !featuregroup.getCachedFeaturegroup().isOnlineEnabled()) {
            throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.FEATURESTORE_ONLINE_NOT_ENABLED, Level.FINE, "Inference vector is only available for training datasets generated by online enabled " + "feature groups");
        }
        // Index the feature group's features by name so training dataset features can be resolved below.
        Map<String, Feature> featureGroupFeatures = featuregroupController.getFeatures(featuregroup, project, user).stream().collect(Collectors.toMap(FeatureGroupFeatureDTO::getName, f -> new Feature(f.getName(), ALIAS, f.getType(), f.getPrimary(), f.getDefaultValue(), join.getPrefix())));
        // Identify and create primary key features for this feature group. Primary key features may not be part of
        // the query that generated the training dataset.
        List<Feature> primaryKeys = featureGroupFeatures.values().stream().filter(Feature::isPrimary).collect(Collectors.toList());
        if (primaryKeys.isEmpty()) {
            throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.PRIMARY_KEY_REQUIRED, Level.FINE, "Inference vector is only available for training datasets generated by feature groups with " + "at least 1 primary key");
        }
        // create td features: non-label features of this join, in index order
        List<Feature> selectFeatures = join.getFeatures().stream().filter(tdf -> !tdf.isLabel()).sorted(Comparator.comparing(TrainingDatasetFeature::getIndex)).map(tdf -> featureGroupFeatures.get(tdf.getName())).collect(Collectors.toList());
        // A join may contribute only label features. Labels are not part of the prepared statement,
        // thus don't add a query for such a join.
        if (!selectFeatures.isEmpty()) {
            // construct query for this feature group
            Query query = new Query(featurestoreController.getOfflineFeaturestoreDbName(featuregroup.getFeaturestore().getProject()), onlineFeaturestoreController.getOnlineFeaturestoreDbName(featuregroup.getFeaturestore().getProject()), featuregroup, ALIAS, selectFeatures);
            // construct ServingPreparedStatementDTO and add to the list
            servingPreparedStatementDTOS.add(buildDTO(query, primaryKeys, featuregroup.getId(), join.getIndex(), batch));
        }
    }
    return servingPreparedStatementDTOS;
}
Also used : Arrays(java.util.Arrays) Feature(io.hops.hopsworks.common.featurestore.query.Feature) TrainingDatasetController(io.hops.hopsworks.common.featurestore.trainingdatasets.TrainingDatasetController) OnlineFeaturestoreController(io.hops.hopsworks.common.featurestore.online.OnlineFeaturestoreController) Project(io.hops.hopsworks.persistence.entity.project.Project) ArrayList(java.util.ArrayList) Level(java.util.logging.Level) FeaturestoreException(io.hops.hopsworks.exceptions.FeaturestoreException) TransactionAttributeType(javax.ejb.TransactionAttributeType) FeaturestoreController(io.hops.hopsworks.common.featurestore.FeaturestoreController) TransactionAttribute(javax.ejb.TransactionAttribute) Map(java.util.Map) ResourceRequest(io.hops.hopsworks.common.api.ResourceRequest) TrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset) Query(io.hops.hopsworks.common.featurestore.query.Query) Filter(io.hops.hopsworks.common.featurestore.query.filter.Filter) URI(java.net.URI) FeatureGroupFeatureDTO(io.hops.hopsworks.common.featurestore.feature.FeatureGroupFeatureDTO) EJB(javax.ejb.EJB) Stateless(javax.ejb.Stateless) SqlCondition(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlCondition) ServingPreparedStatementDTO(io.hops.hopsworks.common.featurestore.query.ServingPreparedStatementDTO) TrainingDatasetFeature(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFeature) Featuregroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup) RESTCodes(io.hops.hopsworks.restutils.RESTCodes) ConstructorController(io.hops.hopsworks.common.featurestore.query.ConstructorController) Featurestore(io.hops.hopsworks.persistence.entity.featurestore.Featurestore) Collectors(java.util.stream.Collectors) SqlDialect(org.apache.calcite.sql.SqlDialect) FeaturegroupController(io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupController) 
TrainingDatasetJoin(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetJoin) List(java.util.List) FilterController(io.hops.hopsworks.common.featurestore.query.filter.FilterController) FilterLogic(io.hops.hopsworks.common.featurestore.query.filter.FilterLogic) PreparedStatementParameterDTO(io.hops.hopsworks.common.featurestore.query.PreparedStatementParameterDTO) MysqlSqlDialect(org.apache.calcite.sql.dialect.MysqlSqlDialect) UriInfo(javax.ws.rs.core.UriInfo) Users(io.hops.hopsworks.persistence.entity.user.Users) Comparator(java.util.Comparator) TrainingDatasetJoin(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetJoin) Query(io.hops.hopsworks.common.featurestore.query.Query) ServingPreparedStatementDTO(io.hops.hopsworks.common.featurestore.query.ServingPreparedStatementDTO) Featuregroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup) ArrayList(java.util.ArrayList) Feature(io.hops.hopsworks.common.featurestore.query.Feature) TrainingDatasetFeature(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFeature) FeaturestoreException(io.hops.hopsworks.exceptions.FeaturestoreException)

Example 12 with TrainingDataset

use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset in project hopsworks by logicalclocks.

In class PreparedStatementBuilder, method build.

/**
 * Builds the response DTO describing the serving prepared statements of a training dataset.
 *
 * <p>The training dataset is looked up by id in the given feature store and its serving
 * statements are computed up front. The href and expand flag are always set on the returned DTO;
 * the statements and their count are attached only when the DTO reports that it is expanded.
 *
 * @param uriInfo           request URI information used to build the href
 * @param resourceRequest   resource request from which the expand flag is derived
 * @param project           project the request is made in
 * @param user              user on whose behalf the statements are built
 * @param featurestore      feature store containing the training dataset
 * @param trainingDatasetId id of the training dataset
 * @param batch             forwarded unchanged to {@code getServingStatements}
 * @return the populated DTO
 * @throws FeaturestoreException if the serving statements cannot be built, or if raised by a
 *                               delegated call
 */
public ServingPreparedStatementDTO build(UriInfo uriInfo, ResourceRequest resourceRequest, Project project, Users user, Featurestore featurestore, Integer trainingDatasetId, boolean batch) throws FeaturestoreException {
    TrainingDataset dataset = trainingDatasetController.getTrainingDatasetById(featurestore, trainingDatasetId);
    // Computed before the response object exists so any failure surfaces first.
    List<ServingPreparedStatementDTO> statements = getServingStatements(dataset, project, user, batch);

    ServingPreparedStatementDTO response = new ServingPreparedStatementDTO();
    response.setHref(uri(uriInfo, project, featurestore, dataset));
    response.setExpand(expand(resourceRequest));
    if (!response.isExpand()) {
        // Not expanded: return the bare reference without items or count.
        return response;
    }
    response.setItems(statements);
    response.setCount((long) statements.size());
    return response;
}
Also used : TrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset) ServingPreparedStatementDTO(io.hops.hopsworks.common.featurestore.query.ServingPreparedStatementDTO)

Example 13 with TrainingDataset

use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset in project hopsworks by logicalclocks.

In class TrainingDatasetControllerTest, method testconvertToFilterEntities_bothLogic.

@Test
public void testconvertToFilterEntities_bothLogic() throws Exception {
    // Filter tree under test: fg.feature > 1 AND (fg.feature > 2 OR fg.feature > 3)
    // Both operands of the AND are nested logic nodes: "fg.feature > 1" is wrapped in a SINGLE
    // logic node, the right operand is an OR logic node holding two filters.
    Feature feature = new Feature("test_f", "fg0");

    FilterLogic leftBranch = new FilterLogic();
    leftBranch.setType(SINGLE);
    leftBranch.setLeftFilter(new Filter(feature, GREATER_THAN, "1"));

    FilterLogic rightBranch = new FilterLogic();
    rightBranch.setType(OR);
    rightBranch.setLeftFilter(new Filter(feature, GREATER_THAN, "2"));
    rightBranch.setRightFilter(new Filter(feature, GREATER_THAN, "3"));

    FilterLogic root = new FilterLogic();
    root.setType(AND);
    root.setLeftLogic(leftBranch);
    root.setRightLogic(rightBranch);

    List<TrainingDatasetFilter> actual = target.convertToFilterEntities(root, new TrainingDataset(), "L");

    // One entity per node of the tree, addressed by its path from the root ("L").
    List<TrainingDatasetFilter> expected = new ArrayList<>();
    expected.add(createTrainingDatasetFilter(null, AND, "L"));
    expected.add(createTrainingDatasetFilter(createTrainingDatasetFilterCondition("test_f", GREATER_THAN, "1"), SINGLE, "L.L"));
    expected.add(createTrainingDatasetFilter(null, OR, "L.R"));
    expected.add(createTrainingDatasetFilter(createTrainingDatasetFilterCondition("test_f", GREATER_THAN, "2"), SINGLE, "L.R.L"));
    expected.add(createTrainingDatasetFilter(createTrainingDatasetFilterCondition("test_f", GREATER_THAN, "3"), SINGLE, "L.R.R"));

    // Order-insensitive comparison: same size and mutual containment.
    Assert.assertEquals(expected.size(), actual.size());
    Assert.assertTrue(expected.containsAll(actual));
    Assert.assertTrue(actual.containsAll(expected));
}
Also used : TrainingDatasetFilter(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFilter) Filter(io.hops.hopsworks.common.featurestore.query.filter.Filter) TrainingDatasetFilter(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFilter) TrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset) SqlFilterLogic(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlFilterLogic) FilterLogic(io.hops.hopsworks.common.featurestore.query.filter.FilterLogic) ArrayList(java.util.ArrayList) Feature(io.hops.hopsworks.common.featurestore.query.Feature) Test(org.junit.Test)

Example 14 with TrainingDataset

use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset in project hopsworks by logicalclocks.

In class TrainingDatasetControllerTest, method testconvertToFilterEntities_leftOnlyFilter.

@Test
public void testconvertToFilterEntities_leftOnlyFilter() throws Exception {
    // Filter tree under test: fg.feature > 2
    // The single filter is stored on the left-hand side of a SINGLE logic node.
    Feature feature = new Feature("test_f", "fg0");
    Filter onlyFilter = new Filter(feature, GREATER_THAN, "2");

    FilterLogic root = new FilterLogic();
    root.setType(SINGLE);
    root.setLeftFilter(onlyFilter);

    List<TrainingDatasetFilter> actual = target.convertToFilterEntities(root, new TrainingDataset(), "L");

    // A lone filter yields exactly one entity at the root path.
    List<TrainingDatasetFilter> expected = new ArrayList<>();
    expected.add(createTrainingDatasetFilter(createTrainingDatasetFilterCondition("test_f", GREATER_THAN, "2"), SINGLE, "L"));

    // Order-insensitive comparison: same size and mutual containment.
    Assert.assertEquals(expected.size(), actual.size());
    Assert.assertTrue(expected.containsAll(actual));
    Assert.assertTrue(actual.containsAll(expected));
}
Also used : TrainingDatasetFilter(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFilter) Filter(io.hops.hopsworks.common.featurestore.query.filter.Filter) TrainingDatasetFilter(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFilter) TrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset) SqlFilterLogic(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlFilterLogic) FilterLogic(io.hops.hopsworks.common.featurestore.query.filter.FilterLogic) ArrayList(java.util.ArrayList) Feature(io.hops.hopsworks.common.featurestore.query.Feature) Test(org.junit.Test)

Example 15 with TrainingDataset

use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset in project hopsworks by logicalclocks.

In class TrainingDatasetControllerTest, method testconvertToFilterEntities_bothFilter.

@Test
public void testconvertToFilterEntities_bothFilter() throws Exception {
    // Filter tree under test: fg.feature > 1 AND fg.feature > 2
    // Both operands are stored directly as filters on the AND node (no nested logic nodes).
    Feature feature = new Feature("test_f", "fg0");

    FilterLogic root = new FilterLogic();
    root.setType(AND);
    root.setLeftFilter(new Filter(feature, GREATER_THAN, "1"));
    root.setRightFilter(new Filter(feature, GREATER_THAN, "2"));

    List<TrainingDatasetFilter> actual = target.convertToFilterEntities(root, new TrainingDataset(), "L");

    // The AND node itself plus one SINGLE entity per operand, addressed by path from the root.
    List<TrainingDatasetFilter> expected = new ArrayList<>();
    expected.add(createTrainingDatasetFilter(null, AND, "L"));
    expected.add(createTrainingDatasetFilter(createTrainingDatasetFilterCondition("test_f", GREATER_THAN, "1"), SINGLE, "L.L"));
    expected.add(createTrainingDatasetFilter(createTrainingDatasetFilterCondition("test_f", GREATER_THAN, "2"), SINGLE, "L.R"));

    // Order-insensitive comparison: same size and mutual containment.
    Assert.assertEquals(expected.size(), actual.size());
    Assert.assertTrue(expected.containsAll(actual));
    Assert.assertTrue(actual.containsAll(expected));
}
Also used : TrainingDatasetFilter(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFilter) Filter(io.hops.hopsworks.common.featurestore.query.filter.Filter) TrainingDatasetFilter(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFilter) TrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset) SqlFilterLogic(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlFilterLogic) FilterLogic(io.hops.hopsworks.common.featurestore.query.filter.FilterLogic) ArrayList(java.util.ArrayList) Feature(io.hops.hopsworks.common.featurestore.query.Feature) Test(org.junit.Test)

Aggregations

TrainingDataset (io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset)29 Users (io.hops.hopsworks.persistence.entity.user.Users)12 ArrayList (java.util.ArrayList)11 Feature (io.hops.hopsworks.common.featurestore.query.Feature)10 Filter (io.hops.hopsworks.common.featurestore.query.filter.Filter)10 FilterLogic (io.hops.hopsworks.common.featurestore.query.filter.FilterLogic)10 Path (javax.ws.rs.Path)10 DatasetPath (io.hops.hopsworks.common.dataset.util.DatasetPath)9 JWTRequired (io.hops.hopsworks.jwt.annotation.JWTRequired)9 SqlFilterLogic (io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlFilterLogic)9 TrainingDatasetFilter (io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFilter)9 ApiOperation (io.swagger.annotations.ApiOperation)9 AllowedProjectRoles (io.hops.hopsworks.api.filter.AllowedProjectRoles)8 ApiKeyRequired (io.hops.hopsworks.api.filter.apiKey.ApiKeyRequired)8 ResourceRequest (io.hops.hopsworks.common.api.ResourceRequest)8 FeaturestoreException (io.hops.hopsworks.exceptions.FeaturestoreException)8 ExternalTrainingDataset (io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.external.ExternalTrainingDataset)8 HopsfsTrainingDataset (io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.hopsfs.HopsfsTrainingDataset)8 Produces (javax.ws.rs.Produces)8 Test (org.junit.Test)8