Use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset in the project hopsworks by logicalclocks.
Class PreparedStatementBuilder, method getServingStatements.
/**
 * Builds one serving prepared statement per joined feature group that contributes at least one
 * non-label feature to the given training dataset.
 *
 * <p>Joins are visited in the order returned by {@code trainingDatasetController.getJoinsSorted}.
 * A join whose selected features are all labels produces no statement.
 *
 * @param trainingDataset training dataset to build the statements for; must be query based
 * @param project project passed on when fetching the features of each feature group
 * @param user user passed on when fetching the features of each feature group
 * @param batch forwarded unchanged to {@code buildDTO}
 * @return the statements in join iteration order; may be empty
 * @throws FeaturestoreException if the training dataset was not generated by a query, if a feature
 *     of the training dataset has no feature group anymore, if a joined feature group is not
 *     online enabled, or if a joined feature group has no primary key feature
 */
private List<ServingPreparedStatementDTO> getServingStatements(TrainingDataset trainingDataset, Project project,
    Users user, boolean batch) throws FeaturestoreException {
  if (!trainingDataset.isQuery()) {
    throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_NO_QUERY, Level.FINE,
        "Inference vector is only available for datasets generated by queries");
  }

  List<TrainingDatasetJoin> sortedJoins = trainingDatasetController.getJoinsSorted(trainingDataset);

  // Report a dedicated error when the feature group behind any feature has been deleted.
  boolean featureGroupMissing = trainingDataset.getFeatures().stream()
      .anyMatch(feature -> feature.getFeatureGroup() == null);
  if (featureGroupMissing) {
    throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_QUERY_FG_DELETED, Level.FINE);
  }

  List<ServingPreparedStatementDTO> statements = new ArrayList<>();

  // Every join corresponds to exactly one feature group.
  for (TrainingDatasetJoin tdJoin : sortedJoins) {
    Featuregroup fg = tdJoin.getFeatureGroup();
    // NOTE(review): getCachedFeaturegroup() is dereferenced without a null check - confirm that only
    // cached feature groups can reach this point.
    if (!fg.getCachedFeaturegroup().isOnlineEnabled()) {
      throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.FEATURESTORE_ONLINE_NOT_ENABLED, Level.FINE,
          "Inference vector is only available for training datasets generated by online enabled "
              + "feature groups");
    }

    // Index all features of the feature group by name, carrying the prefix of this join.
    Map<String, Feature> featuresByName = featuregroupController.getFeatures(fg, project, user).stream()
        .collect(Collectors.toMap(
            FeatureGroupFeatureDTO::getName,
            dto -> new Feature(dto.getName(), ALIAS, dto.getType(), dto.getPrimary(), dto.getDefaultValue(),
                tdJoin.getPrefix())));

    // Primary keys are taken from the feature group itself, since they need not be part of the
    // query that generated the training dataset.
    List<Feature> primaryKeyFeatures = featuresByName.values().stream()
        .filter(Feature::isPrimary)
        .collect(Collectors.toList());
    if (primaryKeyFeatures.isEmpty()) {
      throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.PRIMARY_KEY_REQUIRED, Level.FINE,
          "Inference vector is only available for training datasets generated by feature groups with "
              + "at least 1 primary key");
    }

    // Non-label training dataset features of this join, ordered by their index.
    List<Feature> selected = tdJoin.getFeatures().stream()
        .filter(tdFeature -> !tdFeature.isLabel())
        .sorted(Comparator.comparing(TrainingDatasetFeature::getIndex))
        .map(tdFeature -> featuresByName.get(tdFeature.getName()))
        .collect(Collectors.toList());

    // Nothing but labels was selected from this feature group, so there is nothing to serve from it.
    if (selected.isEmpty()) {
      continue;
    }

    // Build the query for this feature group and wrap it in a ServingPreparedStatementDTO.
    Query fgQuery = new Query(
        featurestoreController.getOfflineFeaturestoreDbName(fg.getFeaturestore().getProject()),
        onlineFeaturestoreController.getOnlineFeaturestoreDbName(fg.getFeaturestore().getProject()),
        fg,
        ALIAS,
        selected);
    statements.add(buildDTO(fgQuery, primaryKeyFeatures, fg.getId(), tdJoin.getIndex(), batch));
  }
  return statements;
}
Use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset in the project hopsworks by logicalclocks.
Class PreparedStatementBuilder, method build.
/**
 * Builds the REST representation of the serving prepared statements of a training dataset.
 *
 * <p>The statements are always computed, so any validation error surfaces regardless of the expand
 * setting; they are attached to the result, together with their count, only when the resource
 * request asks for expansion.
 *
 * @param uriInfo used to build the href of the returned DTO
 * @param resourceRequest decides whether the result is expanded
 * @param project project the request is made in
 * @param user user on whose behalf the statements are built
 * @param featurestore feature store the training dataset is looked up in
 * @param trainingDatasetId id of the training dataset
 * @param batch forwarded unchanged to {@code getServingStatements}
 * @return the DTO with href and expand set, plus items and count when expanded
 * @throws FeaturestoreException if the lookup of the training dataset or the construction of the
 *     statements fails
 */
public ServingPreparedStatementDTO build(UriInfo uriInfo, ResourceRequest resourceRequest, Project project,
    Users user, Featurestore featurestore, Integer trainingDatasetId, boolean batch)
    throws FeaturestoreException {
  TrainingDataset td = trainingDatasetController.getTrainingDatasetById(featurestore, trainingDatasetId);
  List<ServingPreparedStatementDTO> statements = getServingStatements(td, project, user, batch);

  ServingPreparedStatementDTO result = new ServingPreparedStatementDTO();
  result.setHref(uri(uriInfo, project, featurestore, td));
  result.setExpand(expand(resourceRequest));

  // Without expansion only the href and the expand flag are returned.
  if (!result.isExpand()) {
    return result;
  }

  result.setItems(statements);
  result.setCount((long) statements.size());
  return result;
}
Use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset in the project hopsworks by logicalclocks.
Class TrainingDatasetControllerTest, method testconvertToFilterEntities_bothLogic.
/**
 * Converts {@code fg.feature > 1 AND (fg.feature > 2 OR fg.feature > 3)}, where both operands of
 * the AND are nested filter logics, and expects one filter entity per node with the paths
 * "L", "L.L", "L.R", "L.R.L" and "L.R.R".
 */
@Test
public void testconvertToFilterEntities_bothLogic() throws Exception {
  TrainingDataset td = new TrainingDataset();
  Feature feature = new Feature("test_f", "fg0");

  // Left operand: "fg.feature > 1" wrapped in a SINGLE filter logic.
  FilterLogic leftLogic = new FilterLogic();
  leftLogic.setType(SINGLE);
  leftLogic.setLeftFilter(new Filter(feature, GREATER_THAN, "1"));

  // Right operand: "fg.feature > 2 OR fg.feature > 3".
  FilterLogic rightLogic = new FilterLogic();
  rightLogic.setType(OR);
  rightLogic.setLeftFilter(new Filter(feature, GREATER_THAN, "2"));
  rightLogic.setRightFilter(new Filter(feature, GREATER_THAN, "3"));

  // Root: AND over the two nested logics.
  FilterLogic root = new FilterLogic();
  root.setType(AND);
  root.setLeftLogic(leftLogic);
  root.setRightLogic(rightLogic);

  List<TrainingDatasetFilter> actual = target.convertToFilterEntities(root, td, "L");

  List<TrainingDatasetFilter> expected = new ArrayList<>();
  expected.add(createTrainingDatasetFilter(null, AND, "L"));
  expected.add(createTrainingDatasetFilter(
      createTrainingDatasetFilterCondition("test_f", GREATER_THAN, "1"), SINGLE, "L.L"));
  expected.add(createTrainingDatasetFilter(null, OR, "L.R"));
  expected.add(createTrainingDatasetFilter(
      createTrainingDatasetFilterCondition("test_f", GREATER_THAN, "2"), SINGLE, "L.R.L"));
  expected.add(createTrainingDatasetFilter(
      createTrainingDatasetFilterCondition("test_f", GREATER_THAN, "3"), SINGLE, "L.R.R"));

  // Same size plus mutual containment: the comparison does not depend on element order.
  Assert.assertEquals(expected.size(), actual.size());
  Assert.assertTrue(expected.containsAll(actual));
  Assert.assertTrue(actual.containsAll(expected));
}
Use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset in the project hopsworks by logicalclocks.
Class TrainingDatasetControllerTest, method testconvertToFilterEntities_leftOnlyFilter.
/**
 * Converts the single condition {@code fg.feature > 2}, stored as the left filter of a SINGLE
 * filter logic, and expects exactly one filter entity with the path "L".
 */
@Test
public void testconvertToFilterEntities_leftOnlyFilter() throws Exception {
  TrainingDataset td = new TrainingDataset();
  Feature feature = new Feature("test_f", "fg0");

  // The only condition lives on the left-hand side of the root logic.
  Filter onlyFilter = new Filter(feature, GREATER_THAN, "2");
  FilterLogic root = new FilterLogic();
  root.setType(SINGLE);
  root.setLeftFilter(onlyFilter);

  List<TrainingDatasetFilter> actual = target.convertToFilterEntities(root, td, "L");

  List<TrainingDatasetFilter> expected = new ArrayList<>();
  expected.add(createTrainingDatasetFilter(
      createTrainingDatasetFilterCondition("test_f", GREATER_THAN, "2"), SINGLE, "L"));

  // Same size plus mutual containment: the comparison does not depend on element order.
  Assert.assertEquals(expected.size(), actual.size());
  Assert.assertTrue(expected.containsAll(actual));
  Assert.assertTrue(actual.containsAll(expected));
}
Use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset in the project hopsworks by logicalclocks.
Class TrainingDatasetControllerTest, method testconvertToFilterEntities_bothFilter.
/**
 * Converts {@code fg.feature > 1 AND fg.feature > 2}, where both operands are plain filters set
 * directly on the root logic, and expects the AND node at "L" plus one SINGLE entity for each
 * operand at "L.L" and "L.R".
 */
@Test
public void testconvertToFilterEntities_bothFilter() throws Exception {
  TrainingDataset td = new TrainingDataset();
  Feature feature = new Feature("test_f", "fg0");

  // Both conditions are attached to the root as filters, not as nested logics.
  FilterLogic root = new FilterLogic();
  root.setType(AND);
  root.setLeftFilter(new Filter(feature, GREATER_THAN, "1"));
  root.setRightFilter(new Filter(feature, GREATER_THAN, "2"));

  List<TrainingDatasetFilter> actual = target.convertToFilterEntities(root, td, "L");

  List<TrainingDatasetFilter> expected = new ArrayList<>();
  expected.add(createTrainingDatasetFilter(null, AND, "L"));
  expected.add(createTrainingDatasetFilter(
      createTrainingDatasetFilterCondition("test_f", GREATER_THAN, "1"), SINGLE, "L.L"));
  expected.add(createTrainingDatasetFilter(
      createTrainingDatasetFilterCondition("test_f", GREATER_THAN, "2"), SINGLE, "L.R"));

  // Same size plus mutual containment: the comparison does not depend on element order.
  Assert.assertEquals(expected.size(), actual.size());
  Assert.assertTrue(expected.containsAll(actual));
  Assert.assertTrue(actual.containsAll(expected));
}
Aggregations