use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset in project hopsworks by logicalclocks.
the class TransformationResource method getTransformationFunction.
// Copied from TrainingDatasetService
@GET
@Path("/functions")
@JWTRequired(acceptedTokens = { Audience.API, Audience.JOB }, allowedUserRoles = { "HOPS_ADMIN", "HOPS_USER" })
@ApiOperation(value = "Get Transformation functions.", response = TransformationFunctionAttachedDTO.class)
public Response getTransformationFunction(@Context SecurityContext sc, @Context HttpServletRequest req,
    @Context UriInfo uriInfo) throws FeaturestoreException {
  // A training dataset id is mandatory for this sub-resource; fail fast before any lookups.
  if (trainingDatasetId == null) {
    throw new IllegalArgumentException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_ID_NOT_PROVIDED.getMessage());
  }
  // Resolve the training dataset within the current feature store, then the calling user from the JWT.
  TrainingDataset dataset = trainingDatasetController.getTrainingDatasetById(featurestore, trainingDatasetId);
  Users requestingUser = jWTHelper.getUserPrincipal(sc);
  // Build the DTO of transformation functions attached to this training dataset.
  TransformationFunctionAttachedDTO responseBody = transformationFunctionBuilder.build(
      uriInfo, new ResourceRequest(ResourceRequest.Name.TRANSFORMATIONFUNCTIONS), requestingUser, project, dataset);
  return Response.ok().entity(responseBody).build();
}
use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset in project hopsworks by logicalclocks.
the class TrainingDatasetController method updateTrainingDatasetMetadata.
/**
 * Updates a training dataset with new metadata. Only the description is mutable through this
 * method; all other fields of the DTO are used for lookup/validation only.
 *
 * @param user the user performing the update
 * @param project the project the training dataset belongs to
 * @param featurestore the featurestore that the trainingDataset is linked to
 * @param trainingDatasetDTO the user input data for updating the training dataset
 * @return a JSON/XML DTO of the updated training dataset, re-read from the database
 * @throws FeaturestoreException if the training dataset does not exist in the feature store,
 *         or if the user input fails validation
 * @throws ServiceException if converting the entity to a DTO fails
 */
public TrainingDatasetDTO updateTrainingDatasetMetadata(Users user, Project project, Featurestore featurestore, TrainingDatasetDTO trainingDatasetDTO) throws FeaturestoreException, ServiceException {
  TrainingDataset trainingDataset = getTrainingDatasetById(trainingDatasetDTO.getId(), featurestore);
  // Verify general entity related information
  trainingDatasetInputValidation.verifyUserInput(trainingDatasetDTO);
  // Update metadata (only the description is mutable here)
  trainingDataset.setDescription(trainingDatasetDTO.getDescription());
  trainingDatasetFacade.update(trainingDataset);
  // Refetch the updated entry from the database so the returned DTO reflects the persisted state
  TrainingDataset updatedTrainingDataset = getTrainingDatasetById(trainingDatasetDTO.getId(), featurestore);
  return convertTrainingDatasetToDTO(user, project, updatedTrainingDataset);
}

/**
 * Looks up a training dataset by id within the given feature store.
 *
 * @param id the training dataset id
 * @param featurestore the feature store the dataset must belong to
 * @return the training dataset entity
 * @throws FeaturestoreException with TRAINING_DATASET_NOT_FOUND if no matching entity exists
 */
private TrainingDataset getTrainingDatasetById(Integer id, Featurestore featurestore) throws FeaturestoreException {
  return trainingDatasetFacade.findByIdAndFeaturestore(id, featurestore)
      .orElseThrow(() -> new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_NOT_FOUND,
          Level.FINE, "training dataset id: " + id));
}
use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset in project hopsworks by logicalclocks.
the class TrainingDatasetController method getQuery.
// TODO feature view: remove
/**
 * Reconstructs the query used to generate the training dataset, fetching the features and the joins
 * in the proper order from the database.
 *
 * @param trainingDataset the training dataset whose generating query should be rebuilt; must have
 *                        been created from a query ({@code trainingDataset.isQuery()})
 * @param withLabel whether label features are kept in the select list (forwarded to getFeaturesSorted)
 * @param project project used when resolving the available features of each feature group
 * @param user user on whose behalf feature-group metadata is fetched
 * @param isHiveEngine forwarded to the Query constructor and to the construction of each join
 * @return the reconstructed query with joins and stored filters attached
 * @throws FeaturestoreException if the training dataset is not query-based, if a source feature
 *         group was deleted, or if a requested feature no longer exists in its feature group
 */
public Query getQuery(TrainingDataset trainingDataset, boolean withLabel, Project project, Users user, Boolean isHiveEngine) throws FeaturestoreException {
  // Only query-based training datasets carry the metadata needed for reconstruction.
  if (!trainingDataset.isQuery()) {
    throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_NO_QUERY, Level.FINE, "Inference vector is only available for datasets generated by queries");
  }
  // Joins in their stored order; joins.get(0) acts as the root (left-most) feature group below.
  List<TrainingDatasetJoin> joins = getJoinsSorted(trainingDataset);
  // Convert all the TrainingDatasetFeatures to QueryFeatures
  Map<Integer, String> fgAliasLookup = getAliasLookupTable(joins);
  // These features are for the select part and are from different feature groups
  // to respect the ordering, all selected features are added to the left most Query instead of splitting them
  // over the querys for their respective origin feature group
  List<TrainingDatasetFeature> tdFeatures = getFeaturesSorted(trainingDataset, withLabel);
  // Check that all the feature groups still exists, if not throw a reasonable error
  if (tdFeatures.stream().anyMatch(j -> j.getFeatureGroup() == null)) {
    throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_QUERY_FG_DELETED, Level.FINE);
  }
  // Get available features for all involved feature groups once, and save in map fgId -> availableFeatures
  Map<Integer, List<Feature>> availableFeaturesLookup = new HashMap<>();
  for (TrainingDatasetJoin join : joins) {
    if (!availableFeaturesLookup.containsKey(join.getFeatureGroup().getId())) {
      List<Feature> availableFeatures = featuregroupController.getFeatures(join.getFeatureGroup(), project, user).stream().map(f -> new Feature(f.getName(), fgAliasLookup.get(join.getId()), f.getType(), f.getDefaultValue(), f.getPrimary(), join.getFeatureGroup(), join.getPrefix())).collect(Collectors.toList());
      availableFeaturesLookup.put(join.getFeatureGroup().getId(), availableFeatures);
    }
  }
  // Flat lookup "fgId + featureName" -> Feature; on duplicate keys the first occurrence wins ((f1, f2) -> f1).
  Map<String, Feature> featureLookup = availableFeaturesLookup.values().stream().flatMap(List::stream).collect(Collectors.toMap(f -> makeFeatureLookupKey(f.getFeatureGroup().getId(), f.getName()), f -> f, (f1, f2) -> f1));
  List<Feature> features = new ArrayList<>();
  for (TrainingDatasetFeature requestedFeature : tdFeatures) {
    Feature tdFeature = featureLookup.get(makeFeatureLookupKey(requestedFeature.getFeatureGroup().getId(), requestedFeature.getName()));
    if (tdFeature == null) {
      throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.FEATURE_DOES_NOT_EXIST, Level.FINE, "Feature: " + requestedFeature.getName() + " not found in feature group: " + requestedFeature.getFeatureGroup().getName());
    }
    // instantiate new feature since alias in available feature is not correct if fg is joined with itself
    // NOTE(review): this constructor call has a different argument shape than the one in the loop above
    // (prefix/index instead of primary/prefix) — presumably a Feature overload; verify against the Feature class.
    Feature featureWithCorrectAlias = new Feature(tdFeature.getName(), fgAliasLookup.get(requestedFeature.getTrainingDatasetJoin().getId()), tdFeature.getType(), tdFeature.getDefaultValue(), tdFeature.getPrefix(), requestedFeature.getFeatureGroup(), requestedFeature.getIndex());
    features.add(featureWithCorrectAlias);
  }
  // Keep a map feature store id -> feature store name
  Map<Integer, String> fsLookup = getFsLookupTableJoins(joins);
  // Build the root query from the first join; it carries the full select list (see comment above).
  Query query = new Query(fsLookup.get(joins.get(0).getFeatureGroup().getFeaturestore().getId()), onlineFeaturestoreController.getOnlineFeaturestoreDbName(joins.get(0).getFeatureGroup().getFeaturestore().getProject()), joins.get(0).getFeatureGroup(), fgAliasLookup.get(joins.get(0).getId()), features, availableFeaturesLookup.get(joins.get(0).getFeatureGroup().getId()), isHiveEngine);
  // Set the remaining feature groups as join
  List<Join> queryJoins = new ArrayList<>();
  for (int i = 1; i < joins.size(); i++) {
    // left side of the join stays fixed, the counter starts at 1
    queryJoins.add(getQueryJoin(query, joins.get(i), fgAliasLookup, fsLookup, availableFeaturesLookup, isHiveEngine));
  }
  query.setJoins(queryJoins);
  // Re-attach any filters stored with the training dataset ("L" is the path prefix used by convertToFilterLogic).
  FilterLogic filterLogic = convertToFilterLogic(trainingDataset.getFilters(), featureLookup, "L");
  query.setFilter(filterLogic);
  return query;
}
use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset in project hopsworks by logicalclocks.
the class TrainingDatasetController method createTrainingDataset.
/**
 * Creates a training dataset: resolves the version (auto-incrementing if absent), checks for
 * duplicates, validates the input (compiling the query if the dataset is query-based), resolves
 * the storage connector, creates the dataset directory on HopsFS, persists the metadata and
 * attaches provenance information to the directory.
 *
 * @param user the user creating the training dataset
 * @param project the project the training dataset belongs to
 * @param featurestore the feature store the training dataset is created in
 * @param trainingDatasetDTO the user-provided training dataset specification
 * @return the complete DTO of the created training dataset
 * @throws FeaturestoreException if a dataset with the same name/version already exists, the
 *         schema/query is missing or invalid, or the storage connector cannot be resolved
 * @throws ProvenanceException if attaching the provenance xattr fails
 * @throws IOException on HDFS errors while creating the dataset directory
 * @throws ServiceException if metadata creation fails at the service layer
 */
public TrainingDatasetDTO createTrainingDataset(Users user, Project project, Featurestore featurestore, TrainingDatasetDTO trainingDatasetDTO) throws FeaturestoreException, ProvenanceException, IOException, ServiceException {
  // if version not provided, get latest and increment
  if (trainingDatasetDTO.getVersion() == null) {
    // returns ordered list by desc version
    List<TrainingDataset> tdPrevious = trainingDatasetFacade.findByNameAndFeaturestoreOrderedDescVersion(trainingDatasetDTO.getName(), featurestore);
    if (tdPrevious != null && !tdPrevious.isEmpty()) {
      trainingDatasetDTO.setVersion(tdPrevious.get(0).getVersion() + 1);
    } else {
      // first version of this training dataset name
      trainingDatasetDTO.setVersion(1);
    }
  }
  // Check that training dataset doesn't already exists
  if (trainingDatasetFacade.findByNameVersionAndFeaturestore(trainingDatasetDTO.getName(), trainingDatasetDTO.getVersion(), featurestore).isPresent()) {
    throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_ALREADY_EXISTS, Level.FINE, "Training Dataset: " + trainingDatasetDTO.getName() + ", version: " + trainingDatasetDTO.getVersion());
  }
  // If the training dataset is constructed from a query, verify that it compiles correctly
  Query query = null;
  if (trainingDatasetDTO.getQueryDTO() != null) {
    query = constructQuery(trainingDatasetDTO.getQueryDTO(), project, user);
  } else if (trainingDatasetDTO.getFeatures() == null) {
    // neither a query nor an explicit feature schema was provided
    throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRAINING_DATASET_NO_SCHEMA, Level.FINE, "The training dataset doesn't have any feature");
  }
  // Verify input (query is null for schema-based datasets)
  inputValidation.validate(trainingDatasetDTO, query);
  Inode inode = null;
  FeaturestoreConnector featurestoreConnector;
  if (trainingDatasetDTO.getTrainingDatasetType() == TrainingDatasetType.HOPSFS_TRAINING_DATASET) {
    // HopsFS dataset: use the user-supplied HopsFS connector if given, otherwise the default one
    if (trainingDatasetDTO.getStorageConnector() != null && trainingDatasetDTO.getStorageConnector().getId() != null) {
      featurestoreConnector = featurestoreConnectorFacade.findByIdType(trainingDatasetDTO.getStorageConnector().getId(), FeaturestoreConnectorType.HOPSFS).orElseThrow(() -> new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.HOPSFS_CONNECTOR_NOT_FOUND, Level.FINE, "HOPSFS Connector: " + trainingDatasetDTO.getStorageConnector().getId()));
    } else {
      featurestoreConnector = getDefaultHopsFSTrainingDatasetConnector(featurestore);
    }
  } else {
    // external dataset: a storage connector is mandatory
    if (trainingDatasetDTO.getStorageConnector() == null) {
      throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.CONNECTOR_NOT_FOUND, Level.FINE, "Storage connector is empty");
    }
    featurestoreConnector = featurestoreConnectorFacade.findById(trainingDatasetDTO.getStorageConnector().getId()).orElseThrow(() -> new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.CONNECTOR_NOT_FOUND, Level.FINE, "Connector: " + trainingDatasetDTO.getStorageConnector().getId()));
  }
  // for HopsFS TD it will either be the default connector already or it will be a connector pointing to another
  // HopsFS Directory
  // for external TD we will use default connector
  Dataset trainingDatasetsFolder;
  if (featurestoreConnector.getHopsfsConnector() != null) {
    trainingDatasetsFolder = featurestoreConnector.getHopsfsConnector().getHopsfsDataset();
  } else {
    trainingDatasetsFolder = getDefaultHopsFSTrainingDatasetConnector(featurestore).getHopsfsConnector().getHopsfsDataset();
  }
  // TODO(Fabio) account for path
  // we allow specifying the path in the training dataset dir, but it is not really used, this option will be
  // deprecated for hopsfs training datasets.
  String trainingDatasetPath = getTrainingDatasetPath(inodeController.getPath(trainingDatasetsFolder.getInode()), trainingDatasetDTO.getName(), trainingDatasetDTO.getVersion());
  DistributedFileSystemOps udfso = null;
  String username = hdfsUsersBean.getHdfsUserName(project, user);
  try {
    // Perform the HDFS operations as the project user, not as the superuser
    udfso = dfs.getDfsOps(username);
    udfso.mkdir(trainingDatasetPath);
    inode = inodeController.getInodeAtPath(trainingDatasetPath);
    // Persist the metadata, then attach the provenance xattr to the newly created directory
    TrainingDatasetDTO completeTrainingDatasetDTO = createTrainingDatasetMetadata(user, project, featurestore, trainingDatasetDTO, query, featurestoreConnector, inode);
    fsProvenanceController.trainingDatasetAttachXAttr(trainingDatasetPath, completeTrainingDatasetDTO, udfso);
    return completeTrainingDatasetDTO;
  } finally {
    // Always return the DFS client to the pool, even on failure
    if (udfso != null) {
      dfs.closeDfsClient(udfso);
    }
  }
}
use of io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset in project hopsworks by logicalclocks.
the class FsQueryBuilder method build.
/**
 * Builds the query DTO for a training dataset by reconstructing the query that generated it.
 *
 * @param uriInfo request URI context used to build the href of the DTO
 * @param project the project the training dataset belongs to
 * @param user the user requesting the query
 * @param featurestore the feature store containing the training dataset
 * @param trainingDatasetId id of the training dataset
 * @param withLabel whether label features should be included in the reconstructed query
 * @param isHiveEngine whether the query targets the Hive engine
 * @return the constructed query DTO with its href set
 * @throws FeaturestoreException if the training dataset or its query cannot be resolved
 * @throws ServiceException if constructing the DTO fails at the service layer
 */
public FsQueryDTO build(UriInfo uriInfo, Project project, Users user, Featurestore featurestore,
    Integer trainingDatasetId, boolean withLabel, boolean isHiveEngine)
    throws FeaturestoreException, ServiceException {
  TrainingDataset td = trainingDatasetController.getTrainingDatasetById(featurestore, trainingDatasetId);
  Query reconstructed = trainingDatasetController.getQuery(td, withLabel, project, user, isHiveEngine);
  boolean pitEnabled = pitJoinController.isPitEnabled(reconstructed);
  FsQueryDTO fsQueryDTO = constructorController.construct(reconstructed, pitEnabled, true, project, user);
  fsQueryDTO.setHref(uri(uriInfo, project));
  return fsQueryDTO;
}
Aggregations