Search in sources :

Example 1 with TransformationFunction

use of io.hops.hopsworks.persistence.entity.featurestore.transformationFunction.TransformationFunction in project hopsworks by logicalclocks.

In class TrainingDatasetController, method collectFeatures.

// Here we need to pass the list of training dataset joins so that we can rebuild the aliases.
// and handle correctly the case in which a feature group is joined with itself.
public List<TrainingDatasetFeature> collectFeatures(Query query, List<TrainingDatasetFeatureDTO> featureDTOs, TrainingDataset trainingDataset, FeatureView featureView, int featureIndex, List<TrainingDatasetJoin> tdJoins, int joinIndex) throws FeaturestoreException {
    List<TrainingDatasetFeature> features = new ArrayList<>();
    boolean isLabel = false;
    TransformationFunction transformationFunction = null;
    for (Feature f : query.getFeatures()) {
        if (featureDTOs != null && !featureDTOs.isEmpty()) {
            // identify if feature is label
            isLabel = featureDTOs.stream().anyMatch(dto -> f.getName().equals(dto.getName()) && dto.getLabel());
            // get transformation function for this feature
            transformationFunction = getTransformationFunction(f, featureDTOs);
        }
        features.add(trainingDataset != null ? new TrainingDatasetFeature(trainingDataset, tdJoins.get(joinIndex), query.getFeaturegroup(), f.getName(), f.getType(), featureIndex++, isLabel, transformationFunction) : new TrainingDatasetFeature(featureView, tdJoins.get(joinIndex), query.getFeaturegroup(), f.getName(), f.getType(), featureIndex++, isLabel, transformationFunction));
    }
    if (query.getJoins() != null) {
        for (Join join : query.getJoins()) {
            joinIndex++;
            List<TrainingDatasetFeature> joinFeatures = collectFeatures(join.getRightQuery(), featureDTOs, trainingDataset, featureView, featureIndex, tdJoins, joinIndex);
            features.addAll(joinFeatures);
            featureIndex += joinFeatures.size();
        }
    }
    return features;
}
Also used : TrainingDatasetFilter(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFilter) FeaturegroupType(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.FeaturegroupType) Date(java.util.Date) Feature(io.hops.hopsworks.common.featurestore.query.Feature) HopsfsTrainingDatasetController(io.hops.hopsworks.common.featurestore.trainingdatasets.hopsfs.HopsfsTrainingDatasetController) HopsFSProvenanceController(io.hops.hopsworks.common.provenance.core.HopsFSProvenanceController) Settings(io.hops.hopsworks.common.util.Settings) TransactionAttributeType(javax.ejb.TransactionAttributeType) Map(java.util.Map) FilterValue(io.hops.hopsworks.common.featurestore.query.filter.FilterValue) FeatureView(io.hops.hopsworks.persistence.entity.featurestore.featureview.FeatureView) TrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset) TrainingDatasetSplit(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.split.TrainingDatasetSplit) Utils(io.hops.hopsworks.common.hdfs.Utils) StatisticsConfig(io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticsConfig) JoinType(org.apache.calcite.sql.JoinType) Stateless(javax.ejb.Stateless) TransformationFunctionFacade(io.hops.hopsworks.common.featurestore.transformationFunction.TransformationFunctionFacade) HopsfsTrainingDatasetFacade(io.hops.hopsworks.common.featurestore.trainingdatasets.hopsfs.HopsfsTrainingDatasetFacade) Collection(java.util.Collection) TrainingDatasetFeatureDTO(io.hops.hopsworks.common.featurestore.feature.TrainingDatasetFeatureDTO) Featuregroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup) RESTCodes(io.hops.hopsworks.restutils.RESTCodes) Join(io.hops.hopsworks.common.featurestore.query.join.Join) StatisticColumnController(io.hops.hopsworks.common.featurestore.statistics.columns.StatisticColumnController) 
FeaturestoreConnectorFacade(io.hops.hopsworks.common.featurestore.storageconnectors.FeaturestoreConnectorFacade) Collectors(java.util.stream.Collectors) QueryController(io.hops.hopsworks.common.featurestore.query.QueryController) SqlFilterLogic(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlFilterLogic) TransformationFunction(io.hops.hopsworks.persistence.entity.featurestore.transformationFunction.TransformationFunction) List(java.util.List) FeaturestoreFacade(io.hops.hopsworks.common.featurestore.FeaturestoreFacade) ExternalTrainingDatasetController(io.hops.hopsworks.common.featurestore.trainingdatasets.external.ExternalTrainingDatasetController) FeaturestoreUtils(io.hops.hopsworks.common.featurestore.utils.FeaturestoreUtils) Optional(java.util.Optional) FeaturestoreConnector(io.hops.hopsworks.persistence.entity.featurestore.storageconnector.FeaturestoreConnector) DistributedFsService(io.hops.hopsworks.common.hdfs.DistributedFsService) InodeController(io.hops.hopsworks.common.hdfs.inode.InodeController) DistributedFileSystemOps(io.hops.hopsworks.common.hdfs.DistributedFileSystemOps) TrainingDatasetJoinCondition(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetJoinCondition) FeaturestoreConnectorType(io.hops.hopsworks.persistence.entity.featurestore.storageconnector.FeaturestoreConnectorType) HashMap(java.util.HashMap) FeaturestoreActivityMeta(io.hops.hopsworks.persistence.entity.featurestore.activity.FeaturestoreActivityMeta) Streams(com.logicalclocks.shaded.com.google.common.collect.Streams) OnlineFeaturestoreController(io.hops.hopsworks.common.featurestore.online.OnlineFeaturestoreController) Project(io.hops.hopsworks.persistence.entity.project.Project) TrainingDatasetFilterCondition(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFilterCondition) ArrayList(java.util.ArrayList) Level(java.util.logging.Level) 
HopsfsTrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.hopsfs.HopsfsTrainingDataset) FeaturestoreException(io.hops.hopsworks.exceptions.FeaturestoreException) FeaturestoreActivityFacade(io.hops.hopsworks.common.featurestore.activity.FeaturestoreActivityFacade) PitJoinController(io.hops.hopsworks.common.featurestore.query.pit.PitJoinController) TransactionAttribute(javax.ejb.TransactionAttribute) HdfsUsersController(io.hops.hopsworks.common.hdfs.HdfsUsersController) OnlineFeaturegroupController(io.hops.hopsworks.common.featurestore.featuregroup.online.OnlineFeaturegroupController) Query(io.hops.hopsworks.common.featurestore.query.Query) Filter(io.hops.hopsworks.common.featurestore.query.filter.Filter) TrainingDatasetType(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetType) Inode(io.hops.hopsworks.persistence.entity.hdfs.inode.Inode) ProvenanceException(io.hops.hopsworks.exceptions.ProvenanceException) EJB(javax.ejb.EJB) TrainingDatasetFeature(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFeature) StatisticsController(io.hops.hopsworks.common.featurestore.statistics.StatisticsController) ExternalTrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.external.ExternalTrainingDataset) IOException(java.io.IOException) Featurestore(io.hops.hopsworks.persistence.entity.featurestore.Featurestore) FeaturegroupController(io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupController) ServiceException(io.hops.hopsworks.exceptions.ServiceException) TimeTravelFormat(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.TimeTravelFormat) TrainingDatasetJoin(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetJoin) QueryDTO(io.hops.hopsworks.common.featurestore.query.QueryDTO) StatisticColumn(io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticColumn) 
Dataset(io.hops.hopsworks.persistence.entity.dataset.Dataset) FilterLogic(io.hops.hopsworks.common.featurestore.query.filter.FilterLogic) Users(io.hops.hopsworks.persistence.entity.user.Users) Comparator(java.util.Comparator) FeaturegroupDTO(io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupDTO) TrainingDatasetFeature(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFeature) ArrayList(java.util.ArrayList) Join(io.hops.hopsworks.common.featurestore.query.join.Join) TrainingDatasetJoin(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetJoin) TransformationFunction(io.hops.hopsworks.persistence.entity.featurestore.transformationFunction.TransformationFunction) Feature(io.hops.hopsworks.common.featurestore.query.Feature) TrainingDatasetFeature(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFeature)

Example 2 with TransformationFunction

use of io.hops.hopsworks.persistence.entity.featurestore.transformationFunction.TransformationFunction in project hopsworks by logicalclocks.

In class TrainingDatasetController, method getTransformationFunction.

private TransformationFunction getTransformationFunction(Feature feature, List<TrainingDatasetFeatureDTO> featureDTOs) throws FeaturestoreException {
    TrainingDatasetFeatureDTO featureDTO = featureDTOs.stream().filter(dto -> feature.getName().equals(dto.getFeatureGroupFeatureName())).findFirst().orElse(null);
    TransformationFunction transformationFunction = null;
    if (featureDTO != null && featureDTO.getTransformationFunction() != null) {
        transformationFunction = transformationFunctionFacade.findById(featureDTO.getTransformationFunction().getId()).orElseThrow(() -> new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRANSFORMATION_FUNCTION_DOES_NOT_EXIST, Level.FINE, "Could not find transformation function with ID" + featureDTO.getTransformationFunction().getId()));
    }
    return transformationFunction;
}
Also used : TrainingDatasetFeatureDTO(io.hops.hopsworks.common.featurestore.feature.TrainingDatasetFeatureDTO) FeaturestoreException(io.hops.hopsworks.exceptions.FeaturestoreException) TransformationFunction(io.hops.hopsworks.persistence.entity.featurestore.transformationFunction.TransformationFunction)

Example 3 with TransformationFunction

use of io.hops.hopsworks.persistence.entity.featurestore.transformationFunction.TransformationFunction in project hopsworks by logicalclocks.

In class TransformationFunctionController, method delete.

public void delete(Project project, Featurestore featurestore, Users user, Integer transformationFunctionId) throws FeaturestoreException {
    TransformationFunction transformationFunction = transformationFunctionFacade.findById(transformationFunctionId).orElseThrow(() -> new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.TRANSFORMATION_FUNCTION_DOES_NOT_EXIST, Level.FINE, "Could not find transformation function with ID" + transformationFunctionId));
    // Check if trying to delete built in transformation function
    if (FeaturestoreConstants.BUILT_IN_TRANSFORMATION_FUNCTION_NAMES.contains(transformationFunction.getName()) && transformationFunction.getVersion() == 1) {
        throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.ERROR_DELETING_TRANSFORMERFUNCTION, Level.FINE, "Deleting built-in transformation function `" + transformationFunction.getName() + "` with version 1 is not " + "allowed. Create a new version instead.");
    }
    DistributedFileSystemOps udfso = null;
    try {
        udfso = dfs.getDfsOps(hdfsUsersController.getHdfsUserName(project, user));
        String dirName = getFeatureStoreEntityName(transformationFunction.getName(), transformationFunction.getVersion());
        // Construct the directory path
        Path dirPath = new Path(getOrCreatePath(featurestore, udfso), dirName);
        // delete json files
        udfso.rm(dirPath, true);
    } catch (IOException e) {
        throw new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.ERROR_DELETING_TRANSFORMERFUNCTION, Level.WARNING, "", e.getMessage(), e);
    } finally {
        dfs.closeDfsClient(udfso);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) DistributedFileSystemOps(io.hops.hopsworks.common.hdfs.DistributedFileSystemOps) IOException(java.io.IOException) FeaturestoreException(io.hops.hopsworks.exceptions.FeaturestoreException) TransformationFunction(io.hops.hopsworks.persistence.entity.featurestore.transformationFunction.TransformationFunction)

Example 4 with TransformationFunction

use of io.hops.hopsworks.persistence.entity.featurestore.transformationFunction.TransformationFunction in project hopsworks by logicalclocks.

In class TransformationFunctionFacade, method findByFeaturestore.

/**
 * Retrieves transformation functions by featurestore
 *
 * @param featurestore
 * @param offset
 * @return
 */
public AbstractFacade.CollectionInfo findByFeaturestore(Integer offset, Integer limit, Set<? extends AbstractFacade.FilterBy> filters, Set<? extends AbstractFacade.SortBy> sort, Featurestore featurestore) {
    String queryStr = buildQuery("SELECT tfn FROM TransformationFunction tfn ", filters, sort, "tfn.featurestore = :featurestore ");
    String queryCountStr = buildQuery("SELECT COUNT(tfn.id) FROM TransformationFunction tfn ", filters, sort, "tfn.featurestore = :featurestore ");
    Query query = em.createQuery(queryStr, TransformationFunction.class).setParameter("featurestore", featurestore);
    Query queryCount = em.createQuery(queryCountStr, TransformationFunction.class).setParameter("featurestore", featurestore);
    setFilter(filters, query);
    setFilter(filters, queryCount);
    setOffsetAndLim(offset, limit, query);
    return findAll(offset, limit, filters, query, queryCount);
}
Also used : TypedQuery(javax.persistence.TypedQuery) Query(javax.persistence.Query) TransformationFunction(io.hops.hopsworks.persistence.entity.featurestore.transformationFunction.TransformationFunction)

Example 5 with TransformationFunction

use of io.hops.hopsworks.persistence.entity.featurestore.transformationFunction.TransformationFunction in project hopsworks by logicalclocks.

In class TransformationFunctionResource, method register.

@POST
@Consumes(MediaType.APPLICATION_JSON)
@Produces(MediaType.APPLICATION_JSON)
@AllowedProjectRoles({ AllowedProjectRoles.DATA_OWNER, AllowedProjectRoles.DATA_SCIENTIST })
@JWTRequired(acceptedTokens = { Audience.API, Audience.JOB }, allowedUserRoles = { "HOPS_ADMIN", "HOPS_USER" })
@ApiKeyRequired(acceptedScopes = { ApiScope.FEATURESTORE }, allowedUserRoles = { "HOPS_ADMIN", "HOPS_USER" })
@ApiOperation(value = "Register transformation function in to a featurestore", response = TransformationFunctionDTO.class)
public Response register(@Context UriInfo uriInfo, @Context SecurityContext sc, TransformationFunctionDTO transformationFunctionDTO) throws IOException, FeaturestoreException {
    Users user = jWTHelper.getUserPrincipal(sc);
    TransformationFunction transformationFunction = transformationFunctionController.register(user, project, featurestore, transformationFunctionDTO);
    TransformationFunctionDTO newTransformationFunctionDTO = transformationFunctionBuilder.build(uriInfo, new ResourceRequest(ResourceRequest.Name.TRANSFORMATIONFUNCTIONS), user, project, featurestore, transformationFunction);
    return Response.ok().entity(newTransformationFunctionDTO).build();
}
Also used : TransformationFunctionDTO(io.hops.hopsworks.common.featurestore.transformationFunction.TransformationFunctionDTO) Users(io.hops.hopsworks.persistence.entity.user.Users) ResourceRequest(io.hops.hopsworks.common.api.ResourceRequest) TransformationFunction(io.hops.hopsworks.persistence.entity.featurestore.transformationFunction.TransformationFunction) POST(javax.ws.rs.POST) Consumes(javax.ws.rs.Consumes) Produces(javax.ws.rs.Produces) JWTRequired(io.hops.hopsworks.jwt.annotation.JWTRequired) ApiOperation(io.swagger.annotations.ApiOperation) ApiKeyRequired(io.hops.hopsworks.api.filter.apiKey.ApiKeyRequired) AllowedProjectRoles(io.hops.hopsworks.api.filter.AllowedProjectRoles)

Aggregations

TransformationFunction (io.hops.hopsworks.persistence.entity.featurestore.transformationFunction.TransformationFunction)9 FeaturestoreException (io.hops.hopsworks.exceptions.FeaturestoreException)6 DistributedFileSystemOps (io.hops.hopsworks.common.hdfs.DistributedFileSystemOps)3 Users (io.hops.hopsworks.persistence.entity.user.Users)3 TrainingDatasetFeatureDTO (io.hops.hopsworks.common.featurestore.feature.TrainingDatasetFeatureDTO)2 TransformationFunctionDTO (io.hops.hopsworks.common.featurestore.transformationFunction.TransformationFunctionDTO)2 TransformationFunctionFacade (io.hops.hopsworks.common.featurestore.transformationFunction.TransformationFunctionFacade)2 IOException (java.io.IOException)2 Streams (com.logicalclocks.shaded.com.google.common.collect.Streams)1 AllowedProjectRoles (io.hops.hopsworks.api.filter.AllowedProjectRoles)1 ApiKeyRequired (io.hops.hopsworks.api.filter.apiKey.ApiKeyRequired)1 ResourceRequest (io.hops.hopsworks.common.api.ResourceRequest)1 AbstractFacade (io.hops.hopsworks.common.dao.AbstractFacade)1 FeaturestoreConstants (io.hops.hopsworks.common.featurestore.FeaturestoreConstants)1 FeaturestoreFacade (io.hops.hopsworks.common.featurestore.FeaturestoreFacade)1 FeaturestoreActivityFacade (io.hops.hopsworks.common.featurestore.activity.FeaturestoreActivityFacade)1 FeaturegroupController (io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupController)1 FeaturegroupDTO (io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupDTO)1 OnlineFeaturegroupController (io.hops.hopsworks.common.featurestore.featuregroup.online.OnlineFeaturegroupController)1 OnlineFeaturestoreController (io.hops.hopsworks.common.featurestore.online.OnlineFeaturestoreController)1