Search in sources :

Example 6 with Dataset

use of io.hops.hopsworks.persistence.entity.dataset.Dataset in project hopsworks by logicalclocks.

The following example is from the class HopsFSProvenanceController, method updateProjectProvType.

/**
 * Applies a new provenance type to a project and propagates it to its datasets.
 * Datasets that explicitly disabled provenance, or that already carry the new
 * type, are left untouched.
 *
 * @param project     project whose provenance type is being changed
 * @param newProvType provenance type to apply
 * @param dfso        filesystem handle used to read/write the xattrs
 * @throws ProvenanceException if reading or writing a provenance xattr fails
 */
public void updateProjectProvType(Project project, ProvTypeDTO newProvType, DistributedFileSystemOps dfso) throws ProvenanceException {
    String projectPath = Utils.getProjectPath(project.getName());
    // Short-circuit when the project already carries the requested type.
    ProvCoreDTO currentCore = getProvCoreXAttr(projectPath, dfso);
    if (currentCore != null && newProvType.equals(currentCore.getType())) {
        return;
    }
    // The project-level xattr carries no inode id.
    setProvCoreXAttr(projectPath, new ProvCoreDTO(newProvType, null), dfso);
    // Dataset-level xattrs additionally reference the project inode.
    ProvCoreDTO datasetCore = new ProvCoreDTO(newProvType, project.getInode().getId());
    for (Dataset dataset : project.getDatasetCollection()) {
        String datasetPath = Utils.getFileSystemDatasetPath(dataset, settings);
        ProvCoreDTO existingCore = getProvCoreXAttr(datasetPath, dfso);
        boolean skipDataset = existingCore != null
            && (existingCore.getType().equals(Provenance.Type.DISABLED.dto)
                || existingCore.getType().equals(newProvType));
        if (!skipDataset) {
            updateDatasetProvType(datasetPath, datasetCore, dfso);
        }
    }
}
Also used : ProvCoreDTO(io.hops.hopsworks.common.provenance.core.dto.ProvCoreDTO) Dataset(io.hops.hopsworks.persistence.entity.dataset.Dataset)

Example 7 with Dataset

use of io.hops.hopsworks.persistence.entity.dataset.Dataset in project hopsworks by logicalclocks.

The following example is from the class HopsFSProvenanceController, method getDatasetsProvType.

/**
 * Returns the provenance type of every dataset owned by, or shared with, the
 * given project. Shared datasets are reported under "ownerProject::datasetName".
 *
 * @param user    user on whose behalf the filesystem is accessed
 * @param project project whose datasets are inspected
 * @return one {@code ProvDatasetDTO} per owned and per shared dataset
 * @throws ProvenanceException if a dataset is missing its provenance core xattr
 *                             or the xattr cannot be read
 */
public List<ProvDatasetDTO> getDatasetsProvType(Users user, Project project) throws ProvenanceException {
    String hdfsUsername = hdfsUsersController.getHdfsUserName(project, user);
    DistributedFileSystemOps udfso = dfs.getDfsOps(hdfsUsername);
    try {
        List<ProvDatasetDTO> result = new ArrayList<>();
        for (Dataset dataset : project.getDatasetCollection()) {
            String datasetPath = Utils.getFileSystemDatasetPath(dataset, settings);
            ProvCoreDTO provCore = readProvCoreOrThrow(datasetPath, udfso);
            result.add(new ProvDatasetDTO(dataset.getName(), dataset.getInode().getId(), provCore.getType()));
        }
        for (DatasetSharedWith sharedWith : project.getDatasetSharedWithCollection()) {
            Dataset dataset = sharedWith.getDataset();
            String datasetPath = Utils.getFileSystemDatasetPath(dataset, settings);
            ProvCoreDTO provCore = readProvCoreOrThrow(datasetPath, udfso);
            // Shared datasets are qualified with the owning project's name.
            String sharedName = dataset.getProject().getName() + "::" + dataset.getName();
            result.add(new ProvDatasetDTO(sharedName, dataset.getInode().getId(), provCore.getType()));
        }
        return result;
    } finally {
        // udfso can only be null if getDfsOps contract changes; keep the guard defensive.
        if (udfso != null) {
            dfs.closeDfsClient(udfso);
        }
    }
}

/**
 * Reads the provenance core xattr at the given dataset path, failing loudly
 * when it is missing (a dataset without it is considered malformed).
 */
private ProvCoreDTO readProvCoreOrThrow(String datasetPath, DistributedFileSystemOps udfso) throws ProvenanceException {
    ProvCoreDTO provCore = getProvCoreXAttr(datasetPath, udfso);
    if (provCore == null) {
        throw new ProvenanceException(RESTCodes.ProvenanceErrorCode.INTERNAL_ERROR, Level.WARNING, "malformed dataset - provenance", "no provenance core xattr");
    }
    return provCore;
}
Also used : ProvDatasetDTO(io.hops.hopsworks.common.provenance.core.dto.ProvDatasetDTO) ProvenanceException(io.hops.hopsworks.exceptions.ProvenanceException) ProvCoreDTO(io.hops.hopsworks.common.provenance.core.dto.ProvCoreDTO) Dataset(io.hops.hopsworks.persistence.entity.dataset.Dataset) DatasetSharedWith(io.hops.hopsworks.persistence.entity.dataset.DatasetSharedWith) DistributedFileSystemOps(io.hops.hopsworks.common.hdfs.DistributedFileSystemOps) ArrayList(java.util.ArrayList)

Example 8 with Dataset

use of io.hops.hopsworks.persistence.entity.dataset.Dataset in project hopsworks by logicalclocks.

The following example is from the class ProjectController, method getQuotasInternal.

/**
 * Collects YARN, HDFS, Hive-DB, Feature Store and Kafka quota/usage figures
 * for the given project. Any figure that cannot be resolved stays at its
 * sentinel default (-1 for HDFS figures, 0 for YARN figures).
 *
 * @param project project whose quotas are queried
 * @return aggregated quota snapshot; never null
 */
public QuotasDTO getQuotasInternal(Project project) {
    long hdfsQuota = -1L, hdfsUsage = -1L, hdfsNsQuota = -1L, hdfsNsCount = -1L, dbhdfsQuota = -1L, dbhdfsUsage = -1L, dbhdfsNsQuota = -1L, dbhdfsNsCount = -1L, fshdfsQuota = -1L, fshdfsUsage = -1L, fshdfsNsQuota = -1L, fshdfsNsCount = -1L;
    float yarnRemainingQuota = 0f, yarnTotalQuota = 0f;
    // Yarn Quota
    YarnProjectsQuota yarnQuota = yarnProjectsQuotaFacade.findByProjectName(project.getName());
    if (yarnQuota == null) {
        LOGGER.log(Level.SEVERE, "Cannot find YARN quota information for project: " + project.getName());
    } else {
        yarnRemainingQuota = yarnQuota.getQuotaRemaining();
        yarnTotalQuota = yarnQuota.getTotal();
    }
    // HDFS project directory quota
    Optional<HdfsDirectoryWithQuotaFeature> projectInodeAttrsOptional = hdfsDirectoryWithQuotaFeatureFacade.getByInodeId(project.getInode().getId());
    if (projectInodeAttrsOptional.isPresent()) {
        HdfsDirectoryWithQuotaFeature projectAttrs = projectInodeAttrsOptional.get();
        hdfsQuota = projectAttrs.getSsquota().longValue();
        hdfsUsage = projectAttrs.getStorageSpace().longValue();
        hdfsNsQuota = projectAttrs.getNsquota().longValue();
        hdfsNsCount = projectAttrs.getNscount().longValue();
    }
    // If the Hive service is enabled, get the quota information for the db directory.
    // Iterate the collection directly: casting getDatasetCollection() to List is
    // unsafe (the underlying collection is not guaranteed to implement List).
    for (Dataset ds : project.getDatasetCollection()) {
        if (ds.getDsType() == DatasetType.HIVEDB) {
            Optional<HdfsDirectoryWithQuotaFeature> dbInodeAttrsOptional = hdfsDirectoryWithQuotaFeatureFacade.getByInodeId(ds.getInodeId());
            if (dbInodeAttrsOptional.isPresent()) {
                HdfsDirectoryWithQuotaFeature dbAttrs = dbInodeAttrsOptional.get();
                dbhdfsQuota = dbAttrs.getSsquota().longValue();
                dbhdfsUsage = dbAttrs.getStorageSpace().longValue();
                dbhdfsNsQuota = dbAttrs.getNsquota().longValue();
                dbhdfsNsCount = dbAttrs.getNscount().longValue();
            }
        } else if (ds.getDsType() == DatasetType.FEATURESTORE) {
            Optional<HdfsDirectoryWithQuotaFeature> fsInodeAttrsOptional = hdfsDirectoryWithQuotaFeatureFacade.getByInodeId(ds.getInodeId());
            if (fsInodeAttrsOptional.isPresent()) {
                HdfsDirectoryWithQuotaFeature fsAttrs = fsInodeAttrsOptional.get();
                fshdfsQuota = fsAttrs.getSsquota().longValue();
                fshdfsUsage = fsAttrs.getStorageSpace().longValue();
                fshdfsNsQuota = fsAttrs.getNsquota().longValue();
                fshdfsNsCount = fsAttrs.getNscount().longValue();
            }
        }
    }
    Integer kafkaQuota = project.getKafkaMaxNumTopics();
    return new QuotasDTO(yarnRemainingQuota, yarnTotalQuota, hdfsQuota, hdfsUsage, hdfsNsQuota, hdfsNsCount, dbhdfsQuota, dbhdfsUsage, dbhdfsNsQuota, dbhdfsNsCount, fshdfsQuota, fshdfsUsage, fshdfsNsQuota, fshdfsNsCount, kafkaQuota);
}
Also used : Optional(java.util.Optional) Dataset(io.hops.hopsworks.persistence.entity.dataset.Dataset) ArrayList(java.util.ArrayList) List(java.util.List) YarnProjectsQuota(io.hops.hopsworks.persistence.entity.jobs.quota.YarnProjectsQuota) HdfsDirectoryWithQuotaFeature(io.hops.hopsworks.persistence.entity.hdfs.HdfsDirectoryWithQuotaFeature)

Example 9 with Dataset

use of io.hops.hopsworks.persistence.entity.dataset.Dataset in project hopsworks by logicalclocks.

The following example is from the class HopssiteService, method getLocalDataset.

@GET
@Path("datasets/{publicDSId}/local")
public Response getLocalDataset(@PathParam("publicDSId") String publicDSId, @Context SecurityContext sc) {
    // Look up the dataset by its public id; a miss is reported as a bad request.
    Optional<Dataset> maybeDataset = datasetFacade.findByPublicDsId(publicDSId);
    if (!maybeDataset.isPresent()) {
        return noCacheResponse.getNoCacheResponseBuilder(Response.Status.BAD_REQUEST).build();
    }
    Dataset dataset = maybeDataset.get();
    // to get the real parent project
    Inode parent = inodes.findParent(dataset.getInode());
    LocalDatasetDTO datasetDTO = new LocalDatasetDTO(
        dataset.getInodeId(),
        dataset.getName(),
        dataset.getDescription(),
        parent.getInodePK().getName());
    LOGGER.log(Settings.DELA_DEBUG, "Get a local dataset by public id.");
    return noCacheResponse.getNoCacheResponseBuilder(Response.Status.OK).entity(datasetDTO).build();
}
Also used : Inode(io.hops.hopsworks.persistence.entity.hdfs.inode.Inode) Dataset(io.hops.hopsworks.persistence.entity.dataset.Dataset) LocalDatasetDTO(io.hops.hopsworks.api.hopssite.dto.LocalDatasetDTO) Path(javax.ws.rs.Path) GET(javax.ws.rs.GET)

Example 10 with Dataset

use of io.hops.hopsworks.persistence.entity.dataset.Dataset in project hopsworks by logicalclocks.

The following example is from the class TrainingDatasetController, method collectFeatures.

// Here we need to pass the list of training dataset joins so that we can rebuild the aliases.
// and handle correctly the case in which a feature group is joined with itself.
//
// Recursively walks the query and its joins, producing one TrainingDatasetFeature
// per selected feature. featureIndex assigns a global ordering position to each
// feature; joinIndex selects which persisted TrainingDatasetJoin the features of
// the current (sub)query are attached to.
// NOTE(review): joinIndex is advanced once per direct join before recursing, but
// the recursive call can itself consume further join indices for nested joins --
// confirm tdJoins is ordered to match this depth-first traversal.
public List<TrainingDatasetFeature> collectFeatures(Query query, List<TrainingDatasetFeatureDTO> featureDTOs, TrainingDataset trainingDataset, FeatureView featureView, int featureIndex, List<TrainingDatasetJoin> tdJoins, int joinIndex) throws FeaturestoreException {
    List<TrainingDatasetFeature> features = new ArrayList<>();
    boolean isLabel = false;
    TransformationFunction transformationFunction = null;
    for (Feature f : query.getFeatures()) {
        if (featureDTOs != null && !featureDTOs.isEmpty()) {
            // identify if feature is label
            isLabel = featureDTOs.stream().anyMatch(dto -> f.getName().equals(dto.getName()) && dto.getLabel());
            // get transformation function for this feature
            transformationFunction = getTransformationFunction(f, featureDTOs);
        }
        // Attach the feature to the training dataset when one is given, otherwise to
        // the feature view (presumably exactly one of the two is non-null -- verify
        // against callers).
        features.add(trainingDataset != null ? new TrainingDatasetFeature(trainingDataset, tdJoins.get(joinIndex), query.getFeaturegroup(), f.getName(), f.getType(), featureIndex++, isLabel, transformationFunction) : new TrainingDatasetFeature(featureView, tdJoins.get(joinIndex), query.getFeaturegroup(), f.getName(), f.getType(), featureIndex++, isLabel, transformationFunction));
    }
    if (query.getJoins() != null) {
        for (Join join : query.getJoins()) {
            // Each joined (right-hand) query uses the next persisted join entry.
            joinIndex++;
            List<TrainingDatasetFeature> joinFeatures = collectFeatures(join.getRightQuery(), featureDTOs, trainingDataset, featureView, featureIndex, tdJoins, joinIndex);
            features.addAll(joinFeatures);
            // Offset subsequent positions by the number of features the join contributed.
            featureIndex += joinFeatures.size();
        }
    }
    return features;
}
Also used : TrainingDatasetFilter(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFilter) FeaturegroupType(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.FeaturegroupType) Date(java.util.Date) Feature(io.hops.hopsworks.common.featurestore.query.Feature) HopsfsTrainingDatasetController(io.hops.hopsworks.common.featurestore.trainingdatasets.hopsfs.HopsfsTrainingDatasetController) FileStatus(org.apache.hadoop.fs.FileStatus) HopsFSProvenanceController(io.hops.hopsworks.common.provenance.core.HopsFSProvenanceController) Settings(io.hops.hopsworks.common.util.Settings) TransactionAttributeType(javax.ejb.TransactionAttributeType) Map(java.util.Map) FilterValue(io.hops.hopsworks.common.featurestore.query.filter.FilterValue) Path(org.apache.hadoop.fs.Path) FeatureView(io.hops.hopsworks.persistence.entity.featurestore.featureview.FeatureView) TrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDataset) TrainingDatasetSplit(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.split.TrainingDatasetSplit) Utils(io.hops.hopsworks.common.hdfs.Utils) StatisticsConfig(io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticsConfig) JoinType(org.apache.calcite.sql.JoinType) Stateless(javax.ejb.Stateless) TransformationFunctionFacade(io.hops.hopsworks.common.featurestore.transformationFunction.TransformationFunctionFacade) HopsfsTrainingDatasetFacade(io.hops.hopsworks.common.featurestore.trainingdatasets.hopsfs.HopsfsTrainingDatasetFacade) FeatureGroupCommit(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.FeatureGroupCommit) Collection(java.util.Collection) TrainingDatasetFeatureDTO(io.hops.hopsworks.common.featurestore.feature.TrainingDatasetFeatureDTO) Featuregroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup) RESTCodes(io.hops.hopsworks.restutils.RESTCodes) 
ActivityFacade(io.hops.hopsworks.common.dao.user.activity.ActivityFacade) Join(io.hops.hopsworks.common.featurestore.query.join.Join) StatisticColumnController(io.hops.hopsworks.common.featurestore.statistics.columns.StatisticColumnController) FeaturestoreConnectorFacade(io.hops.hopsworks.common.featurestore.storageconnectors.FeaturestoreConnectorFacade) Collectors(java.util.stream.Collectors) QueryController(io.hops.hopsworks.common.featurestore.query.QueryController) SqlFilterLogic(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.SqlFilterLogic) TransformationFunction(io.hops.hopsworks.persistence.entity.featurestore.transformationFunction.TransformationFunction) List(java.util.List) FeaturestoreFacade(io.hops.hopsworks.common.featurestore.FeaturestoreFacade) ExternalTrainingDatasetController(io.hops.hopsworks.common.featurestore.trainingdatasets.external.ExternalTrainingDatasetController) FeaturestoreUtils(io.hops.hopsworks.common.featurestore.utils.FeaturestoreUtils) Optional(java.util.Optional) FeaturestoreConnector(io.hops.hopsworks.persistence.entity.featurestore.storageconnector.FeaturestoreConnector) DistributedFsService(io.hops.hopsworks.common.hdfs.DistributedFsService) ActivityFlag(io.hops.hopsworks.persistence.entity.user.activity.ActivityFlag) InodeController(io.hops.hopsworks.common.hdfs.inode.InodeController) DistributedFileSystemOps(io.hops.hopsworks.common.hdfs.DistributedFileSystemOps) TrainingDatasetJoinCondition(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetJoinCondition) FeaturestoreConnectorType(io.hops.hopsworks.persistence.entity.featurestore.storageconnector.FeaturestoreConnectorType) HashMap(java.util.HashMap) FeaturestoreActivityMeta(io.hops.hopsworks.persistence.entity.featurestore.activity.FeaturestoreActivityMeta) Streams(com.logicalclocks.shaded.com.google.common.collect.Streams) OnlineFeaturestoreController(io.hops.hopsworks.common.featurestore.online.OnlineFeaturestoreController) 
Project(io.hops.hopsworks.persistence.entity.project.Project) TrainingDatasetFilterCondition(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFilterCondition) ArrayList(java.util.ArrayList) Level(java.util.logging.Level) HopsfsTrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.hopsfs.HopsfsTrainingDataset) FeaturestoreException(io.hops.hopsworks.exceptions.FeaturestoreException) FeatureGroupCommitController(io.hops.hopsworks.common.featurestore.featuregroup.cached.FeatureGroupCommitController) FeaturestoreActivityFacade(io.hops.hopsworks.common.featurestore.activity.FeaturestoreActivityFacade) PitJoinController(io.hops.hopsworks.common.featurestore.query.pit.PitJoinController) TransactionAttribute(javax.ejb.TransactionAttribute) HdfsUsersController(io.hops.hopsworks.common.hdfs.HdfsUsersController) OnlineFeaturegroupController(io.hops.hopsworks.common.featurestore.featuregroup.online.OnlineFeaturegroupController) Query(io.hops.hopsworks.common.featurestore.query.Query) Filter(io.hops.hopsworks.common.featurestore.query.filter.Filter) TrainingDatasetType(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetType) Inode(io.hops.hopsworks.persistence.entity.hdfs.inode.Inode) ProvenanceException(io.hops.hopsworks.exceptions.ProvenanceException) EJB(javax.ejb.EJB) TrainingDatasetFeature(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFeature) StatisticsController(io.hops.hopsworks.common.featurestore.statistics.StatisticsController) ExternalTrainingDataset(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.external.ExternalTrainingDataset) IOException(java.io.IOException) Featurestore(io.hops.hopsworks.persistence.entity.featurestore.Featurestore) FeaturegroupController(io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupController) ServiceException(io.hops.hopsworks.exceptions.ServiceException) 
TimeTravelFormat(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.TimeTravelFormat) TrainingDatasetJoin(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetJoin) QueryDTO(io.hops.hopsworks.common.featurestore.query.QueryDTO) StatisticColumn(io.hops.hopsworks.persistence.entity.featurestore.statistics.StatisticColumn) Dataset(io.hops.hopsworks.persistence.entity.dataset.Dataset) FilterLogic(io.hops.hopsworks.common.featurestore.query.filter.FilterLogic) Users(io.hops.hopsworks.persistence.entity.user.Users) Comparator(java.util.Comparator) FeaturegroupDTO(io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupDTO) TrainingDatasetFeature(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFeature) ArrayList(java.util.ArrayList) Join(io.hops.hopsworks.common.featurestore.query.join.Join) TrainingDatasetJoin(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetJoin) TransformationFunction(io.hops.hopsworks.persistence.entity.featurestore.transformationFunction.TransformationFunction) Feature(io.hops.hopsworks.common.featurestore.query.Feature) TrainingDatasetFeature(io.hops.hopsworks.persistence.entity.featurestore.trainingdataset.TrainingDatasetFeature)

Aggregations

Dataset (io.hops.hopsworks.persistence.entity.dataset.Dataset)73 Inode (io.hops.hopsworks.persistence.entity.hdfs.inode.Inode)24 Project (io.hops.hopsworks.persistence.entity.project.Project)24 DatasetException (io.hops.hopsworks.exceptions.DatasetException)23 Path (javax.ws.rs.Path)15 DistributedFileSystemOps (io.hops.hopsworks.common.hdfs.DistributedFileSystemOps)14 Produces (javax.ws.rs.Produces)14 Path (org.apache.hadoop.fs.Path)14 Users (io.hops.hopsworks.persistence.entity.user.Users)13 FeaturestoreException (io.hops.hopsworks.exceptions.FeaturestoreException)12 DatasetSharedWith (io.hops.hopsworks.persistence.entity.dataset.DatasetSharedWith)12 IOException (java.io.IOException)11 ArrayList (java.util.ArrayList)11 AllowedProjectRoles (io.hops.hopsworks.api.filter.AllowedProjectRoles)8 DatasetPath (io.hops.hopsworks.common.dataset.util.DatasetPath)8 ProjectException (io.hops.hopsworks.exceptions.ProjectException)8 JWTRequired (io.hops.hopsworks.jwt.annotation.JWTRequired)8 GET (javax.ws.rs.GET)8 GenericException (io.hops.hopsworks.exceptions.GenericException)7 HopsSecurityException (io.hops.hopsworks.exceptions.HopsSecurityException)7