Search in sources :

Example 1 with CachedFeature

use of io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeature in project hopsworks by logicalclocks.

From the class CachedFeaturegroupController, method persistCachedFeaturegroupMetadata.

/**
 * Creates a new cached feature group entity, fills in its metadata and persists it
 * (cached_feature_group table).
 *
 * @param hiveTable the Hive table entity the new feature group is attached to
 * @param onlineEnabled value stored as the online-enabled flag of the feature group
 * @param timeTravelFormat time travel format stored on the feature group
 * @param featureGroupFeatureDTOS feature definitions; they are used to build the extra
 *                                constraints, and every feature whose description is not null is
 *                                additionally stored as a CachedFeature
 * @return the persisted cached feature group entity
 */
private CachedFeaturegroup persistCachedFeaturegroupMetadata(HiveTbls hiveTable, boolean onlineEnabled,
    TimeTravelFormat timeTravelFormat, List<FeatureGroupFeatureDTO> featureGroupFeatureDTOS) {
    CachedFeaturegroup newGroup = new CachedFeaturegroup();
    newGroup.setHiveTbls(hiveTable);
    newGroup.setOnlineEnabled(onlineEnabled);
    newGroup.setTimeTravelFormat(timeTravelFormat);
    newGroup.setFeaturesExtraConstraints(buildFeatureExtraConstrains(featureGroupFeatureDTOS, newGroup));

    // Only features that carry a description get a CachedFeature entry.
    List<CachedFeature> describedFeatures = featureGroupFeatureDTOS.stream()
        .filter(dto -> dto.getDescription() != null)
        .map(dto -> new CachedFeature(newGroup, dto.getName(), dto.getDescription()))
        .collect(Collectors.toList());
    newGroup.setCachedFeatures(describedFeatures);

    cachedFeatureGroupFacade.persist(newGroup);
    return newGroup;
}
Also used : Arrays(java.util.Arrays) Connection(java.sql.Connection) Feature(io.hops.hopsworks.common.featurestore.query.Feature) HiveTableParams(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HiveTableParams) StringUtils(org.apache.commons.lang3.StringUtils) CachedFeatureExtraConstraints(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeatureExtraConstraints) Settings(io.hops.hopsworks.common.util.Settings) CryptoPasswordNotFoundException(io.hops.hopsworks.exceptions.CryptoPasswordNotFoundException) SqlNode(org.apache.calcite.sql.SqlNode) TransactionAttributeType(javax.ejb.TransactionAttributeType) FeaturestoreController(io.hops.hopsworks.common.featurestore.FeaturestoreController) ResultSet(java.sql.ResultSet) ProjectException(io.hops.hopsworks.exceptions.ProjectException) SqlSelect(org.apache.calcite.sql.SqlSelect) FeatureGroupFeatureDTO(io.hops.hopsworks.common.featurestore.feature.FeatureGroupFeatureDTO) Utils(io.hops.hopsworks.common.hdfs.Utils) HivePartitionKeys(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HivePartitionKeys) Stateless(javax.ejb.Stateless) Collection(java.util.Collection) Featuregroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup) RESTCodes(io.hops.hopsworks.restutils.RESTCodes) ConstructorController(io.hops.hopsworks.common.featurestore.query.ConstructorController) Logger(java.util.logging.Logger) ServiceDiscoveryException(com.logicalclocks.servicediscoverclient.exceptions.ServiceDiscoveryException) Collectors(java.util.stream.Collectors) Pair(org.javatuples.Pair) FileNotFoundException(java.io.FileNotFoundException) List(java.util.List) SqlStdOperatorTable(org.apache.calcite.sql.fun.SqlStdOperatorTable) PostConstruct(javax.annotation.PostConstruct) FeaturestoreUtils(io.hops.hopsworks.common.featurestore.utils.FeaturestoreUtils) Optional(java.util.Optional) KafkaException(io.hops.hopsworks.exceptions.KafkaException) 
HiveSqlDialect(org.apache.calcite.sql.dialect.HiveSqlDialect) ResultSetMetaData(java.sql.ResultSetMetaData) HopsSecurityException(io.hops.hopsworks.exceptions.HopsSecurityException) CertificateMaterializer(io.hops.hopsworks.common.security.CertificateMaterializer) HiveTbls(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HiveTbls) FeaturestoreActivityMeta(io.hops.hopsworks.persistence.entity.featurestore.activity.FeaturestoreActivityMeta) OnlineFeaturestoreController(io.hops.hopsworks.common.featurestore.online.OnlineFeaturestoreController) Project(io.hops.hopsworks.persistence.entity.project.Project) ArrayList(java.util.ArrayList) Level(java.util.logging.Level) Strings(com.google.common.base.Strings) FeaturestoreException(io.hops.hopsworks.exceptions.FeaturestoreException) SQLException(java.sql.SQLException) SqlLiteral(org.apache.calcite.sql.SqlLiteral) FeaturestoreActivityFacade(io.hops.hopsworks.common.featurestore.activity.FeaturestoreActivityFacade) TransactionAttribute(javax.ejb.TransactionAttribute) OnlineFeaturegroupController(io.hops.hopsworks.common.featurestore.featuregroup.online.OnlineFeaturegroupController) SqlIdentifier(org.apache.calcite.sql.SqlIdentifier) EJB(javax.ejb.EJB) HiveController(io.hops.hopsworks.common.hive.HiveController) SqlParserPos(org.apache.calcite.sql.parser.SqlParserPos) SchemaException(io.hops.hopsworks.exceptions.SchemaException) IOException(java.io.IOException) Featurestore(io.hops.hopsworks.persistence.entity.featurestore.Featurestore) CachedFeature(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeature) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) SqlDialect(org.apache.calcite.sql.SqlDialect) ServiceException(io.hops.hopsworks.exceptions.ServiceException) UserException(io.hops.hopsworks.exceptions.UserException) TimeTravelFormat(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.TimeTravelFormat) 
CachedFeaturegroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeaturegroup) Statement(java.sql.Statement) HiveColumns(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HiveColumns) Users(io.hops.hopsworks.persistence.entity.user.Users) Comparator(java.util.Comparator) SqlNodeList(org.apache.calcite.sql.SqlNodeList) Collections(java.util.Collections) FeaturegroupDTO(io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupDTO) DriverManager(java.sql.DriverManager) CachedFeature(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeature) CachedFeaturegroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeaturegroup)

Example 2 with CachedFeature

use of io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeature in project hopsworks by logicalclocks.

From the class CachedFeaturegroupController, method getFeaturesDTO.

/**
 * Builds the feature DTOs of a cached feature group from its Hive table metadata.
 *
 * <p>The returned list starts with the partition keys (ascending index), followed by the regular
 * Hive columns (ascending index). When the feature group uses the HUDI time travel format the
 * list is passed through {@code dropHudiSpecFeatureGroupFeature} before being returned.
 *
 * @param featureGroup the feature group whose cached feature group metadata is read
 * @param project forwarded to the default-constraint lookup
 * @param user forwarded to the default-constraint lookup
 * @return the feature DTOs, partition keys first
 * @throws FeaturestoreException propagated from the helper calls made by this method
 */
public List<FeatureGroupFeatureDTO> getFeaturesDTO(Featuregroup featureGroup, Project project, Users user)
    throws FeaturestoreException {
    Collection<CachedFeatureExtraConstraints> extraConstraints =
        featureGroup.getCachedFeaturegroup().getFeaturesExtraConstraints();
    HiveTbls table = featureGroup.getCachedFeaturegroup().getHiveTbls();
    List<SQLDefaultConstraint> defaults = offlineFeatureGroupController.getDefaultConstraints(
        featureGroup.getFeaturestore(), table.getTblName(), project, user);
    Collection<CachedFeature> describedFeatures = featureGroup.getCachedFeaturegroup().getCachedFeatures();
    List<FeatureGroupFeatureDTO> result = new ArrayList<>();

    // Regular columns, in ascending index order. Description comes from the cached features.
    List<HiveColumns> columnsByIndex = table.getSdId().getCdId().getHiveColumnsCollection().stream()
        .sorted(Comparator.comparing(HiveColumns::getIntegerIdx))
        .collect(Collectors.toList());
    for (HiveColumns column : columnsByIndex) {
        String columnName = column.getHiveColumnsPK().getColumnName();
        boolean isPrimary = getPrimaryFlag(extraConstraints, columnName);
        boolean isPrecombine = getPrecombineFlag(featureGroup, extraConstraints, columnName);
        String columnDescription = getDescription(describedFeatures, columnName);
        String columnDefault = getDefaultValue(defaults, columnName);
        // fifth argument is false for regular columns (presumably the partition flag)
        result.add(new FeatureGroupFeatureDTO(columnName, column.getTypeName(), columnDescription, isPrimary,
            false, isPrecombine, columnDefault, featureGroup.getId()));
    }

    // Partition keys are kept separately on the Hive table. They are walked in descending index
    // order and each one is pushed to the front, so they end up ascending ahead of the columns.
    List<HivePartitionKeys> partitionKeysDescending = table.getHivePartitionKeysCollection().stream()
        .sorted(Collections.reverseOrder(Comparator.comparing(HivePartitionKeys::getIntegerIdx)))
        .collect(Collectors.toList());
    for (HivePartitionKeys partitionKey : partitionKeysDescending) {
        String keyName = partitionKey.getHivePartitionKeysPK().getPkeyName();
        boolean isPrimary = getPrimaryFlag(extraConstraints, keyName);
        boolean isPrecombine = getPrecombineFlag(featureGroup, extraConstraints, keyName);
        String keyDefault = getDefaultValue(defaults, keyName);
        // description is the partition key's own comment; fifth argument is true here
        result.add(0, new FeatureGroupFeatureDTO(keyName, partitionKey.getPkeyType(), partitionKey.getPkeyComment(),
            isPrimary, true, isPrecombine, keyDefault, featureGroup.getId()));
    }

    if (featureGroup.getCachedFeaturegroup().getTimeTravelFormat() == TimeTravelFormat.HUDI) {
        return dropHudiSpecFeatureGroupFeature(result);
    }
    return result;
}
Also used : CachedFeatureExtraConstraints(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeatureExtraConstraints) FeatureGroupFeatureDTO(io.hops.hopsworks.common.featurestore.feature.FeatureGroupFeatureDTO) CachedFeature(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeature) ArrayList(java.util.ArrayList) HiveColumns(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HiveColumns) HivePartitionKeys(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HivePartitionKeys) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) HiveTbls(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HiveTbls)

Aggregations

FeatureGroupFeatureDTO (io.hops.hopsworks.common.featurestore.feature.FeatureGroupFeatureDTO)2 CachedFeature (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeature)2 CachedFeatureExtraConstraints (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeatureExtraConstraints)2 HiveColumns (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HiveColumns)2 HivePartitionKeys (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HivePartitionKeys)2 HiveTbls (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HiveTbls)2 ArrayList (java.util.ArrayList)2 SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint)2 Strings (com.google.common.base.Strings)1 ServiceDiscoveryException (com.logicalclocks.servicediscoverclient.exceptions.ServiceDiscoveryException)1 FeaturestoreController (io.hops.hopsworks.common.featurestore.FeaturestoreController)1 FeaturestoreActivityFacade (io.hops.hopsworks.common.featurestore.activity.FeaturestoreActivityFacade)1 FeaturegroupDTO (io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupDTO)1 OnlineFeaturegroupController (io.hops.hopsworks.common.featurestore.featuregroup.online.OnlineFeaturegroupController)1 OnlineFeaturestoreController (io.hops.hopsworks.common.featurestore.online.OnlineFeaturestoreController)1 ConstructorController (io.hops.hopsworks.common.featurestore.query.ConstructorController)1 Feature (io.hops.hopsworks.common.featurestore.query.Feature)1 FeaturestoreUtils (io.hops.hopsworks.common.featurestore.utils.FeaturestoreUtils)1 Utils (io.hops.hopsworks.common.hdfs.Utils)1 HiveController (io.hops.hopsworks.common.hive.HiveController)1