Search in sources :

Example 1 with CachedFeatureExtraConstraints

use of io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeatureExtraConstraints in project hopsworks by logicalclocks.

In the class CachedFeaturegroupController, the method buildFeatureExtraConstrains.

/**
 * Builds the extra-constraint entities (primary key and Hudi precombine key markers) for the
 * features of a cached feature group.
 *
 * <p>One constraint is created per feature flagged as primary key. When the feature group uses the
 * HUDI time travel format and the precombine key is not one of the primary keys, one additional
 * non-primary constraint is created for the precombine key. If no feature is flagged as precombine
 * key for a HUDI feature group, the first primary key is used as the precombine key.
 *
 * @param featureGroupFeatureDTOS the features of the feature group
 * @param cachedFeaturegroup the cached feature group the constraints belong to
 * @return the list of constraints, primary keys first (in feature order), followed by the
 *     precombine key constraint when it is not itself a primary key
 * @throws IllegalArgumentException if the feature group uses HUDI and neither a precombine key nor
 *     a primary key is flagged, so no precombine key can be determined
 */
private List<CachedFeatureExtraConstraints> buildFeatureExtraConstrains(List<FeatureGroupFeatureDTO> featureGroupFeatureDTOS, CachedFeaturegroup cachedFeaturegroup) {
    List<CachedFeatureExtraConstraints> cachedFeatureExtraConstraints = new ArrayList<>();
    // Boolean.TRUE.equals(...) treats an unset (null) flag as "false" instead of failing on unboxing.
    List<String> pkNames = featureGroupFeatureDTOS.stream()
        .filter(feature -> Boolean.TRUE.equals(feature.getPrimary()))
        .map(FeatureGroupFeatureDTO::getName)
        .collect(Collectors.toList());
    // hudi precombine key is always one feature
    String hudiPrecombineKeyName = featureGroupFeatureDTOS.stream()
        .filter(feature -> Boolean.TRUE.equals(feature.getHudiPrecombineKey()))
        .map(FeatureGroupFeatureDTO::getName)
        .findFirst()
        .orElse(null);
    boolean isHudi = cachedFeaturegroup.getTimeTravelFormat() == TimeTravelFormat.HUDI;
    boolean primaryKeyIsHudiPrecombineKey = false;
    if (isHudi) {
        if (hudiPrecombineKeyName == null) {
            // Without any primary key there is nothing to fall back to; fail with a clear message
            // rather than an IndexOutOfBoundsException from pkNames.get(0).
            if (pkNames.isEmpty()) {
                throw new IllegalArgumentException("Cannot determine the Hudi precombine key: "
                    + "neither a precombine key nor a primary key is set on the feature group features");
            }
            // hudi precombine key is always one feature, we pick up 1st primary key
            hudiPrecombineKeyName = pkNames.get(0);
            primaryKeyIsHudiPrecombineKey = true;
        } else {
            // User may set primary key as precombine key
            primaryKeyIsHudiPrecombineKey = pkNames.contains(hudiPrecombineKeyName);
        }
    }
    for (String pkName : pkNames) {
        cachedFeatureExtraConstraints.add(new CachedFeatureExtraConstraints(cachedFeaturegroup, pkName, true, pkName.equals(hudiPrecombineKeyName)));
    }
    // The precombine key gets its own (non-primary) constraint only when it is not already covered
    // by one of the primary key constraints above.
    if (isHudi && !primaryKeyIsHudiPrecombineKey) {
        cachedFeatureExtraConstraints.add(new CachedFeatureExtraConstraints(cachedFeaturegroup, hudiPrecombineKeyName, false, true));
    }
    return cachedFeatureExtraConstraints;
}
Also used : CachedFeatureExtraConstraints(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeatureExtraConstraints) FeatureGroupFeatureDTO(io.hops.hopsworks.common.featurestore.feature.FeatureGroupFeatureDTO) ArrayList(java.util.ArrayList)

Example 2 with CachedFeatureExtraConstraints

use of io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeatureExtraConstraints in project hopsworks by logicalclocks.

In the class CachedFeaturegroupController, the method getFeaturesDTO.

/**
 * Assembles the feature DTOs of a cached feature group from its Hive table metadata.
 *
 * <p>Regular Hive columns are listed in ascending index order; partition keys are placed in front
 * of them. Each DTO carries the primary-key flag, the Hudi precombine flag and the default value
 * looked up for its column name. For feature groups using the HUDI time travel format the result
 * is passed through {@code dropHudiSpecFeatureGroupFeature} before being returned.
 *
 * @param featureGroup the feature group whose features are listed
 * @param project the project, forwarded to the default-constraint lookup
 * @param user the user, forwarded to the default-constraint lookup
 * @return the feature DTOs, partition keys first
 * @throws FeaturestoreException declared by the method; presumably raised by the
 *     default-constraint lookup (its declaration is not visible here)
 */
public List<FeatureGroupFeatureDTO> getFeaturesDTO(Featuregroup featureGroup, Project project, Users user) throws FeaturestoreException {
    Collection<CachedFeatureExtraConstraints> extraConstraints = featureGroup.getCachedFeaturegroup().getFeaturesExtraConstraints();
    HiveTbls hiveTable = featureGroup.getCachedFeaturegroup().getHiveTbls();
    List<SQLDefaultConstraint> defaultConstraints = offlineFeatureGroupController.getDefaultConstraints(featureGroup.getFeaturestore(), hiveTable.getTblName(), project, user);
    Collection<CachedFeature> cachedFeatures = featureGroup.getCachedFeaturegroup().getCachedFeatures();
    List<FeatureGroupFeatureDTO> result = new ArrayList<>();

    // Regular columns, ascending by index; the primary flag comes from the extra constraints.
    Comparator<HiveColumns> byColumnIndex = Comparator.comparing(HiveColumns::getIntegerIdx);
    List<HiveColumns> orderedColumns = hiveTable.getSdId().getCdId().getHiveColumnsCollection().stream()
        .sorted(byColumnIndex)
        .collect(Collectors.toList());
    for (HiveColumns column : orderedColumns) {
        String columnName = column.getHiveColumnsPK().getColumnName();
        boolean isPrimary = getPrimaryFlag(extraConstraints, columnName);
        boolean isPrecombine = getPrecombineFlag(featureGroup, extraConstraints, columnName);
        String columnDescription = getDescription(cachedFeatures, columnName);
        String columnDefault = getDefaultValue(defaultConstraints, columnName);
        result.add(new FeatureGroupFeatureDTO(columnName, column.getTypeName(), columnDescription, isPrimary, false, isPrecombine, columnDefault, featureGroup.getId()));
    }

    // Hive keeps partition columns apart from the regular ones. They are walked in descending
    // index order because each one is pushed to the front of the list, which restores the
    // ascending order in the final result.
    Comparator<HivePartitionKeys> byPartitionIndex = Comparator.comparing(HivePartitionKeys::getIntegerIdx);
    List<HivePartitionKeys> descendingPartitionKeys = hiveTable.getHivePartitionKeysCollection().stream()
        .sorted(byPartitionIndex.reversed())
        .collect(Collectors.toList());
    for (HivePartitionKeys partitionKey : descendingPartitionKeys) {
        String keyName = partitionKey.getHivePartitionKeysPK().getPkeyName();
        boolean isPrimary = getPrimaryFlag(extraConstraints, keyName);
        boolean isPrecombine = getPrecombineFlag(featureGroup, extraConstraints, keyName);
        String keyDefault = getDefaultValue(defaultConstraints, keyName);
        // prepend so that partition keys end up ahead of the regular columns
        result.add(0, new FeatureGroupFeatureDTO(keyName, partitionKey.getPkeyType(), partitionKey.getPkeyComment(), isPrimary, true, isPrecombine, keyDefault, featureGroup.getId()));
    }

    boolean isHudi = featureGroup.getCachedFeaturegroup().getTimeTravelFormat() == TimeTravelFormat.HUDI;
    return isHudi ? dropHudiSpecFeatureGroupFeature(result) : result;
}
Also used : CachedFeatureExtraConstraints(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeatureExtraConstraints) FeatureGroupFeatureDTO(io.hops.hopsworks.common.featurestore.feature.FeatureGroupFeatureDTO) CachedFeature(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeature) ArrayList(java.util.ArrayList) HiveColumns(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HiveColumns) HivePartitionKeys(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HivePartitionKeys) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) HiveTbls(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HiveTbls)

Aggregations

FeatureGroupFeatureDTO (io.hops.hopsworks.common.featurestore.feature.FeatureGroupFeatureDTO)2 CachedFeatureExtraConstraints (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeatureExtraConstraints)2 ArrayList (java.util.ArrayList)2 CachedFeature (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeature)1 HiveColumns (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HiveColumns)1 HivePartitionKeys (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HivePartitionKeys)1 HiveTbls (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HiveTbls)1 SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint)1