Search in sources :

Example 1 with HiveColumns

Use of io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HiveColumns in the project hopsworks by logicalclocks.

From the class CachedFeaturegroupController, method getFeaturesDTO.

/**
 * Builds the feature DTOs of a cached feature group from its Hive table metadata.
 *
 * <p>The returned list holds the Hive partition keys first (ascending by index), followed by the regular Hive
 * columns (ascending by index). When the feature group's time travel format is {@code HUDI}, the list is passed
 * through {@code dropHudiSpecFeatureGroupFeature} before it is returned.
 *
 * @param featureGroup the feature group whose cached feature group and Hive table are read
 * @param project forwarded to the default-constraint lookup
 * @param user forwarded to the default-constraint lookup
 * @return the feature DTOs, partition keys first
 * @throws FeaturestoreException declared by this method; presumably raised by the default-constraint lookup
 */
public List<FeatureGroupFeatureDTO> getFeaturesDTO(Featuregroup featureGroup, Project project, Users user) throws FeaturestoreException {
    Collection<CachedFeatureExtraConstraints> extraConstraints = featureGroup.getCachedFeaturegroup().getFeaturesExtraConstraints();
    HiveTbls table = featureGroup.getCachedFeaturegroup().getHiveTbls();
    List<SQLDefaultConstraint> columnDefaults = offlineFeatureGroupController.getDefaultConstraints(featureGroup.getFeaturestore(), table.getTblName(), project, user);
    Collection<CachedFeature> storedFeatures = featureGroup.getCachedFeaturegroup().getCachedFeatures();
    List<FeatureGroupFeatureDTO> result = new ArrayList<>();

    // Regular columns first, in index order; the primary flag comes from the extra constraints.
    Comparator<HiveColumns> byColumnIdx = Comparator.comparing(HiveColumns::getIntegerIdx);
    List<HiveColumns> orderedColumns = table.getSdId().getCdId().getHiveColumnsCollection()
        .stream()
        .sorted(byColumnIdx)
        .collect(Collectors.toList());
    for (HiveColumns column : orderedColumns) {
        String columnName = column.getHiveColumnsPK().getColumnName();
        boolean isPrimary = getPrimaryFlag(extraConstraints, columnName);
        boolean isPrecombine = getPrecombineFlag(featureGroup, extraConstraints, columnName);
        String columnDescription = getDescription(storedFeatures, columnName);
        String columnDefault = getDefaultValue(columnDefaults, columnName);
        result.add(new FeatureGroupFeatureDTO(columnName, column.getTypeName(), columnDescription, isPrimary, false, isPrecombine, columnDefault, featureGroup.getId()));
    }

    // Hive keeps partition columns in a separate collection. They are walked in descending index order because
    // each one is pushed to the front of the result, which leaves them in ascending order ahead of the columns.
    Comparator<HivePartitionKeys> byKeyIdx = Comparator.comparing(HivePartitionKeys::getIntegerIdx);
    List<HivePartitionKeys> descendingKeys = table.getHivePartitionKeysCollection()
        .stream()
        .sorted(byKeyIdx.reversed())
        .collect(Collectors.toList());
    for (HivePartitionKeys key : descendingKeys) {
        String keyName = key.getHivePartitionKeysPK().getPkeyName();
        boolean isPrimary = getPrimaryFlag(extraConstraints, keyName);
        boolean isPrecombine = getPrecombineFlag(featureGroup, extraConstraints, keyName);
        String keyDefault = getDefaultValue(columnDefaults, keyName);
        // The partition key's own comment is used as its description.
        result.add(0, new FeatureGroupFeatureDTO(keyName, key.getPkeyType(), key.getPkeyComment(), isPrimary, true, isPrecombine, keyDefault, featureGroup.getId()));
    }

    if (featureGroup.getCachedFeaturegroup().getTimeTravelFormat() != TimeTravelFormat.HUDI) {
        return result;
    }
    return dropHudiSpecFeatureGroupFeature(result);
}
Also used : CachedFeatureExtraConstraints(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeatureExtraConstraints) FeatureGroupFeatureDTO(io.hops.hopsworks.common.featurestore.feature.FeatureGroupFeatureDTO) CachedFeature(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeature) ArrayList(java.util.ArrayList) HiveColumns(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HiveColumns) HivePartitionKeys(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HivePartitionKeys) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) HiveTbls(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HiveTbls)

Aggregations

FeatureGroupFeatureDTO (io.hops.hopsworks.common.featurestore.feature.FeatureGroupFeatureDTO): 1, CachedFeature (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeature): 1, CachedFeatureExtraConstraints (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeatureExtraConstraints): 1, HiveColumns (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HiveColumns): 1, HivePartitionKeys (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HivePartitionKeys): 1, HiveTbls (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HiveTbls): 1, ArrayList (java.util.ArrayList): 1, SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint): 1