Usage of io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HiveTbls in the hopsworks project by logicalclocks: class TestFeatureGroupCommitController, method setup.
/**
 * Builds the entity graph the tests operate on: a feature store for project
 * "test_proj" containing one cached feature group ("fg1_1", version 1) whose
 * backing Hive table points at a hopsfs warehouse location.
 */
@Before
public void setup() {
  Inode inode = new Inode();
  HiveSds hiveSds = new HiveSds();
  // Use uppercase 'L' for long literals: a lowercase 'l' is easily misread as the digit 1.
  hiveSds.setSdId(1L);
  hiveSds.setLocation("hopsfs://namenode.service.consul:8020/apps/hive/warehouse/test_proj_featurestore.db/fg1_1");
  hiveSds.setInode(inode);
  HiveTbls hiveTbls = new HiveTbls();
  hiveTbls.setSdId(hiveSds);
  hiveTbls.setTblName("fg1_1");
  CachedFeaturegroup cachedFeaturegroup = new CachedFeaturegroup();
  cachedFeaturegroup.setHiveTbls(hiveTbls);
  // fs and fg1 are fields of the enclosing test class (declared outside this view).
  fs = new Featurestore();
  fs.setHiveDbId(1L);
  fs.setProject(new Project("test_proj"));
  fg1 = new Featuregroup(1);
  fg1.setName("fg1_1");
  fg1.setVersion(1);
  fg1.setFeaturestore(fs);
  fg1.setCachedFeaturegroup(cachedFeaturegroup);
}
Usage of io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HiveTbls in the hopsworks project by logicalclocks: class CachedFeaturegroupController, method createCachedFeaturegroup.
/**
 * Persists a cached feature group: validates the primary key, creates the backing
 * Hive table, looks the table up in the metastore, and stores the Hopsworks-side
 * metadata row.
 *
 * @param featurestore the featurestore of the feature group
 * @param cachedFeaturegroupDTO the user input data to use when creating the cached feature group
 * @param project the project in which the feature group is created
 * @param user the user making the request
 * @return the created entity
 * @throws FeaturestoreException if the primary key is invalid or the Hive table was
 *         created but could not be found in the metastore afterwards
 * @throws SQLException if the DDL statement against Hive fails
 */
public CachedFeaturegroup createCachedFeaturegroup(Featurestore featurestore, CachedFeaturegroupDTO cachedFeaturegroupDTO, Project project, Users user) throws FeaturestoreException, SQLException, KafkaException, SchemaException, ProjectException, UserException, ServiceException, HopsSecurityException, IOException {
verifyPrimaryKey(cachedFeaturegroupDTO.getFeatures(), cachedFeaturegroupDTO.getTimeTravelFormat());
// Prepare DDL statement
String tableName = getTblName(cachedFeaturegroupDTO.getName(), cachedFeaturegroupDTO.getVersion());
// For HUDI time travel the table additionally gets the Hudi bookkeeping columns appended.
offlineFeatureGroupController.createHiveTable(featurestore, tableName, cachedFeaturegroupDTO.getDescription(), cachedFeaturegroupDTO.getTimeTravelFormat() == TimeTravelFormat.HUDI ? addHudiSpecFeatures(cachedFeaturegroupDTO.getFeatures()) : cachedFeaturegroupDTO.getFeatures(), project, user, getTableFormat(cachedFeaturegroupDTO.getTimeTravelFormat()));
// Online serving is only enabled when both the installation supports it and the user asked for it.
boolean onlineEnabled = settings.isOnlineFeaturestore() && cachedFeaturegroupDTO.getOnlineEnabled();
// Get HiveTblId of the newly created table from the metastore
HiveTbls hiveTbls = cachedFeatureGroupFacade.getHiveTableByNameAndDB(tableName, featurestore.getHiveDbId()).orElseThrow(() -> new FeaturestoreException(RESTCodes.FeaturestoreErrorCode.COULD_NOT_CREATE_FEATUREGROUP, Level.WARNING, "", "Table created correctly but not in the metastore"));
// Persist cached feature group
return persistCachedFeaturegroupMetadata(hiveTbls, onlineEnabled, cachedFeaturegroupDTO.getTimeTravelFormat(), cachedFeaturegroupDTO.getFeatures());
}
Usage of io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HiveTbls in the hopsworks project by logicalclocks: class CachedFeaturegroupController, method persistCachedFeaturegroupMetadata.
/**
 * Stores the metadata of a newly created cached feature group in the
 * cached_feature_group table.
 *
 * @param hiveTable the Hive metastore table backing the feature group
 * @param onlineEnabled whether the feature group is also enabled in the online feature store
 * @param timeTravelFormat the time travel format of the feature group
 * @param featureGroupFeatureDTOS the user-supplied feature definitions
 * @return entity of the created cached feature group
 */
private CachedFeaturegroup persistCachedFeaturegroupMetadata(HiveTbls hiveTable, boolean onlineEnabled, TimeTravelFormat timeTravelFormat, List<FeatureGroupFeatureDTO> featureGroupFeatureDTOS) {
  final CachedFeaturegroup entity = new CachedFeaturegroup();
  entity.setHiveTbls(hiveTable);
  entity.setOnlineEnabled(onlineEnabled);
  entity.setTimeTravelFormat(timeTravelFormat);
  entity.setFeaturesExtraConstraints(buildFeatureExtraConstrains(featureGroupFeatureDTOS, entity));
  // Only features that carry a description are mirrored into the cached_feature table.
  List<CachedFeature> describedFeatures = new ArrayList<>();
  for (FeatureGroupFeatureDTO feature : featureGroupFeatureDTOS) {
    if (feature.getDescription() != null) {
      describedFeatures.add(new CachedFeature(entity, feature.getName(), feature.getDescription()));
    }
  }
  entity.setCachedFeatures(describedFeatures);
  cachedFeatureGroupFacade.persist(entity);
  return entity;
}
Usage of io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HiveTbls in the hopsworks project by logicalclocks: class CachedFeaturegroupController, method getFeaturesDTO.
/**
 * Assembles the feature list of a cached feature group by merging the Hive metastore
 * column metadata with the Hopsworks-side constraint tables: primary-key/precombine
 * flags from the extra-constraints collection, descriptions from the cached_feature
 * table, and default values from Hive default constraints. Partition columns are
 * inserted at the front of the list, regular columns follow in Hive column order.
 *
 * @param featureGroup the cached feature group whose features are listed
 * @param project the project, passed through to the Hive default-constraint lookup
 * @param user the user making the request
 * @return the feature DTOs of the group (partition keys first)
 * @throws FeaturestoreException if the default constraints cannot be retrieved
 */
public List<FeatureGroupFeatureDTO> getFeaturesDTO(Featuregroup featureGroup, Project project, Users user) throws FeaturestoreException {
Collection<CachedFeatureExtraConstraints> featureExtraConstraints = featureGroup.getCachedFeaturegroup().getFeaturesExtraConstraints();
HiveTbls hiveTable = featureGroup.getCachedFeaturegroup().getHiveTbls();
List<SQLDefaultConstraint> defaultConstraints = offlineFeatureGroupController.getDefaultConstraints(featureGroup.getFeaturestore(), hiveTable.getTblName(), project, user);
Collection<CachedFeature> cachedFeatures = featureGroup.getCachedFeaturegroup().getCachedFeatures();
List<FeatureGroupFeatureDTO> featureGroupFeatureDTOS = new ArrayList<>();
// Per-column flags/values resolved inside the loops below.
boolean primary;
boolean hudiPrecombine;
String defaultValue;
String description;
// Add all the columns - if there is a primary key constraint, set the primary key flag
// Sort by column index so the DTO list reflects the Hive column order.
List<HiveColumns> sortedFeatures = hiveTable.getSdId().getCdId().getHiveColumnsCollection().stream().sorted(Comparator.comparing(HiveColumns::getIntegerIdx)).collect(Collectors.toList());
for (HiveColumns hc : sortedFeatures) {
primary = getPrimaryFlag(featureExtraConstraints, hc.getHiveColumnsPK().getColumnName());
hudiPrecombine = getPrecombineFlag(featureGroup, featureExtraConstraints, hc.getHiveColumnsPK().getColumnName());
description = getDescription(cachedFeatures, hc.getHiveColumnsPK().getColumnName());
defaultValue = getDefaultValue(defaultConstraints, hc.getHiveColumnsPK().getColumnName());
featureGroupFeatureDTOS.add(new FeatureGroupFeatureDTO(hc.getHiveColumnsPK().getColumnName(), hc.getTypeName(), description, primary, false, hudiPrecombine, defaultValue, featureGroup.getId()));
}
// Hive stores the partition columns separately
// sort partition columns reversely cause they are then added at the beginning of list and therefore correct
// order again
List<HivePartitionKeys> sortedPartitionKeys = hiveTable.getHivePartitionKeysCollection().stream().sorted(Collections.reverseOrder(Comparator.comparing(HivePartitionKeys::getIntegerIdx))).collect(Collectors.toList());
for (HivePartitionKeys pk : sortedPartitionKeys) {
primary = getPrimaryFlag(featureExtraConstraints, pk.getHivePartitionKeysPK().getPkeyName());
hudiPrecombine = getPrecombineFlag(featureGroup, featureExtraConstraints, pk.getHivePartitionKeysPK().getPkeyName());
defaultValue = getDefaultValue(defaultConstraints, pk.getHivePartitionKeysPK().getPkeyName());
// insert partition keys at beginning
// Note: the partition key's description is taken from Hive's own comment (getPkeyComment),
// not from the cached_feature table used for regular columns above.
featureGroupFeatureDTOS.add(0, new FeatureGroupFeatureDTO(pk.getHivePartitionKeysPK().getPkeyName(), pk.getPkeyType(), pk.getPkeyComment(), primary, true, hudiPrecombine, defaultValue, featureGroup.getId()));
}
// Hudi bookkeeping columns are internal and are hidden from the returned feature list.
if (featureGroup.getCachedFeaturegroup().getTimeTravelFormat() == TimeTravelFormat.HUDI) {
featureGroupFeatureDTOS = dropHudiSpecFeatureGroupFeature(featureGroupFeatureDTOS);
}
return featureGroupFeatureDTOS;
}
Usage of io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HiveTbls in the hopsworks project by logicalclocks: class FeatureGroupCommitController, method computeHudiCommitPath.
/**
 * Computes the path of the Hudi commit metadata file for the given feature group
 * and commit timestamp, under the group's Hive table location.
 *
 * @param featuregroup the feature group whose Hive table location is the base path
 * @param commitDateString the Hudi commit timestamp, expected as "yyyyMMddHHmmss"
 * @return the full path of the commit metadata file inside the Hudi metadata dir
 */
protected String computeHudiCommitPath(Featuregroup featuregroup, String commitDateString) {
  // Check if commitDateString matches the "yyyyMMddHHmmss" pattern. Strict
  // (non-lenient) parsing is required: a lenient SimpleDateFormat silently accepts
  // impossible values (e.g. month 99) by rolling them over instead of failing.
  try {
    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyyMMddHHmmss");
    dateFormat.setLenient(false);
    dateFormat.parse(commitDateString);
  } catch (ParseException e) {
    // NOTE(review): a malformed timestamp is only logged and the path is still built
    // below — preserved as the existing best-effort behavior; confirm callers do not
    // expect an exception here before tightening.
    LOGGER.log(Level.SEVERE, "Unable to recognize provided HUDI commitDateString ", e);
  }
  HiveTbls hiveTbls = featuregroup.getCachedFeaturegroup().getHiveTbls();
  String dbLocation = hiveTbls.getSdId().getLocation();
  Path commitMetadataPath = new Path(HOODIE_METADATA_DIR, commitDateString + HOODIE_COMMIT_METADATA_FILE);
  Path commitPath = new Path(dbLocation, commitMetadataPath);
  return commitPath.toString();
}
Aggregations