Search in sources :

Example 1 with HiveTbls

use of io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HiveTbls in project hopsworks by logicalclocks.

In class TestFeatureGroupCommitController, method setup.

/**
 * Builds the fixture used by the tests: a feature store ({@code fs}) and a feature group
 * ({@code fg1}) whose cached feature group points at a Hive table named {@code fg1_1}
 * with a storage descriptor located in the Hive warehouse.
 */
@Before
public void setup() {
    // Storage descriptor carrying the warehouse location of the table
    HiveSds storageDescriptor = new HiveSds();
    storageDescriptor.setSdId(1L);
    storageDescriptor.setLocation("hopsfs://namenode.service.consul:8020/apps/hive/warehouse/test_proj_featurestore.db/fg1_1");
    storageDescriptor.setInode(new Inode());

    // Hive table entry referencing the storage descriptor above
    HiveTbls table = new HiveTbls();
    table.setSdId(storageDescriptor);
    table.setTblName("fg1_1");

    CachedFeaturegroup cached = new CachedFeaturegroup();
    cached.setHiveTbls(table);

    // Feature store owned by the test project
    fs = new Featurestore();
    fs.setHiveDbId(1L);
    fs.setProject(new Project("test_proj"));

    // Feature group wired to the feature store and the cached feature group
    fg1 = new Featuregroup(1);
    fg1.setName("fg1_1");
    fg1.setVersion(1);
    fg1.setFeaturestore(fs);
    fg1.setCachedFeaturegroup(cached);
}
Also used : HiveTbls(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HiveTbls) Project(io.hops.hopsworks.persistence.entity.project.Project) Inode(io.hops.hopsworks.persistence.entity.hdfs.inode.Inode) Featurestore(io.hops.hopsworks.persistence.entity.featurestore.Featurestore) Featuregroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup) CachedFeaturegroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeaturegroup) HiveSds(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HiveSds) CachedFeaturegroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeaturegroup) Before(org.junit.Before)

Example 2 with HiveTbls

use of io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HiveTbls in project hopsworks by logicalclocks.

In class CachedFeaturegroupController, method createCachedFeaturegroup.

/**
 * Creates the Hive table backing a cached feature group and then persists the
 * cached feature group metadata.
 *
 * @param featurestore the featurestore of the feature group
 * @param cachedFeaturegroupDTO the user input data to use when creating the cached feature group
 * @param project forwarded to the Hive table creation
 * @param user the user making the request
 * @return the created entity
 * @throws FeaturestoreException if the table cannot be found in the metastore after it was created
 */
public CachedFeaturegroup createCachedFeaturegroup(Featurestore featurestore, CachedFeaturegroupDTO cachedFeaturegroupDTO, Project project, Users user) throws FeaturestoreException, SQLException, KafkaException, SchemaException, ProjectException, UserException, ServiceException, HopsSecurityException, IOException {
    TimeTravelFormat timeTravelFormat = cachedFeaturegroupDTO.getTimeTravelFormat();
    verifyPrimaryKey(cachedFeaturegroupDTO.getFeatures(), timeTravelFormat);

    // Prepare DDL statement
    String tableName = getTblName(cachedFeaturegroupDTO.getName(), cachedFeaturegroupDTO.getVersion());

    // For HUDI the table gets the HUDI specific features in addition to the user supplied ones
    List<FeatureGroupFeatureDTO> tableFeatures;
    if (timeTravelFormat == TimeTravelFormat.HUDI) {
        tableFeatures = addHudiSpecFeatures(cachedFeaturegroupDTO.getFeatures());
    } else {
        tableFeatures = cachedFeaturegroupDTO.getFeatures();
    }
    offlineFeatureGroupController.createHiveTable(
        featurestore,
        tableName,
        cachedFeaturegroupDTO.getDescription(),
        tableFeatures,
        project,
        user,
        getTableFormat(timeTravelFormat));

    // Online is only enabled when both the installation setting and the request ask for it
    boolean onlineEnabled = settings.isOnlineFeaturestore() && cachedFeaturegroupDTO.getOnlineEnabled();

    // Get HiveTblId of the newly created table from the metastore
    HiveTbls createdTable = cachedFeatureGroupFacade
        .getHiveTableByNameAndDB(tableName, featurestore.getHiveDbId())
        .orElseThrow(() -> new FeaturestoreException(
            RESTCodes.FeaturestoreErrorCode.COULD_NOT_CREATE_FEATUREGROUP,
            Level.WARNING,
            "",
            "Table created correctly but not in the metastore"));

    // Persist cached feature group
    return persistCachedFeaturegroupMetadata(createdTable, onlineEnabled, timeTravelFormat, cachedFeaturegroupDTO.getFeatures());
}
Also used : HiveTbls(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HiveTbls) FeaturestoreException(io.hops.hopsworks.exceptions.FeaturestoreException)

Example 3 with HiveTbls

use of io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HiveTbls in project hopsworks by logicalclocks.

In class CachedFeaturegroupController, method persistCachedFeaturegroupMetadata.

/**
 * Builds a new {@link CachedFeaturegroup} entity from the given inputs and persists it
 * through the cached feature group facade.
 *
 * @param hiveTable the Hive table entity the cached feature group is linked to
 * @param onlineEnabled value stored as the online-enabled flag of the entity
 * @param timeTravelFormat time travel format stored on the entity
 * @param featureGroupFeatureDTOS features used to build the extra constraints and the cached
 *                                features; only features with a non-null description become
 *                                cached features
 * @return Entity of the created cached feature group
 */
private CachedFeaturegroup persistCachedFeaturegroupMetadata(HiveTbls hiveTable, boolean onlineEnabled, TimeTravelFormat timeTravelFormat, List<FeatureGroupFeatureDTO> featureGroupFeatureDTOS) {
    CachedFeaturegroup entity = new CachedFeaturegroup();
    entity.setHiveTbls(hiveTable);
    entity.setOnlineEnabled(onlineEnabled);
    entity.setTimeTravelFormat(timeTravelFormat);
    entity.setFeaturesExtraConstraints(buildFeatureExtraConstrains(featureGroupFeatureDTOS, entity));

    // Only features that carry a description are stored as cached features
    List<CachedFeature> describedFeatures = featureGroupFeatureDTOS.stream()
        .filter(dto -> dto.getDescription() != null)
        .map(dto -> new CachedFeature(entity, dto.getName(), dto.getDescription()))
        .collect(Collectors.toList());
    entity.setCachedFeatures(describedFeatures);

    cachedFeatureGroupFacade.persist(entity);
    return entity;
}
Also used : Arrays(java.util.Arrays) Connection(java.sql.Connection) Feature(io.hops.hopsworks.common.featurestore.query.Feature) HiveTableParams(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HiveTableParams) StringUtils(org.apache.commons.lang3.StringUtils) CachedFeatureExtraConstraints(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeatureExtraConstraints) Settings(io.hops.hopsworks.common.util.Settings) CryptoPasswordNotFoundException(io.hops.hopsworks.exceptions.CryptoPasswordNotFoundException) SqlNode(org.apache.calcite.sql.SqlNode) TransactionAttributeType(javax.ejb.TransactionAttributeType) FeaturestoreController(io.hops.hopsworks.common.featurestore.FeaturestoreController) ResultSet(java.sql.ResultSet) ProjectException(io.hops.hopsworks.exceptions.ProjectException) SqlSelect(org.apache.calcite.sql.SqlSelect) FeatureGroupFeatureDTO(io.hops.hopsworks.common.featurestore.feature.FeatureGroupFeatureDTO) Utils(io.hops.hopsworks.common.hdfs.Utils) HivePartitionKeys(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HivePartitionKeys) Stateless(javax.ejb.Stateless) Collection(java.util.Collection) Featuregroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup) RESTCodes(io.hops.hopsworks.restutils.RESTCodes) ConstructorController(io.hops.hopsworks.common.featurestore.query.ConstructorController) Logger(java.util.logging.Logger) ServiceDiscoveryException(com.logicalclocks.servicediscoverclient.exceptions.ServiceDiscoveryException) Collectors(java.util.stream.Collectors) Pair(org.javatuples.Pair) FileNotFoundException(java.io.FileNotFoundException) List(java.util.List) SqlStdOperatorTable(org.apache.calcite.sql.fun.SqlStdOperatorTable) PostConstruct(javax.annotation.PostConstruct) FeaturestoreUtils(io.hops.hopsworks.common.featurestore.utils.FeaturestoreUtils) Optional(java.util.Optional) KafkaException(io.hops.hopsworks.exceptions.KafkaException) 
HiveSqlDialect(org.apache.calcite.sql.dialect.HiveSqlDialect) ResultSetMetaData(java.sql.ResultSetMetaData) HopsSecurityException(io.hops.hopsworks.exceptions.HopsSecurityException) CertificateMaterializer(io.hops.hopsworks.common.security.CertificateMaterializer) HiveTbls(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HiveTbls) FeaturestoreActivityMeta(io.hops.hopsworks.persistence.entity.featurestore.activity.FeaturestoreActivityMeta) OnlineFeaturestoreController(io.hops.hopsworks.common.featurestore.online.OnlineFeaturestoreController) Project(io.hops.hopsworks.persistence.entity.project.Project) ArrayList(java.util.ArrayList) Level(java.util.logging.Level) Strings(com.google.common.base.Strings) FeaturestoreException(io.hops.hopsworks.exceptions.FeaturestoreException) SQLException(java.sql.SQLException) SqlLiteral(org.apache.calcite.sql.SqlLiteral) FeaturestoreActivityFacade(io.hops.hopsworks.common.featurestore.activity.FeaturestoreActivityFacade) TransactionAttribute(javax.ejb.TransactionAttribute) OnlineFeaturegroupController(io.hops.hopsworks.common.featurestore.featuregroup.online.OnlineFeaturegroupController) SqlIdentifier(org.apache.calcite.sql.SqlIdentifier) EJB(javax.ejb.EJB) HiveController(io.hops.hopsworks.common.hive.HiveController) SqlParserPos(org.apache.calcite.sql.parser.SqlParserPos) SchemaException(io.hops.hopsworks.exceptions.SchemaException) IOException(java.io.IOException) Featurestore(io.hops.hopsworks.persistence.entity.featurestore.Featurestore) CachedFeature(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeature) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) SqlDialect(org.apache.calcite.sql.SqlDialect) ServiceException(io.hops.hopsworks.exceptions.ServiceException) UserException(io.hops.hopsworks.exceptions.UserException) TimeTravelFormat(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.TimeTravelFormat) 
CachedFeaturegroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeaturegroup) Statement(java.sql.Statement) HiveColumns(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HiveColumns) Users(io.hops.hopsworks.persistence.entity.user.Users) Comparator(java.util.Comparator) SqlNodeList(org.apache.calcite.sql.SqlNodeList) Collections(java.util.Collections) FeaturegroupDTO(io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupDTO) DriverManager(java.sql.DriverManager) CachedFeature(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeature) CachedFeaturegroup(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeaturegroup)

Example 4 with HiveTbls

use of io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HiveTbls in project hopsworks by logicalclocks.

In class CachedFeaturegroupController, method getFeaturesDTO.

/**
 * Builds the feature DTOs of a cached feature group from its Hive table: partition keys
 * first, followed by the regular columns, each ordered by their integer index.
 * For HUDI feature groups the result is passed through
 * {@code dropHudiSpecFeatureGroupFeature} before being returned.
 *
 * @param featureGroup the feature group whose features are listed
 * @param project forwarded to the default constraints lookup
 * @param user forwarded to the default constraints lookup
 * @return the list of feature DTOs
 * @throws FeaturestoreException declared by the default constraints lookup
 */
public List<FeatureGroupFeatureDTO> getFeaturesDTO(Featuregroup featureGroup, Project project, Users user) throws FeaturestoreException {
    CachedFeaturegroup cachedFeaturegroup = featureGroup.getCachedFeaturegroup();
    Collection<CachedFeatureExtraConstraints> extraConstraints = cachedFeaturegroup.getFeaturesExtraConstraints();
    HiveTbls hiveTable = cachedFeaturegroup.getHiveTbls();
    List<SQLDefaultConstraint> defaultConstraints = offlineFeatureGroupController.getDefaultConstraints(
        featureGroup.getFeaturestore(), hiveTable.getTblName(), project, user);
    Collection<CachedFeature> cachedFeatures = cachedFeaturegroup.getCachedFeatures();

    List<FeatureGroupFeatureDTO> result = new ArrayList<>();

    // Add all the columns - if there is a primary key constraint, set the primary key flag
    List<HiveColumns> columnsByIndex = hiveTable.getSdId().getCdId().getHiveColumnsCollection()
        .stream()
        .sorted(Comparator.comparing(HiveColumns::getIntegerIdx))
        .collect(Collectors.toList());
    for (HiveColumns column : columnsByIndex) {
        String columnName = column.getHiveColumnsPK().getColumnName();
        boolean primary = getPrimaryFlag(extraConstraints, columnName);
        boolean hudiPrecombine = getPrecombineFlag(featureGroup, extraConstraints, columnName);
        String description = getDescription(cachedFeatures, columnName);
        String defaultValue = getDefaultValue(defaultConstraints, columnName);
        result.add(new FeatureGroupFeatureDTO(columnName, column.getTypeName(), description, primary, false,
            hudiPrecombine, defaultValue, featureGroup.getId()));
    }

    // Hive stores the partition columns separately.
    // They are sorted in reverse because each one is inserted at the head of the list,
    // which restores the ascending order in the final result.
    List<HivePartitionKeys> partitionKeysReversed = hiveTable.getHivePartitionKeysCollection()
        .stream()
        .sorted(Collections.reverseOrder(Comparator.comparing(HivePartitionKeys::getIntegerIdx)))
        .collect(Collectors.toList());
    for (HivePartitionKeys partitionKey : partitionKeysReversed) {
        String keyName = partitionKey.getHivePartitionKeysPK().getPkeyName();
        boolean primary = getPrimaryFlag(extraConstraints, keyName);
        boolean hudiPrecombine = getPrecombineFlag(featureGroup, extraConstraints, keyName);
        String defaultValue = getDefaultValue(defaultConstraints, keyName);
        // insert partition keys at beginning
        result.add(0, new FeatureGroupFeatureDTO(keyName, partitionKey.getPkeyType(), partitionKey.getPkeyComment(),
            primary, true, hudiPrecombine, defaultValue, featureGroup.getId()));
    }

    if (cachedFeaturegroup.getTimeTravelFormat() == TimeTravelFormat.HUDI) {
        return dropHudiSpecFeatureGroupFeature(result);
    }
    return result;
}
Also used : CachedFeatureExtraConstraints(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeatureExtraConstraints) FeatureGroupFeatureDTO(io.hops.hopsworks.common.featurestore.feature.FeatureGroupFeatureDTO) CachedFeature(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeature) ArrayList(java.util.ArrayList) HiveColumns(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HiveColumns) HivePartitionKeys(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HivePartitionKeys) SQLDefaultConstraint(org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint) HiveTbls(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HiveTbls)

Example 5 with HiveTbls

use of io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HiveTbls in project hopsworks by logicalclocks.

In class FeatureGroupCommitController, method computeHudiCommitPath.

/**
 * Builds the path of the HUDI commit metadata file for a commit of the given feature group.
 * The path is composed of the location of the feature group's Hive table storage descriptor,
 * {@code HOODIE_METADATA_DIR}, and {@code commitDateString + HOODIE_COMMIT_METADATA_FILE}.
 *
 * <p>The commit date string is checked against the pattern {@code yyyyMMddHHmmss}. A string
 * that does not parse is only logged at SEVERE level; the path is still computed and returned.
 *
 * @param featuregroup the feature group whose Hive table location is used as the base path
 * @param commitDateString the commit timestamp, expected in {@code yyyyMMddHHmmss} format
 * @return the commit metadata path as a string
 */
protected String computeHudiCommitPath(Featuregroup featuregroup, String commitDateString) {
    // Check if CommitDateString matches pattern to "yyyyMMddHHmmss"
    try {
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyyMMddHHmmss");
        // SimpleDateFormat is lenient by default and silently rolls over out-of-range fields
        // (e.g. month 13), which would let invalid commit dates pass this check unnoticed.
        dateFormat.setLenient(false);
        dateFormat.parse(commitDateString);
    } catch (ParseException e) {
        LOGGER.log(Level.SEVERE, "Unable to recognize provided HUDI commitDateString ", e);
    }
    HiveTbls hiveTbls = featuregroup.getCachedFeaturegroup().getHiveTbls();
    String dbLocation = hiveTbls.getSdId().getLocation();
    Path commitMetadataPath = new Path(HOODIE_METADATA_DIR, commitDateString + HOODIE_COMMIT_METADATA_FILE);
    Path commitPath = new Path(dbLocation, commitMetadataPath);
    return commitPath.toString();
}
Also used : HiveTbls(io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HiveTbls) Path(org.apache.hadoop.fs.Path) ParseException(java.text.ParseException) SimpleDateFormat(java.text.SimpleDateFormat)

Aggregations

HiveTbls (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HiveTbls)5 FeatureGroupFeatureDTO (io.hops.hopsworks.common.featurestore.feature.FeatureGroupFeatureDTO)2 FeaturestoreException (io.hops.hopsworks.exceptions.FeaturestoreException)2 Featurestore (io.hops.hopsworks.persistence.entity.featurestore.Featurestore)2 Featuregroup (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.Featuregroup)2 CachedFeature (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeature)2 CachedFeatureExtraConstraints (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeatureExtraConstraints)2 CachedFeaturegroup (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.CachedFeaturegroup)2 HiveColumns (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HiveColumns)2 HivePartitionKeys (io.hops.hopsworks.persistence.entity.featurestore.featuregroup.cached.hive.HivePartitionKeys)2 Project (io.hops.hopsworks.persistence.entity.project.Project)2 ArrayList (java.util.ArrayList)2 SQLDefaultConstraint (org.apache.hadoop.hive.metastore.api.SQLDefaultConstraint)2 Strings (com.google.common.base.Strings)1 ServiceDiscoveryException (com.logicalclocks.servicediscoverclient.exceptions.ServiceDiscoveryException)1 FeaturestoreController (io.hops.hopsworks.common.featurestore.FeaturestoreController)1 FeaturestoreActivityFacade (io.hops.hopsworks.common.featurestore.activity.FeaturestoreActivityFacade)1 FeaturegroupDTO (io.hops.hopsworks.common.featurestore.featuregroup.FeaturegroupDTO)1 OnlineFeaturegroupController (io.hops.hopsworks.common.featurestore.featuregroup.online.OnlineFeaturegroupController)1 OnlineFeaturestoreController (io.hops.hopsworks.common.featurestore.online.OnlineFeaturestoreController)1