Search in sources :

Example 1 with Catalogs

use of org.apache.iceberg.mr.Catalogs in project hive by apache.

The class HiveIcebergMetaHook, method getCatalogProperties.

/**
 * Calculates the properties we would like to send to the catalog.
 * <ul>
 * <li>The base of the properties is the properties stored at the Hive Metastore for the given table
 * <li>We add the {@link Catalogs#LOCATION} as the table location
 * <li>We add the {@link Catalogs#NAME} as TableIdentifier defined by the database name and table name
 * <li>We add the serdeProperties of the HMS table
 * <li>We remove some parameters that we don't want to push down to the Iceberg table props
 * </ul>
 * @param hmsTable Table for which we are calculating the properties
 * @return The properties we can provide for Iceberg functions, like {@link Catalogs}
 */
private static Properties getCatalogProperties(org.apache.hadoop.hive.metastore.api.Table hmsTable) {
    Properties properties = new Properties();
    // Start from the HMS table parameters, skipping null keys/values (Properties is null-hostile)
    hmsTable.getParameters().entrySet().stream().filter(e -> e.getKey() != null && e.getValue() != null).forEach(e -> {
        // translate key names between HMS and Iceberg where needed
        String icebergKey = HiveTableOperations.translateToIcebergProp(e.getKey());
        properties.put(icebergKey, e.getValue());
    });
    // Cache the StorageDescriptor once; it is nullable, so every access below must be guarded
    StorageDescriptor sd = hmsTable.getSd();
    if (properties.get(Catalogs.LOCATION) == null && sd != null && sd.getLocation() != null) {
        properties.put(Catalogs.LOCATION, sd.getLocation());
    }
    if (properties.get(Catalogs.NAME) == null) {
        properties.put(Catalogs.NAME, TableIdentifier.of(hmsTable.getDbName(), hmsTable.getTableName()).toString());
    }
    // BUGFIX: the original dereferenced hmsTable.getSd() unconditionally here, even though the
    // LOCATION branch above treats the StorageDescriptor as nullable -> potential NPE.
    SerDeInfo serdeInfo = sd != null ? sd.getSerdeInfo() : null;
    if (serdeInfo != null) {
        // Serde properties override/extend the table parameters, with the same key translation
        serdeInfo.getParameters().entrySet().stream().filter(e -> e.getKey() != null && e.getValue() != null).forEach(e -> {
            String icebergKey = HiveTableOperations.translateToIcebergProp(e.getKey());
            properties.put(icebergKey, e.getValue());
        });
    }
    // Remove HMS table parameters we don't want to propagate to Iceberg
    PROPERTIES_TO_REMOVE.forEach(properties::remove);
    return properties;
}
Also used : PartitionSpecProxy(org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy) MetaException(org.apache.hadoop.hive.metastore.api.MetaException) CatalogUtil(org.apache.iceberg.CatalogUtil) UpdateSchema(org.apache.iceberg.UpdateSchema) FileSystem(org.apache.hadoop.fs.FileSystem) HiveSchemaUtil(org.apache.iceberg.hive.HiveSchemaUtil) Catalogs(org.apache.iceberg.mr.Catalogs) LoggerFactory(org.slf4j.LoggerFactory) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) TableMetadata(org.apache.iceberg.TableMetadata) DeleteFiles(org.apache.iceberg.DeleteFiles) Lists(org.apache.iceberg.relocated.com.google.common.collect.Lists) AlterTableType(org.apache.hadoop.hive.ql.ddl.table.AlterTableType) NameMapping(org.apache.iceberg.mapping.NameMapping) Map(java.util.Map) Configuration(org.apache.hadoop.conf.Configuration) NoSuchTableException(org.apache.iceberg.exceptions.NoSuchTableException) Path(org.apache.hadoop.fs.Path) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) Splitter(org.apache.iceberg.relocated.com.google.common.base.Splitter) EnumSet(java.util.EnumSet) TableMetadataParser(org.apache.iceberg.TableMetadataParser) MetaStoreUtils(org.apache.hadoop.hive.metastore.utils.MetaStoreUtils) BaseTable(org.apache.iceberg.BaseTable) Collection(java.util.Collection) HiveMetaHook(org.apache.hadoop.hive.metastore.HiveMetaHook) InputFormatConfig(org.apache.iceberg.mr.InputFormatConfig) Set(java.util.Set) ImmutableList(org.apache.iceberg.relocated.com.google.common.collect.ImmutableList) Schema(org.apache.iceberg.Schema) PartitionSpecParser(org.apache.iceberg.PartitionSpecParser) SchemaParser(org.apache.iceberg.SchemaParser) Objects(java.util.Objects) Type(org.apache.iceberg.types.Type) List(java.util.List) UpdateProperties(org.apache.iceberg.UpdateProperties) PartitionSpec(org.apache.iceberg.PartitionSpec) Optional(java.util.Optional) TableProperties(org.apache.iceberg.TableProperties) 
SessionStateUtil(org.apache.hadoop.hive.ql.session.SessionStateUtil) Expressions(org.apache.iceberg.expressions.Expressions) AcidUtils(org.apache.hadoop.hive.ql.io.AcidUtils) TypeInfoUtils(org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils) ImmutableSet(org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet) ImmutableMap(org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap) Pair(org.apache.iceberg.util.Pair) MappingUtil(org.apache.iceberg.mapping.MappingUtil) BaseMetastoreTableOperations(org.apache.iceberg.BaseMetastoreTableOperations) UpdatePartitionSpec(org.apache.iceberg.UpdatePartitionSpec) TableName(org.apache.hadoop.hive.common.TableName) PartitionTransformSpec(org.apache.hadoop.hive.ql.parse.PartitionTransformSpec) Properties(java.util.Properties) Logger(org.slf4j.Logger) TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) Table(org.apache.iceberg.Table) EnvironmentContext(org.apache.hadoop.hive.metastore.api.EnvironmentContext) NameMappingParser(org.apache.iceberg.mapping.NameMappingParser) IOException(java.io.IOException) FileFormat(org.apache.iceberg.FileFormat) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) Transaction(org.apache.iceberg.Transaction) Preconditions(org.apache.iceberg.relocated.com.google.common.base.Preconditions) FileIO(org.apache.iceberg.io.FileIO) Collections(java.util.Collections) org.apache.hadoop.hive.metastore.api.hive_metastoreConstants(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants) PartitionTransform(org.apache.hadoop.hive.ql.parse.PartitionTransform) HiveTableOperations(org.apache.iceberg.hive.HiveTableOperations) SerDeInfo(org.apache.hadoop.hive.metastore.api.SerDeInfo) UpdateProperties(org.apache.iceberg.UpdateProperties) TableProperties(org.apache.iceberg.TableProperties) Properties(java.util.Properties)

Example 2 with Catalogs

use of org.apache.iceberg.mr.Catalogs in project hive by apache.

The class IcebergTableUtil, method getTable.

/**
 * Load the iceberg table either from the {@link QueryState} or through the configured catalog. Look for the table
 * object stored in the query state. If it's null, it means the table was not loaded yet within the same query
 * therefore we claim it through the Catalogs API and then store it in query state.
 * @param configuration a Hadoop configuration
 * @param properties controlling properties
 * @return an Iceberg table
 */
static Table getTable(Configuration configuration, Properties properties) {
    // A "metaTable" property selects an Iceberg metadata table, addressed as "<table>.<metaTable>"
    String metaTableSuffix = properties.getProperty("metaTable");
    String baseTableName = properties.getProperty(Catalogs.NAME);
    if (metaTableSuffix != null) {
        properties.setProperty(Catalogs.NAME, baseTableName + "." + metaTableSuffix);
    }
    // Re-read NAME so the identifier reflects any metaTable rewrite above
    String identifier = properties.getProperty(Catalogs.NAME);
    // Fast path: reuse the table cached in the query state for this identifier
    Optional<Table> cached = SessionStateUtil.getResource(configuration, identifier)
        .filter(Table.class::isInstance)
        .map(Table.class::cast);
    if (cached.isPresent()) {
        return cached.get();
    }
    // Slow path: load via the Catalogs API and cache the result for the rest of the query
    LOG.debug("Iceberg table {} is not found in QueryState. Loading table from configured catalog", identifier);
    Table loaded = Catalogs.loadTable(configuration, properties);
    SessionStateUtil.addResource(configuration, identifier, loaded);
    return loaded;
}
Also used : PartitionTransformSpec(org.apache.hadoop.hive.ql.parse.PartitionTransformSpec) Properties(java.util.Properties) Logger(org.slf4j.Logger) Table(org.apache.iceberg.Table) Catalogs(org.apache.iceberg.mr.Catalogs) LoggerFactory(org.slf4j.LoggerFactory) Schema(org.apache.iceberg.Schema) List(java.util.List) UpdatePartitionSpec(org.apache.iceberg.UpdatePartitionSpec) Configuration(org.apache.hadoop.conf.Configuration) PartitionSpec(org.apache.iceberg.PartitionSpec) SessionStateUtil(org.apache.hadoop.hive.ql.session.SessionStateUtil) QueryState(org.apache.hadoop.hive.ql.QueryState) Expressions(org.apache.iceberg.expressions.Expressions) org.apache.hadoop.hive.metastore.api.hive_metastoreConstants(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants) Table(org.apache.iceberg.Table)

Aggregations

List (java.util.List)2 Properties (java.util.Properties)2 Configuration (org.apache.hadoop.conf.Configuration)2 org.apache.hadoop.hive.metastore.api.hive_metastoreConstants (org.apache.hadoop.hive.metastore.api.hive_metastoreConstants)2 PartitionTransformSpec (org.apache.hadoop.hive.ql.parse.PartitionTransformSpec)2 SessionStateUtil (org.apache.hadoop.hive.ql.session.SessionStateUtil)2 PartitionSpec (org.apache.iceberg.PartitionSpec)2 Schema (org.apache.iceberg.Schema)2 Table (org.apache.iceberg.Table)2 UpdatePartitionSpec (org.apache.iceberg.UpdatePartitionSpec)2 Expressions (org.apache.iceberg.expressions.Expressions)2 Catalogs (org.apache.iceberg.mr.Catalogs)2 Logger (org.slf4j.Logger)2 LoggerFactory (org.slf4j.LoggerFactory)2 IOException (java.io.IOException)1 Collection (java.util.Collection)1 Collections (java.util.Collections)1 EnumSet (java.util.EnumSet)1 Map (java.util.Map)1 Objects (java.util.Objects)1