
Example 1 with UpdateProperties

Use of org.apache.iceberg.UpdateProperties in project drill by apache.

From the class IcebergMetastore, method updateTableProperties.

/**
 * Checks config table properties against the current table properties.
 * Adds properties missing from the table, updates those whose values differ,
 * and removes table properties that are absent from the config.
 * If properties are the same, does nothing.
 *
 * @param table Iceberg table instance
 * @param tableProperties table properties from the config
 */
private void updateTableProperties(Table table, Map<String, String> tableProperties) {
    Map<String, String> currentProperties = table.properties();
    MapDifference<String, String> difference = Maps.difference(tableProperties, currentProperties);
    if (difference.areEqual()) {
        return;
    }
    UpdateProperties updateProperties = table.updateProperties();
    // collect properties that are different, taking the value from the config
    Map<String, String> propertiesToUpdate = difference.entriesDiffering().entrySet().stream()
        .collect(Collectors.toMap(Map.Entry::getKey, entry -> entry.getValue().leftValue(), (o, n) -> n));
    // add new properties
    propertiesToUpdate.putAll(difference.entriesOnlyOnLeft());
    logger.debug("Updating Iceberg table [{}] properties: {}", table.location(), updateProperties);
    propertiesToUpdate.forEach(updateProperties::set);
    logger.debug("Removing Iceberg table [{}] properties: {}", table.location(), difference.entriesOnlyOnRight());
    difference.entriesOnlyOnRight().keySet().forEach(updateProperties::remove);
    updateProperties.commit();
}
Also used: AlreadyExistsException(org.apache.iceberg.exceptions.AlreadyExistsException) IcebergTables(org.apache.drill.metastore.iceberg.components.tables.IcebergTables) FileSystem(org.apache.hadoop.fs.FileSystem) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) IcebergConfigConstants(org.apache.drill.metastore.iceberg.config.IcebergConfigConstants) Maps(org.apache.drill.shaded.guava.com.google.common.collect.Maps) IcebergMetastoreException(org.apache.drill.metastore.iceberg.exceptions.IcebergMetastoreException) Configuration(org.apache.hadoop.conf.Configuration) NoSuchTableException(org.apache.iceberg.exceptions.NoSuchTableException) Map(java.util.Map) Path(org.apache.hadoop.fs.Path) IcebergTableSchema(org.apache.drill.metastore.iceberg.schema.IcebergTableSchema) Metastore(org.apache.drill.metastore.Metastore) MapDifference(org.apache.drill.shaded.guava.com.google.common.collect.MapDifference) Logger(org.slf4j.Logger) Views(org.apache.drill.metastore.components.views.Views) Config(com.typesafe.config.Config) Table(org.apache.iceberg.Table) HadoopTables(org.apache.iceberg.hadoop.HadoopTables) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) UpdateProperties(org.apache.iceberg.UpdateProperties) DrillConfig(org.apache.drill.common.config.DrillConfig) Tables(org.apache.drill.metastore.components.tables.Tables) Collections(java.util.Collections)
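
The method above stages all additions, updates and removals on a single UpdateProperties instance and applies them atomically with commit(). A minimal standalone sketch of that fluent API, assuming an already-loaded Iceberg Table and illustrative property keys and values:

import org.apache.iceberg.Table;
import org.apache.iceberg.UpdateProperties;

class UpdatePropertiesSketch {

    // Minimal sketch: batch property additions, updates and removals into one atomic commit.
    static void applyChanges(Table table) {
        UpdateProperties update = table.updateProperties();
        // add or overwrite a property (illustrative key/value)
        update.set("write.format.default", "parquet");
        // remove a property if present (illustrative key)
        update.remove("obsolete.property");
        // nothing is applied to the table metadata until commit() is called
        update.commit();
    }
}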

Example 2 with UpdateProperties

Use of org.apache.iceberg.UpdateProperties in project incubator-gobblin by apache.

From the class IcebergMetadataWriter, method flush.

/**
 * When flushing a table, we perform the following steps:
 * 1. Commit the appendFiles if it exists
 * 2. Update the new table properties: GMCE high watermark, data offset range, schema versions
 * 3. Update the schema
 * 4. Commit the transaction
 * 5. Reset tableMetadata
 * @param dbName
 * @param tableName
 */
@Override
public void flush(String dbName, String tableName) throws IOException {
    Lock writeLock = readWriteLock.writeLock();
    writeLock.lock();
    try {
        TableIdentifier tid = TableIdentifier.of(dbName, tableName);
        TableMetadata tableMetadata = tableMetadataMap.getOrDefault(tid, new TableMetadata());
        if (tableMetadata.transaction.isPresent()) {
            Transaction transaction = tableMetadata.transaction.get();
            Map<String, String> props = tableMetadata.newProperties.or(Maps.newHashMap(tableMetadata.lastProperties.or(getIcebergTable(tid).properties())));
            if (tableMetadata.appendFiles.isPresent()) {
                tableMetadata.appendFiles.get().commit();
                if (tableMetadata.completenessEnabled) {
                    String topicName = props.get(TOPIC_NAME_KEY);
                    if (topicName == null) {
                        log.error(String.format("Not performing audit check. %s is null. Please set as table property of %s.%s", TOPIC_NAME_KEY, dbName, tableName));
                    } else {
                        long newCompletenessWatermark = computeCompletenessWatermark(topicName, tableMetadata.datePartitions, tableMetadata.prevCompletenessWatermark);
                        if (newCompletenessWatermark > tableMetadata.prevCompletenessWatermark) {
                            log.info(String.format("Updating %s for %s.%s to %s", COMPLETION_WATERMARK_KEY, dbName, tableName, newCompletenessWatermark));
                            props.put(COMPLETION_WATERMARK_KEY, String.valueOf(newCompletenessWatermark));
                            props.put(COMPLETION_WATERMARK_TIMEZONE_KEY, this.timeZone);
                            tableMetadata.newCompletenessWatermark = newCompletenessWatermark;
                        }
                    }
                }
            }
            if (tableMetadata.deleteFiles.isPresent()) {
                tableMetadata.deleteFiles.get().commit();
            }
            // Set high waterMark
            Long highWatermark = tableCurrentWatermarkMap.get(tid);
            props.put(String.format(GMCE_HIGH_WATERMARK_KEY, tableTopicPartitionMap.get(tid)), highWatermark.toString());
            // Set low waterMark
            props.put(String.format(GMCE_LOW_WATERMARK_KEY, tableTopicPartitionMap.get(tid)), tableMetadata.lowWatermark.get().toString());
            // Set whether to delete metadata files after commit
            props.put(TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED, Boolean.toString(conf.getBoolean(TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED, TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED_DEFAULT)));
            props.put(TableProperties.METADATA_PREVIOUS_VERSIONS_MAX, Integer.toString(conf.getInt(TableProperties.METADATA_PREVIOUS_VERSIONS_MAX, TableProperties.METADATA_PREVIOUS_VERSIONS_MAX_DEFAULT)));
            // Set data offset range
            boolean containOffsetRange = setDatasetOffsetRange(tableMetadata, props);
            String topicName = tableName;
            if (containOffsetRange) {
                String topicPartitionString = tableMetadata.dataOffsetRange.get().keySet().iterator().next();
                // In case the topic name is not the table name or the topic name contains '-'
                topicName = topicPartitionString.substring(0, topicPartitionString.lastIndexOf('-'));
            }
            // Update schema(commit)
            updateSchema(tableMetadata, props, topicName);
            // Update properties
            UpdateProperties updateProperties = transaction.updateProperties();
            props.forEach(updateProperties::set);
            updateProperties.commit();
            try (AutoCloseableHiveLock lock = this.locks.getTableLock(dbName, tableName)) {
                transaction.commitTransaction();
            }
            // Emit GTE for snapshot commits
            Snapshot snapshot = tableMetadata.table.get().currentSnapshot();
            Map<String, String> currentProps = tableMetadata.table.get().properties();
            submitSnapshotCommitEvent(snapshot, tableMetadata, dbName, tableName, currentProps, highWatermark);
            // Reset the table metadata for next accumulation period
            tableMetadata.reset(currentProps, highWatermark, tableMetadata.newCompletenessWatermark);
            log.info(String.format("Finish commit of new snapshot %s for table %s", snapshot.snapshotId(), tid.toString()));
        } else {
            log.info("There's no transaction initiated for the table {}", tid.toString());
        }
    } catch (RuntimeException e) {
        throw new RuntimeException(String.format("Fail to flush table %s %s", dbName, tableName), e);
    } catch (Exception e) {
        throw new IOException(String.format("Fail to flush table %s %s", dbName, tableName), e);
    } finally {
        writeLock.unlock();
    }
}
Also used: TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) IOException(java.io.IOException) AlreadyExistsException(org.apache.iceberg.exceptions.AlreadyExistsException) SchemaRegistryException(org.apache.gobblin.metrics.kafka.SchemaRegistryException) NoSuchTableException(org.apache.iceberg.exceptions.NoSuchTableException) ReadWriteLock(java.util.concurrent.locks.ReadWriteLock) ReentrantReadWriteLock(java.util.concurrent.locks.ReentrantReadWriteLock) Lock(java.util.concurrent.locks.Lock) AutoCloseableHiveLock(org.apache.gobblin.hive.AutoCloseableHiveLock) HiveLock(org.apache.gobblin.hive.HiveLock) Snapshot(org.apache.iceberg.Snapshot) UpdateProperties(org.apache.iceberg.UpdateProperties) Transaction(org.apache.iceberg.Transaction)
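
In this example the property update is obtained from a Transaction rather than from the Table itself, so the committed properties become visible only together with the staged appends, deletes and schema change once commitTransaction() runs. A minimal sketch of that transactional pattern, assuming a loaded Table and a prepared property map:

import java.util.Map;
import org.apache.iceberg.Table;
import org.apache.iceberg.Transaction;
import org.apache.iceberg.UpdateProperties;

class TransactionalPropertiesSketch {

    // Minimal sketch: stage property changes inside a transaction so they are
    // published atomically with any other pending updates on that transaction.
    static void flushProperties(Table table, Map<String, String> props) {
        Transaction transaction = table.newTransaction();
        UpdateProperties updateProperties = transaction.updateProperties();
        // stage every key/value pair from the prepared map
        props.forEach(updateProperties::set);
        // commits only into the transaction, not yet into the table
        updateProperties.commit();
        // single atomic commit publishing all staged changes to the table
        transaction.commitTransaction();
    }
}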

Example 3 with UpdateProperties

Use of org.apache.iceberg.UpdateProperties in project hive by apache.

From the class HiveIcebergMetaHook, method alterTableProperties.

private void alterTableProperties(org.apache.hadoop.hive.metastore.api.Table hmsTable, Map<String, String> contextProperties) {
    Map<String, String> hmsTableParameters = hmsTable.getParameters();
    Splitter splitter = Splitter.on(PROPERTIES_SEPARATOR);
    UpdateProperties icebergUpdateProperties = icebergTable.updateProperties();
    if (contextProperties.containsKey(SET_PROPERTIES)) {
        splitter.splitToList(contextProperties.get(SET_PROPERTIES)).forEach(k -> icebergUpdateProperties.set(k, hmsTableParameters.get(k)));
    } else if (contextProperties.containsKey(UNSET_PROPERTIES)) {
        splitter.splitToList(contextProperties.get(UNSET_PROPERTIES)).forEach(icebergUpdateProperties::remove);
    }
    icebergUpdateProperties.commit();
}
Also used: UpdateProperties (org.apache.iceberg.UpdateProperties) Splitter (org.apache.iceberg.relocated.com.google.common.base.Splitter)
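
The SET and UNSET branches mirror Hive's ALTER TABLE ... SET TBLPROPERTIES and UNSET TBLPROPERTIES: a separator-joined list of keys from the DDL context is split, and each key is either copied from the HMS table parameters or removed. A minimal sketch of the UNSET branch, assuming a loaded Table; the comma separator and key names are illustrative, while the real hook uses the PROPERTIES_SEPARATOR constant referenced above:

import org.apache.iceberg.Table;
import org.apache.iceberg.UpdateProperties;
import org.apache.iceberg.relocated.com.google.common.base.Splitter;

class UnsetPropertiesSketch {

    // Minimal sketch of the UNSET branch: split a joined key list and remove each key in one commit.
    static void unsetProperties(Table icebergTable, String joinedKeys) {
        UpdateProperties update = icebergTable.updateProperties();
        // ',' is an illustrative separator; the Hive hook uses PROPERTIES_SEPARATOR
        Splitter.on(',').splitToList(joinedKeys).forEach(update::remove);
        update.commit();
    }
}

For example, unsetProperties(table, "obsolete.property,another.key") would drop both keys in a single metadata commit.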

Aggregations

UpdateProperties (org.apache.iceberg.UpdateProperties): 3
IOException (java.io.IOException): 2
AlreadyExistsException (org.apache.iceberg.exceptions.AlreadyExistsException): 2
NoSuchTableException (org.apache.iceberg.exceptions.NoSuchTableException): 2
Config (com.typesafe.config.Config): 1
Collections (java.util.Collections): 1
HashMap (java.util.HashMap): 1
Map (java.util.Map): 1
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 1
Lock (java.util.concurrent.locks.Lock): 1
ReadWriteLock (java.util.concurrent.locks.ReadWriteLock): 1
ReentrantReadWriteLock (java.util.concurrent.locks.ReentrantReadWriteLock): 1
Collectors (java.util.stream.Collectors): 1
DrillConfig (org.apache.drill.common.config.DrillConfig): 1
Metastore (org.apache.drill.metastore.Metastore): 1
Tables (org.apache.drill.metastore.components.tables.Tables): 1
Views (org.apache.drill.metastore.components.views.Views): 1
IcebergTables (org.apache.drill.metastore.iceberg.components.tables.IcebergTables): 1
IcebergConfigConstants (org.apache.drill.metastore.iceberg.config.IcebergConfigConstants): 1
IcebergMetastoreException (org.apache.drill.metastore.iceberg.exceptions.IcebergMetastoreException): 1