Search in sources :

Example 1 with AutoCloseableHiveLock

use of org.apache.gobblin.hive.AutoCloseableHiveLock in project incubator-gobblin by apache.

the class IcebergMetadataWriter method flush.

/**
 * Flushes the pending Iceberg transaction of a single table while holding the writer-wide write lock.
 *
 * <p>If a transaction has been initiated for the table, the following steps run in order:
 * <ol>
 *   <li>Commit the pending appendFiles, if present, and (when completeness is enabled and the topic
 *       name property is set) compute and record a new completion watermark.</li>
 *   <li>Commit the pending deleteFiles, if present.</li>
 *   <li>Update the table properties: GMCE high/low watermarks, metadata retention settings and
 *       the data offset range.</li>
 *   <li>Update the schema.</li>
 *   <li>Commit the property update, then commit the transaction under the Hive table lock.</li>
 *   <li>Emit the snapshot commit event and reset the tableMetadata for the next accumulation period.</li>
 * </ol>
 * If no transaction exists for the table, the method only logs that fact.
 *
 * @param dbName database name, used together with {@code tableName} to build the {@link TableIdentifier}
 * @param tableName table name; also used as the default topic name when no data offset range is available
 * @throws IOException if any checked exception is raised while flushing (the original is kept as the cause)
 * @throws RuntimeException if any unchecked exception is raised while flushing (the original is kept as the cause)
 */
@Override
public void flush(String dbName, String tableName) throws IOException {
    // Flushing is exclusive: take the write side of the read/write lock; it is released in the finally block.
    Lock writeLock = readWriteLock.writeLock();
    writeLock.lock();
    try {
        TableIdentifier tid = TableIdentifier.of(dbName, tableName);
        // Falls back to a fresh TableMetadata (which is not stored in the map) when the table is unknown.
        TableMetadata tableMetadata = tableMetadataMap.getOrDefault(tid, new TableMetadata());
        if (tableMetadata.transaction.isPresent()) {
            Transaction transaction = tableMetadata.transaction.get();
            // Property precedence: newProperties, else a mutable copy of lastProperties, else of the Iceberg table's properties.
            // Note: Java evaluates method arguments eagerly, so getIcebergTable(tid) is invoked even when a preferred value is present.
            Map<String, String> props = tableMetadata.newProperties.or(Maps.newHashMap(tableMetadata.lastProperties.or(getIcebergTable(tid).properties())));
            if (tableMetadata.appendFiles.isPresent()) {
                tableMetadata.appendFiles.get().commit();
                // The completion watermark is only recomputed when files were appended and completeness is enabled.
                if (tableMetadata.completenessEnabled) {
                    String topicName = props.get(TOPIC_NAME_KEY);
                    if (topicName == null) {
                        log.error(String.format("Not performing audit check. %s is null. Please set as table property of %s.%s", TOPIC_NAME_KEY, dbName, tableName));
                    } else {
                        long newCompletenessWatermark = computeCompletenessWatermark(topicName, tableMetadata.datePartitions, tableMetadata.prevCompletenessWatermark);
                        // Only record the watermark when it strictly advances past the previous one.
                        if (newCompletenessWatermark > tableMetadata.prevCompletenessWatermark) {
                            log.info(String.format("Updating %s for %s.%s to %s", COMPLETION_WATERMARK_KEY, dbName, tableName, newCompletenessWatermark));
                            props.put(COMPLETION_WATERMARK_KEY, String.valueOf(newCompletenessWatermark));
                            props.put(COMPLETION_WATERMARK_TIMEZONE_KEY, this.timeZone);
                            tableMetadata.newCompletenessWatermark = newCompletenessWatermark;
                        }
                    }
                }
            }
            if (tableMetadata.deleteFiles.isPresent()) {
                tableMetadata.deleteFiles.get().commit();
            }
            // Set high watermark
            // NOTE(review): highWatermark is dereferenced below without a null check; presumably
            // tableCurrentWatermarkMap always holds an entry once a transaction exists - confirm.
            Long highWatermark = tableCurrentWatermarkMap.get(tid);
            props.put(String.format(GMCE_HIGH_WATERMARK_KEY, tableTopicPartitionMap.get(tid)), highWatermark.toString());
            // Set low watermark
            // NOTE(review): lowWatermark.get() assumes the optional is populated at this point - confirm.
            props.put(String.format(GMCE_LOW_WATERMARK_KEY, tableTopicPartitionMap.get(tid)), tableMetadata.lowWatermark.get().toString());
            // Set whether to delete metadata files after commit, and how many previous metadata versions to keep
            props.put(TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED, Boolean.toString(conf.getBoolean(TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED, TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED_DEFAULT)));
            props.put(TableProperties.METADATA_PREVIOUS_VERSIONS_MAX, Integer.toString(conf.getInt(TableProperties.METADATA_PREVIOUS_VERSIONS_MAX, TableProperties.METADATA_PREVIOUS_VERSIONS_MAX_DEFAULT)));
            // Set data offset range
            boolean containOffsetRange = setDatasetOffsetRange(tableMetadata, props);
            // Default the topic name to the table name; override it from the offset range keys when available.
            String topicName = tableName;
            if (containOffsetRange) {
                // Takes the first key of the offset range map and strips everything from the last '-' onwards.
                String topicPartitionString = tableMetadata.dataOffsetRange.get().keySet().iterator().next();
                // In case the topic name is not the table name or the topic name contains '-'
                topicName = topicPartitionString.substring(0, topicPartitionString.lastIndexOf('-'));
            }
            // Update schema(commit)
            updateSchema(tableMetadata, props, topicName);
            // Update properties
            UpdateProperties updateProperties = transaction.updateProperties();
            props.forEach(updateProperties::set);
            updateProperties.commit();
            // The transaction itself is committed while holding the Hive table lock for this db/table.
            try (AutoCloseableHiveLock lock = this.locks.getTableLock(dbName, tableName)) {
                transaction.commitTransaction();
            }
            // Emit GTE for snapshot commits
            Snapshot snapshot = tableMetadata.table.get().currentSnapshot();
            Map<String, String> currentProps = tableMetadata.table.get().properties();
            submitSnapshotCommitEvent(snapshot, tableMetadata, dbName, tableName, currentProps, highWatermark);
            // Reset the table metadata for next accumulation period
            tableMetadata.reset(currentProps, highWatermark, tableMetadata.newCompletenessWatermark);
            log.info(String.format("Finish commit of new snapshot %s for table %s", snapshot.snapshotId(), tid.toString()));
        } else {
            log.info("There's no transaction initiated for the table {}", tid.toString());
        }
    } catch (RuntimeException e) {
        // Unchecked failures stay unchecked but gain the table context in the message.
        throw new RuntimeException(String.format("Fail to flush table %s %s", dbName, tableName), e);
    } catch (Exception e) {
        // Any checked failure is surfaced as an IOException with the table context in the message.
        throw new IOException(String.format("Fail to flush table %s %s", dbName, tableName), e);
    } finally {
        writeLock.unlock();
    }
}
Also used : TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) IOException(java.io.IOException) AlreadyExistsException(org.apache.iceberg.exceptions.AlreadyExistsException) SchemaRegistryException(org.apache.gobblin.metrics.kafka.SchemaRegistryException) NoSuchTableException(org.apache.iceberg.exceptions.NoSuchTableException) IOException(java.io.IOException) ReadWriteLock(java.util.concurrent.locks.ReadWriteLock) ReentrantReadWriteLock(java.util.concurrent.locks.ReentrantReadWriteLock) Lock(java.util.concurrent.locks.Lock) AutoCloseableHiveLock(org.apache.gobblin.hive.AutoCloseableHiveLock) HiveLock(org.apache.gobblin.hive.HiveLock) Snapshot(org.apache.iceberg.Snapshot) UpdateProperties(org.apache.iceberg.UpdateProperties) Transaction(org.apache.iceberg.Transaction) AutoCloseableHiveLock(org.apache.gobblin.hive.AutoCloseableHiveLock)

Example 2 with AutoCloseableHiveLock

use of org.apache.gobblin.hive.AutoCloseableHiveLock in project incubator-gobblin by apache.

the class HiveMetaStoreBasedRegister method ensureHiveTableExistenceBeforeAlternation.

/**
 * Makes sure the Hive table exists before it is altered, creating it when it is missing.
 * The check and the creation both run while holding the table lock for {@code dbName.tableName}.
 *
 * @param tableName name of the table to check or create
 * @param dbName name of the database that should contain the table
 * @param client metastore client used for the existence check and the creation call
 * @param table table definition passed to the metastore when the table has to be created
 * @return {@code true} if this call created the table; {@code false} if the table already existed,
 *         whether detected by the existence check or reported by the metastore through
 *         {@link AlreadyExistsException}
 * @throws TException if the metastore call fails for any reason other than the table already existing
 * @throws IOException declared by the method; presumably raised by lock handling - confirm
 */
private boolean ensureHiveTableExistenceBeforeAlternation(String tableName, String dbName, IMetaStoreClient client, Table table) throws TException, IOException {
    try (AutoCloseableHiveLock lock = this.locks.getTableLock(dbName, tableName)) {
        try {
            if (!existsTable(dbName, tableName, client)) {
                try (Timer.Context context = this.metricContext.timer(CREATE_HIVE_TABLE).time()) {
                    client.createTable(getTableWithCreateTimeNow(table));
                    log.info(String.format("Created Hive table %s in db %s", tableName, dbName));
                    return true;
                }
            }
        } catch (AlreadyExistsException ignored) {
            // Intentionally ignored: the metastore reports the table as already existing,
            // which is treated the same as finding it in the existence check. Fall through.
        } catch (TException e) {
            // The exception message is passed as a format ARGUMENT, never spliced into the format string:
            // a message containing '%' would otherwise make String.format throw and hide the real failure.
            log.error(String.format("Unable to create Hive table %s in db %s: %s", tableName, dbName, e.getMessage()), e);
            throw e;
        }
        log.info("Table {} already exists in db {}.", tableName, dbName);
        // Reaching this point means the table already existed in the db. Return false.
        return false;
    }
}
Also used : TException(org.apache.thrift.TException) Timer(com.codahale.metrics.Timer) AlreadyExistsException(org.apache.hadoop.hive.metastore.api.AlreadyExistsException) AutoCloseableHiveLock(org.apache.gobblin.hive.AutoCloseableHiveLock)

Example 3 with AutoCloseableHiveLock

use of org.apache.gobblin.hive.AutoCloseableHiveLock in project incubator-gobblin by apache.

the class HiveMetaStoreBasedRegister method createTableIfNotExists.

/**
 * Creates the given Hive table unless the metastore already has it, holding the table lock
 * for the duration of the check and the creation. Success and failure of the creation are
 * reported through {@link HiveMetaStoreEventHelper}.
 *
 * @param client metastore client used for the existence check and the creation call
 * @param table native table definition to create
 * @param hiveTable table descriptor attached to the emitted creation events
 * @return {@code true} if the table was created by this call, {@code false} if it already existed
 * @throws IOException if the metastore raises a {@link TException}; the original is kept as the cause
 * @deprecated Please use {@link #createOrAlterTable(IMetaStoreClient, Table, HiveSpec)} instead.
 */
@Deprecated
private boolean createTableIfNotExists(IMetaStoreClient client, Table table, HiveTable hiveTable) throws IOException {
    final String dbName = table.getDbName();
    final String tableName = table.getTableName();
    try (AutoCloseableHiveLock lock = this.locks.getTableLock(dbName, tableName)) {
        // Time the existence probe on its own timer.
        final boolean alreadyPresent;
        try (Timer.Context existsTimer = this.metricContext.timer(TABLE_EXISTS).time()) {
            alreadyPresent = client.tableExists(table.getDbName(), table.getTableName());
        }
        if (!alreadyPresent) {
            // Time only the creation call itself; logging and event submission happen afterwards.
            try (Timer.Context createTimer = this.metricContext.timer(CREATE_HIVE_TABLE).time()) {
                client.createTable(getTableWithCreateTimeNow(table));
            }
            log.info(String.format("Created Hive table %s in db %s", tableName, dbName));
            HiveMetaStoreEventHelper.submitSuccessfulTableCreation(this.eventSubmitter, hiveTable);
            return true;
        }
        return false;
    } catch (TException e) {
        // Report the failed creation, then surface the metastore failure as an IOException.
        HiveMetaStoreEventHelper.submitFailedTableCreation(eventSubmitter, hiveTable, e);
        throw new IOException(String.format("Error in creating or altering Hive table %s in db %s", table.getTableName(), table.getDbName()), e);
    }
}
Also used : TException(org.apache.thrift.TException) Timer(com.codahale.metrics.Timer) IOException(java.io.IOException) AutoCloseableHiveLock(org.apache.gobblin.hive.AutoCloseableHiveLock)

Example 4 with AutoCloseableHiveLock

use of org.apache.gobblin.hive.AutoCloseableHiveLock in project incubator-gobblin by apache.

the class HiveMetaStoreBasedRegister method addOrAlterPartitionWithPullMode.

/**
 * Adds or alters a partition in "pull" mode: the partition is first fetched from the metastore;
 * if it exists the existing-partition handler runs, and if the metastore reports
 * {@link NoSuchObjectException} the partition is added instead. The whole operation runs while
 * holding the partition lock.
 *
 * @param client metastore client used to fetch and add the partition
 * @param table table that owns the partition; its partition key count must match the partition value count
 * @param partition partition descriptor that is converted to the native metastore partition
 * @throws TException if a metastore call fails
 * @throws IOException declared by the method; presumably raised by lock handling or the handlers - confirm
 * @throws IllegalArgumentException if the partition key count and partition value count differ
 */
private void addOrAlterPartitionWithPullMode(IMetaStoreClient client, Table table, HivePartition partition) throws TException, IOException {
    Partition nativePartition = HiveMetaStoreUtils.getPartition(partition);
    Preconditions.checkArgument(table.getPartitionKeysSize() == nativePartition.getValues().size(), String.format("Partition key size is %s but partition value size is %s", table.getPartitionKeys().size(), nativePartition.getValues().size()));
    try (AutoCloseableHiveLock lock = this.locks.getPartitionLock(table.getDbName(), table.getTableName(), nativePartition.getValues())) {
        Partition existedPartition;
        try {
            // The timer covers both the fetch and the existing-partition handling.
            try (Timer.Context context = this.metricContext.timer(GET_HIVE_PARTITION).time()) {
                existedPartition = client.getPartition(table.getDbName(), table.getTableName(), nativePartition.getValues());
                if (this.skipDiffComputation) {
                    onPartitionExistWithoutComputingDiff(table, nativePartition, null);
                } else {
                    onPartitionExist(client, table, partition, nativePartition, existedPartition);
                }
            }
        } catch (NoSuchObjectException e) {
            // The partition does not exist yet: add it.
            try (Timer.Context context = this.metricContext.timer(ADD_PARTITION_TIMER).time()) {
                client.add_partition(getPartitionWithCreateTimeNow(nativePartition));
            } catch (Throwable e2) {
                // The exception message is passed as a format ARGUMENT, never spliced into the format string:
                // metastore messages can contain '%' (e.g. escaped partition paths), which would make
                // String.format throw and hide the real failure.
                log.error(String.format("Unable to add or alter partition %s in table %s with location %s: %s", stringifyPartitionVerbose(nativePartition), table.getTableName(), nativePartition.getSd().getLocation(), e2.getMessage()), e2);
                throw e2;
            }
            log.info(String.format("Added partition %s to table %s with location %s", stringifyPartition(nativePartition), table.getTableName(), nativePartition.getSd().getLocation()));
        }
    }
}
Also used : MetricContext(org.apache.gobblin.metrics.MetricContext) Partition(org.apache.hadoop.hive.metastore.api.Partition) HivePartition(org.apache.gobblin.hive.HivePartition) Timer(com.codahale.metrics.Timer) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) AutoCloseableHiveLock(org.apache.gobblin.hive.AutoCloseableHiveLock)

Example 5 with AutoCloseableHiveLock

use of org.apache.gobblin.hive.AutoCloseableHiveLock in project incubator-gobblin by apache.

the class HiveMetaStoreBasedRegister method addOrAlterPartitionWithPushMode.

/**
 * Adds or alters a partition in "push" mode: the partition is added to the metastore right away;
 * if the metastore reports {@link AlreadyExistsException} the existing-partition handler runs
 * instead. The whole operation runs while holding the partition lock.
 *
 * @param client metastore client used to add the partition
 * @param table table that owns the partition; its partition key count must match the partition value count
 * @param partition partition descriptor that is converted to the native metastore partition
 * @throws TException if a metastore call fails for a reason other than the partition already existing
 * @throws IOException declared by the method; presumably raised by lock handling or the handlers - confirm
 * @throws IllegalArgumentException if the partition key count and partition value count differ
 */
private void addOrAlterPartitionWithPushMode(IMetaStoreClient client, Table table, HivePartition partition) throws TException, IOException {
    Partition nativePartition = HiveMetaStoreUtils.getPartition(partition);
    Preconditions.checkArgument(table.getPartitionKeysSize() == nativePartition.getValues().size(), String.format("Partition key size is %s but partition value size is %s", table.getPartitionKeys().size(), nativePartition.getValues().size()));
    try (AutoCloseableHiveLock lock = this.locks.getPartitionLock(table.getDbName(), table.getTableName(), nativePartition.getValues())) {
        try {
            // Optimistically add the partition; only the add call itself is timed.
            try (Timer.Context context = this.metricContext.timer(ADD_PARTITION_TIMER).time()) {
                client.add_partition(getPartitionWithCreateTimeNow(nativePartition));
            }
            log.info(String.format("Added partition %s to table %s with location %s", stringifyPartition(nativePartition), table.getTableName(), nativePartition.getSd().getLocation()));
        } catch (AlreadyExistsException e) {
            // The partition already exists: hand over to the existing-partition handling.
            try {
                if (this.skipDiffComputation) {
                    onPartitionExistWithoutComputingDiff(table, nativePartition, e);
                } else {
                    // No previously fetched partition is available in push mode, hence the null.
                    onPartitionExist(client, table, partition, nativePartition, null);
                }
            } catch (Throwable e2) {
                // The exception message is passed as a format ARGUMENT, never spliced into the format string:
                // metastore messages can contain '%' (e.g. escaped partition paths), which would make
                // String.format throw and hide the real failure.
                log.error(String.format("Unable to add or alter partition %s in table %s with location %s: %s", stringifyPartitionVerbose(nativePartition), table.getTableName(), nativePartition.getSd().getLocation(), e2.getMessage()), e2);
                throw e2;
            }
        }
    }
}
Also used : Partition(org.apache.hadoop.hive.metastore.api.Partition) HivePartition(org.apache.gobblin.hive.HivePartition) Timer(com.codahale.metrics.Timer) AlreadyExistsException(org.apache.hadoop.hive.metastore.api.AlreadyExistsException) AutoCloseableHiveLock(org.apache.gobblin.hive.AutoCloseableHiveLock)

Aggregations

AutoCloseableHiveLock (org.apache.gobblin.hive.AutoCloseableHiveLock)6 Timer (com.codahale.metrics.Timer)5 IOException (java.io.IOException)3 AlreadyExistsException (org.apache.hadoop.hive.metastore.api.AlreadyExistsException)3 TException (org.apache.thrift.TException)3 HivePartition (org.apache.gobblin.hive.HivePartition)2 NoSuchObjectException (org.apache.hadoop.hive.metastore.api.NoSuchObjectException)2 Partition (org.apache.hadoop.hive.metastore.api.Partition)2 Lock (java.util.concurrent.locks.Lock)1 ReadWriteLock (java.util.concurrent.locks.ReadWriteLock)1 ReentrantReadWriteLock (java.util.concurrent.locks.ReentrantReadWriteLock)1 HiveLock (org.apache.gobblin.hive.HiveLock)1 MetricContext (org.apache.gobblin.metrics.MetricContext)1 SchemaRegistryException (org.apache.gobblin.metrics.kafka.SchemaRegistryException)1 Path (org.apache.hadoop.fs.Path)1 Database (org.apache.hadoop.hive.metastore.api.Database)1 Snapshot (org.apache.iceberg.Snapshot)1 Transaction (org.apache.iceberg.Transaction)1 UpdateProperties (org.apache.iceberg.UpdateProperties)1 TableIdentifier (org.apache.iceberg.catalog.TableIdentifier)1