Example 6 with Transaction

use of org.apache.iceberg.Transaction in project hive by apache.

the class HiveCreateReplaceTableTest method testCreateOrReplaceTableTxnTableCreatedConcurrently.

@Test
public void testCreateOrReplaceTableTxnTableCreatedConcurrently() {
    Assert.assertFalse("Table should not exist", catalog.tableExists(TABLE_IDENTIFIER));
    Transaction txn = catalog.newReplaceTableTransaction(TABLE_IDENTIFIER, SCHEMA, PartitionSpec.unpartitioned(), tableLocation, Maps.newHashMap(), true);
    txn.updateProperties().set("prop", "value").commit();
    // create the table concurrently
    catalog.createTable(TABLE_IDENTIFIER, SCHEMA, SPEC);
    Assert.assertTrue("Table should be created", catalog.tableExists(TABLE_IDENTIFIER));
    // expect the transaction to succeed anyway
    txn.commitTransaction();
    Table table = catalog.loadTable(TABLE_IDENTIFIER);
    Assert.assertEquals("Partition spec should match", PartitionSpec.unpartitioned(), table.spec());
    Assert.assertEquals("Table props should match", "value", table.properties().get("prop"));
}
Also used : Table(org.apache.iceberg.Table) Transaction(org.apache.iceberg.Transaction) Test(org.junit.Test)

Example 7 with Transaction

use of org.apache.iceberg.Transaction in project incubator-gobblin by apache.

the class IcebergMetadataWriter method flush.

/**
 * For the flush of each table, we perform the following steps:
 * 1. Commit the appendFiles if they exist
 * 2. Update the table properties: GMCE high watermark, data offset range, schema versions
 * 3. Update the schema
 * 4. Commit the transaction
 * 5. Reset the tableMetadata
 * @param dbName database name of the table to flush
 * @param tableName name of the table to flush
 */
@Override
public void flush(String dbName, String tableName) throws IOException {
    Lock writeLock = readWriteLock.writeLock();
    writeLock.lock();
    try {
        TableIdentifier tid = TableIdentifier.of(dbName, tableName);
        TableMetadata tableMetadata = tableMetadataMap.getOrDefault(tid, new TableMetadata());
        if (tableMetadata.transaction.isPresent()) {
            Transaction transaction = tableMetadata.transaction.get();
            Map<String, String> props = tableMetadata.newProperties.or(Maps.newHashMap(tableMetadata.lastProperties.or(getIcebergTable(tid).properties())));
            if (tableMetadata.appendFiles.isPresent()) {
                tableMetadata.appendFiles.get().commit();
                if (tableMetadata.completenessEnabled) {
                    String topicName = props.get(TOPIC_NAME_KEY);
                    if (topicName == null) {
                        log.error(String.format("Not performing audit check. %s is null. Please set as table property of %s.%s", TOPIC_NAME_KEY, dbName, tableName));
                    } else {
                        long newCompletenessWatermark = computeCompletenessWatermark(topicName, tableMetadata.datePartitions, tableMetadata.prevCompletenessWatermark);
                        if (newCompletenessWatermark > tableMetadata.prevCompletenessWatermark) {
                            log.info(String.format("Updating %s for %s.%s to %s", COMPLETION_WATERMARK_KEY, dbName, tableName, newCompletenessWatermark));
                            props.put(COMPLETION_WATERMARK_KEY, String.valueOf(newCompletenessWatermark));
                            props.put(COMPLETION_WATERMARK_TIMEZONE_KEY, this.timeZone);
                            tableMetadata.newCompletenessWatermark = newCompletenessWatermark;
                        }
                    }
                }
            }
            if (tableMetadata.deleteFiles.isPresent()) {
                tableMetadata.deleteFiles.get().commit();
            }
            // Set the high watermark
            Long highWatermark = tableCurrentWatermarkMap.get(tid);
            props.put(String.format(GMCE_HIGH_WATERMARK_KEY, tableTopicPartitionMap.get(tid)), highWatermark.toString());
            // Set the low watermark
            props.put(String.format(GMCE_LOW_WATERMARK_KEY, tableTopicPartitionMap.get(tid)), tableMetadata.lowWatermark.get().toString());
            // Set whether to delete metadata files after commit
            props.put(TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED, Boolean.toString(conf.getBoolean(TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED, TableProperties.METADATA_DELETE_AFTER_COMMIT_ENABLED_DEFAULT)));
            props.put(TableProperties.METADATA_PREVIOUS_VERSIONS_MAX, Integer.toString(conf.getInt(TableProperties.METADATA_PREVIOUS_VERSIONS_MAX, TableProperties.METADATA_PREVIOUS_VERSIONS_MAX_DEFAULT)));
            // Set data offset range
            boolean containOffsetRange = setDatasetOffsetRange(tableMetadata, props);
            String topicName = tableName;
            if (containOffsetRange) {
                String topicPartitionString = tableMetadata.dataOffsetRange.get().keySet().iterator().next();
                // In case the topic name is not the table name or the topic name contains '-'
                topicName = topicPartitionString.substring(0, topicPartitionString.lastIndexOf('-'));
            }
            // Update schema(commit)
            updateSchema(tableMetadata, props, topicName);
            // Update properties
            UpdateProperties updateProperties = transaction.updateProperties();
            props.forEach(updateProperties::set);
            updateProperties.commit();
            try (AutoCloseableHiveLock lock = this.locks.getTableLock(dbName, tableName)) {
                transaction.commitTransaction();
            }
            // Emit GTE for snapshot commits
            Snapshot snapshot = tableMetadata.table.get().currentSnapshot();
            Map<String, String> currentProps = tableMetadata.table.get().properties();
            submitSnapshotCommitEvent(snapshot, tableMetadata, dbName, tableName, currentProps, highWatermark);
            // Reset the table metadata for next accumulation period
            tableMetadata.reset(currentProps, highWatermark, tableMetadata.newCompletenessWatermark);
            log.info(String.format("Finish commit of new snapshot %s for table %s", snapshot.snapshotId(), tid.toString()));
        } else {
            log.info("There's no transaction initiated for the table {}", tid.toString());
        }
    } catch (RuntimeException e) {
        throw new RuntimeException(String.format("Fail to flush table %s %s", dbName, tableName), e);
    } catch (Exception e) {
        throw new IOException(String.format("Fail to flush table %s %s", dbName, tableName), e);
    } finally {
        writeLock.unlock();
    }
}
Also used : TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) IOException(java.io.IOException) AlreadyExistsException(org.apache.iceberg.exceptions.AlreadyExistsException) SchemaRegistryException(org.apache.gobblin.metrics.kafka.SchemaRegistryException) NoSuchTableException(org.apache.iceberg.exceptions.NoSuchTableException) ReadWriteLock(java.util.concurrent.locks.ReadWriteLock) ReentrantReadWriteLock(java.util.concurrent.locks.ReentrantReadWriteLock) Lock(java.util.concurrent.locks.Lock) AutoCloseableHiveLock(org.apache.gobblin.hive.AutoCloseableHiveLock) HiveLock(org.apache.gobblin.hive.HiveLock) Snapshot(org.apache.iceberg.Snapshot) UpdateProperties(org.apache.iceberg.UpdateProperties) Transaction(org.apache.iceberg.Transaction)
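
The flush method above stages appends, deletes, a schema update and property updates against a single Iceberg transaction, then makes them visible with one commitTransaction() call. Below is a minimal sketch of that core commit pattern with the Gobblin-specific bookkeeping stripped away; the class name, method name, and the table/props/dataFile inputs are placeholders for illustration, not part of the original writer.

import java.util.Map;
import org.apache.iceberg.AppendFiles;
import org.apache.iceberg.DataFile;
import org.apache.iceberg.Table;
import org.apache.iceberg.Transaction;
import org.apache.iceberg.UpdateProperties;

public class TransactionCommitSketch {

    // Stages a file append and property updates, then applies them in one atomic commit.
    public static void commitAtomically(Table table, Map<String, String> props, DataFile dataFile) {
        Transaction txn = table.newTransaction();
        AppendFiles append = txn.newAppend();
        append.appendFile(dataFile);
        // commit() applies the append to the pending transaction, not to the table yet
        append.commit();
        UpdateProperties update = txn.updateProperties();
        props.forEach(update::set);
        update.commit();
        // nothing is visible to readers until this single commitTransaction() call
        txn.commitTransaction();
    }
}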

Example 8 with Transaction

use of org.apache.iceberg.Transaction in project hive by apache.

the class TestHiveCatalog method testReplaceTxnBuilder.

@Test
public void testReplaceTxnBuilder() throws Exception {
    Schema schema = new Schema(required(1, "id", Types.IntegerType.get(), "unique ID"), required(2, "data", Types.StringType.get()));
    PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("data", 16).build();
    TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl");
    String location = temp.newFolder("tbl").toString();
    try {
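        // create the table with a create-or-replace transaction: bucketed spec, explicit location, one table property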
        Transaction createTxn = catalog.buildTable(tableIdent, schema).withPartitionSpec(spec).withLocation(location).withProperty("key1", "value1").createOrReplaceTransaction();
        createTxn.commitTransaction();
        Table table = catalog.loadTable(tableIdent);
        Assert.assertEquals(1, table.spec().fields().size());
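        // replace the table in a second transaction: new location and property; the old bucket field becomes a void transform in the replaced spec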
        String newLocation = temp.newFolder("tbl-2").toString();
        Transaction replaceTxn = catalog.buildTable(tableIdent, schema).withProperty("key2", "value2").withLocation(newLocation).replaceTransaction();
        replaceTxn.commitTransaction();
        table = catalog.loadTable(tableIdent);
        Assert.assertEquals(newLocation, table.location());
        Assert.assertNull(table.currentSnapshot());
        PartitionSpec v1Expected = PartitionSpec.builderFor(table.schema()).alwaysNull("data", "data_bucket").withSpecId(1).build();
        Assert.assertEquals("Table should have a spec with one void field", v1Expected, table.spec());
        Assert.assertEquals("value1", table.properties().get("key1"));
        Assert.assertEquals("value2", table.properties().get("key2"));
    } finally {
        catalog.dropTable(tableIdent);
    }
}
Also used : TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) Table(org.apache.iceberg.Table) Transaction(org.apache.iceberg.Transaction) Schema(org.apache.iceberg.Schema) PartitionSpec(org.apache.iceberg.PartitionSpec) Test(org.junit.Test)

Example 9 with Transaction

use of org.apache.iceberg.Transaction in project hive by apache.

the class HiveCreateReplaceTableTest method testCreateTableTxnWithGlobalTableLocation.

@Test
public void testCreateTableTxnWithGlobalTableLocation() {
    Assert.assertFalse("Table should not exist", catalog.tableExists(TABLE_IDENTIFIER));
    Transaction txn = catalog.newCreateTableTransaction(TABLE_IDENTIFIER, SCHEMA, SPEC, "file:///" + tableLocation, Maps.newHashMap());
    txn.commitTransaction();
    Table table = catalog.loadTable(TABLE_IDENTIFIER);
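    // append a data file to verify that writes succeed under the file:/// table location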
    DataFile dataFile = DataFiles.builder(SPEC).withPath("/path/to/data-a.parquet").withFileSizeInBytes(0).withRecordCount(1).build();
    table.newAppend().appendFile(dataFile).commit();
    Assert.assertEquals("Write should succeed", 1, Iterables.size(table.snapshots()));
}
Also used : DataFile(org.apache.iceberg.DataFile) Table(org.apache.iceberg.Table) Transaction(org.apache.iceberg.Transaction) Test(org.junit.Test)

Example 10 with Transaction

use of org.apache.iceberg.Transaction in project hive by apache.

the class HiveCreateReplaceTableTest method testCreateTableTxnAndAppend.

@Test
public void testCreateTableTxnAndAppend() {
    Assert.assertFalse("Table should not exist", catalog.tableExists(TABLE_IDENTIFIER));
    Transaction txn = catalog.newCreateTableTransaction(TABLE_IDENTIFIER, SCHEMA, SPEC, tableLocation, Maps.newHashMap());
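    // stage an append on the create transaction; it only becomes visible once the transaction commits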
    AppendFiles append = txn.newAppend();
    DataFile dataFile = DataFiles.builder(SPEC).withPath("/path/to/data-a.parquet").withFileSizeInBytes(0).withRecordCount(1).build();
    append.appendFile(dataFile);
    append.commit();
    txn.commitTransaction();
    Table table = catalog.loadTable(TABLE_IDENTIFIER);
    Snapshot snapshot = table.currentSnapshot();
    Assert.assertTrue("Table should have one manifest file", snapshot.allManifests().size() == 1);
}
Also used : DataFile(org.apache.iceberg.DataFile) Snapshot(org.apache.iceberg.Snapshot) Table(org.apache.iceberg.Table) Transaction(org.apache.iceberg.Transaction) AppendFiles(org.apache.iceberg.AppendFiles) Test(org.junit.Test)

Aggregations

Transaction (org.apache.iceberg.Transaction) 16
Test (org.junit.Test) 13
Table (org.apache.iceberg.Table) 11
DataFile (org.apache.iceberg.DataFile) 3
PartitionSpec (org.apache.iceberg.PartitionSpec) 3
TableIdentifier (org.apache.iceberg.catalog.TableIdentifier) 3
Schema (org.apache.iceberg.Schema) 2
Snapshot (org.apache.iceberg.Snapshot) 2
IOException (java.io.IOException) 1
Lock (java.util.concurrent.locks.Lock) 1
ReadWriteLock (java.util.concurrent.locks.ReadWriteLock) 1
ReentrantReadWriteLock (java.util.concurrent.locks.ReentrantReadWriteLock) 1
AutoCloseableHiveLock (org.apache.gobblin.hive.AutoCloseableHiveLock) 1
HiveLock (org.apache.gobblin.hive.HiveLock) 1
SchemaRegistryException (org.apache.gobblin.metrics.kafka.SchemaRegistryException) 1
Path (org.apache.hadoop.fs.Path) 1
AppendFiles (org.apache.iceberg.AppendFiles) 1
UpdateProperties (org.apache.iceberg.UpdateProperties) 1
GenericAppenderFactory (org.apache.iceberg.data.GenericAppenderFactory) 1
GenericRecord (org.apache.iceberg.data.GenericRecord) 1