
Example 96 with Table

use of org.apache.iceberg.Table in project hive by apache.

the class TestHiveCommits method testThriftExceptionSuccessOnCommit.

/**
 * Pretends we throw an error while persisting, even though the commit actually does succeed server-side
 */
@Test
public void testThriftExceptionSuccessOnCommit() throws TException, InterruptedException {
    Table table = catalog.loadTable(TABLE_IDENTIFIER);
    HiveTableOperations ops = (HiveTableOperations) ((HasTableOperations) table).operations();
    TableMetadata metadataV1 = ops.current();
    table.updateSchema().addColumn("n", Types.IntegerType.get()).commit();
    ops.refresh();
    TableMetadata metadataV2 = ops.current();
    Assert.assertEquals(2, ops.current().schema().columns().size());
    HiveTableOperations spyOps = spy(ops);
    // Simulate a communication error after a successful commit
    commitAndThrowException(ops, spyOps);
    // Shouldn't throw because the commit actually succeeds even though persistTable throws an exception
    spyOps.commit(metadataV2, metadataV1);
    ops.refresh();
    Assert.assertNotEquals("Current metadata should have changed", metadataV2, ops.current());
    Assert.assertTrue("Current metadata file should still exist", metadataFileExists(ops.current()));
    Assert.assertEquals("Commit should have been successful and new metadata file should be made", 3, metadataFileCount(ops.current()));
}
Also used : TableMetadata(org.apache.iceberg.TableMetadata) Table(org.apache.iceberg.Table) Test(org.junit.Test)
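
The helper commitAndThrowException(ops, spyOps) referenced above is not shown on this page. Below is a minimal sketch of how such a helper could be written with Mockito, assuming HiveTableOperations exposes an overridable persistTable(Table, boolean) hook that the spy can intercept (the method name and signature are assumptions here): the real persist is allowed to go through and only afterwards does the call fail, so the commit lands server-side while the client sees a Thrift error. The exception message mirrors the one Example 100 asserts on.

// Hedged sketch only, not the project's actual helper.
// Assumes static imports from org.mockito.Mockito (doAnswer) and org.mockito.ArgumentMatchers (any, anyBoolean),
// plus org.apache.thrift.TException.
private void commitAndThrowException(HiveTableOperations realOperations, HiveTableOperations spyOperations)
    throws TException, InterruptedException {
    doAnswer(invocation -> {
        // Run the real persist so the commit actually succeeds in the metastore ...
        org.apache.hadoop.hive.metastore.api.Table metastoreTable = invocation.getArgument(0);
        realOperations.persistTable(metastoreTable, true);
        // ... then pretend the Thrift connection dropped before the client saw the result
        throw new TException("Datacenter on fire");
    }).when(spyOperations).persistTable(any(), anyBoolean());
}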

Example 97 with Table

use of org.apache.iceberg.Table in project hive by apache.

the class TestHiveCommits method testThriftExceptionConcurrentCommit.

/**
 * Pretends we threw an exception while persisting; in reality the commit succeeded, the lock expired,
 * and a second committer placed a commit on top of ours before the first committer could check
 * whether its commit succeeded or not
 *
 * Timeline:
 *   Client 1 commits; the call throws an exception even though the commit succeeded
 *   Client 1's lock expires while waiting to recheck whether the commit succeeded
 *   Client 2 acquires a lock, commits successfully on top of client 1's commit, and releases the lock
 *   Client 1 checks to see whether its commit was successful
 *
 * This test makes sure a disconnected client 1 doesn't conclude its commit failed just because it isn't the
 * current one during the recheck phase.
 */
@Test
public void testThriftExceptionConcurrentCommit() throws TException, InterruptedException, UnknownHostException {
    Table table = catalog.loadTable(TABLE_IDENTIFIER);
    HiveTableOperations ops = (HiveTableOperations) ((HasTableOperations) table).operations();
    TableMetadata metadataV1 = ops.current();
    table.updateSchema().addColumn("n", Types.IntegerType.get()).commit();
    ops.refresh();
    TableMetadata metadataV2 = ops.current();
    Assert.assertEquals(2, ops.current().schema().columns().size());
    HiveTableOperations spyOps = spy(ops);
    AtomicLong lockId = new AtomicLong();
    doAnswer(i -> {
        lockId.set(ops.acquireLock());
        return lockId.get();
    }).when(spyOps).acquireLock();
    concurrentCommitAndThrowException(ops, spyOps, table, lockId);
    /*
    This commit and our concurrent commit should succeed even though this commit throws an exception
    after the persist operation succeeds
     */
    spyOps.commit(metadataV2, metadataV1);
    ops.refresh();
    Assert.assertNotEquals("Current metadata should have changed", metadataV2, ops.current());
    Assert.assertTrue("Current metadata file should still exist", metadataFileExists(ops.current()));
    Assert.assertEquals("The column addition from the concurrent commit should have been successful", 2, ops.current().schema().columns().size());
}
Also used : TableMetadata(org.apache.iceberg.TableMetadata) AtomicLong(java.util.concurrent.atomic.AtomicLong) Table(org.apache.iceberg.Table) Test(org.junit.Test)
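
The helper concurrentCommitAndThrowException(ops, spyOps, table, lockId) is likewise not shown. A hedged sketch of one way to build it follows, reusing the same assumed persistTable(Table, boolean) hook as above: let client 1's persist succeed, drop client 1's lock to simulate expiry, have a "client 2" commit on top through the regular table API, and only then throw, so client 1 rechecks against a table that has already moved on. The acquireLock and doUnlock calls are the ones the surrounding test already uses; the new column name is illustrative only.

// Hedged sketch only, not the project's actual helper.
// Assumes static imports from org.mockito.Mockito (doAnswer) and org.mockito.ArgumentMatchers (any, anyBoolean).
private void concurrentCommitAndThrowException(HiveTableOperations realOperations, HiveTableOperations spyOperations,
    Table table, AtomicLong lockId) throws TException, InterruptedException {
    doAnswer(invocation -> {
        // Client 1's persist succeeds server-side
        org.apache.hadoop.hive.metastore.api.Table metastoreTable = invocation.getArgument(0);
        realOperations.persistTable(metastoreTable, true);
        // Simulate the lock expiring before client 1 can recheck its commit
        realOperations.doUnlock(lockId.get());
        // "Client 2" commits a schema change on top of client 1's commit
        table.refresh();
        table.updateSchema().addColumn("newCol", Types.IntegerType.get()).commit();
        // Finally surface the communication error back to client 1
        throw new TException("Datacenter on fire");
    }).when(spyOperations).persistTable(any(), anyBoolean());
}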

Example 98 with Table

use of org.apache.iceberg.Table in project hive by apache.

the class TestHiveCommits method testSuppressUnlockExceptions.

@Test
public void testSuppressUnlockExceptions() throws TException, InterruptedException {
    Table table = catalog.loadTable(TABLE_IDENTIFIER);
    HiveTableOperations ops = (HiveTableOperations) ((HasTableOperations) table).operations();
    TableMetadata metadataV1 = ops.current();
    table.updateSchema().addColumn("n", Types.IntegerType.get()).commit();
    ops.refresh();
    TableMetadata metadataV2 = ops.current();
    Assert.assertEquals(2, ops.current().schema().columns().size());
    HiveTableOperations spyOps = spy(ops);
    ArgumentCaptor<Long> lockId = ArgumentCaptor.forClass(Long.class);
    doThrow(new RuntimeException()).when(spyOps).doUnlock(lockId.capture());
    try {
        spyOps.commit(metadataV2, metadataV1);
    } finally {
        ops.doUnlock(lockId.getValue());
    }
    ops.refresh();
    // the commit must succeed; committing metadataV1 rolls the schema back to a single column
    Assert.assertEquals(1, ops.current().schema().columns().size());
}
Also used : TableMetadata(org.apache.iceberg.TableMetadata) Table(org.apache.iceberg.Table) AtomicLong(java.util.concurrent.atomic.AtomicLong) Test(org.junit.Test)
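
Since doUnlock is stubbed to throw, the test proves the suppression indirectly through the final schema assertion. If desired, the check could be made explicit by verifying that commit really did attempt the unlock; a small optional sketch (not part of the original test), to be placed inside the test method:

// Optional extra check (sketch): commit() must have called the stubbed doUnlock and swallowed its RuntimeException.
// Assumes static imports from org.mockito.Mockito (verify) and org.mockito.ArgumentMatchers (anyLong).
verify(spyOps).doUnlock(anyLong());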

Example 99 with Table

use of org.apache.iceberg.Table in project hive by apache.

the class TestHiveCommits method testThriftExceptionFailureOnCommit.

/**
 * Pretends we throw an error while persisting, and the commit genuinely fails server-side
 */
@Test
public void testThriftExceptionFailureOnCommit() throws TException, InterruptedException {
    Table table = catalog.loadTable(TABLE_IDENTIFIER);
    HiveTableOperations ops = (HiveTableOperations) ((HasTableOperations) table).operations();
    TableMetadata metadataV1 = ops.current();
    table.updateSchema().addColumn("n", Types.IntegerType.get()).commit();
    ops.refresh();
    TableMetadata metadataV2 = ops.current();
    Assert.assertEquals(2, ops.current().schema().columns().size());
    HiveTableOperations spyOps = spy(ops);
    failCommitAndThrowException(spyOps);
    AssertHelpers.assertThrows("We should rethrow generic runtime errors if the " + "commit actually doesn't succeed", CommitStateUnknownException.class, "Cannot determine whether the commit was successful or not, the underlying data files may " + "or may not be needed. Manual intervention via the Remove Orphan Files Action can remove these files " + "when a connection to the Catalog can be re-established if the commit was actually unsuccessful.", () -> spyOps.commit(metadataV2, metadataV1));
    ops.refresh();
    Assert.assertEquals("Current metadata should not have changed", metadataV2, ops.current());
    Assert.assertTrue("Current metadata should still exist", metadataFileExists(metadataV2));
    Assert.assertEquals("New non-current metadata file should be added", 3, metadataFileCount(ops.current()));
}
Also used : TableMetadata(org.apache.iceberg.TableMetadata) Table(org.apache.iceberg.Table) Test(org.junit.Test)
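
The failCommitAndThrowException(spyOps) helper is not shown either. In contrast to the success cases above, the sketch below never reaches the metastore: it simply stubs the assumed persistTable(Table, boolean) hook to throw, so nothing is committed server-side and the client cannot tell what happened.

// Hedged sketch only, not the project's actual helper.
// Assumes static imports from org.mockito.Mockito (doThrow) and org.mockito.ArgumentMatchers (any, anyBoolean).
private void failCommitAndThrowException(HiveTableOperations spyOperations)
    throws TException, InterruptedException {
    // Fail before any metastore write happens, so the commit genuinely never lands
    doThrow(new TException("Datacenter on fire"))
        .when(spyOperations).persistTable(any(), anyBoolean());
}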

Example 100 with Table

use of org.apache.iceberg.Table in project hive by apache.

the class TestHiveCommits method testThriftExceptionsUnknownSuccessCommit.

/**
 * Pretends we throw an exception while persisting, don't know what happened, and can't check to find out,
 * even though in reality the commit succeeded
 */
@Test
public void testThriftExceptionsUnknownSuccessCommit() throws TException, InterruptedException {
    Table table = catalog.loadTable(TABLE_IDENTIFIER);
    HiveTableOperations ops = (HiveTableOperations) ((HasTableOperations) table).operations();
    TableMetadata metadataV1 = ops.current();
    table.updateSchema().addColumn("n", Types.IntegerType.get()).commit();
    ops.refresh();
    TableMetadata metadataV2 = ops.current();
    Assert.assertEquals(2, ops.current().schema().columns().size());
    HiveTableOperations spyOps = spy(ops);
    commitAndThrowException(ops, spyOps);
    breakFallbackCatalogCommitCheck(spyOps);
    AssertHelpers.assertThrows("Should throw CommitStateUnknownException since the catalog check was blocked", CommitStateUnknownException.class, "Datacenter on fire", () -> spyOps.commit(metadataV2, metadataV1));
    ops.refresh();
    Assert.assertFalse("Current metadata should have changed", ops.current().equals(metadataV2));
    Assert.assertTrue("Current metadata file should still exist", metadataFileExists(ops.current()));
}
Also used : TableMetadata(org.apache.iceberg.TableMetadata) Table(org.apache.iceberg.Table) Test(org.junit.Test)
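
Finally, breakFallbackCatalogCommitCheck(spyOps) is not shown. The sketch below assumes the fallback "did my commit land?" recheck goes through the operations' refresh() (an assumption about this version of the code); breaking that path leaves the client with no way to resolve the unknown state, which is exactly what the CommitStateUnknownException assertion above exercises.

// Hedged sketch only, not the project's actual helper; the stubbed method and message are assumptions.
// Assumes a static import of org.mockito.Mockito.doThrow.
private void breakFallbackCatalogCommitCheck(HiveTableOperations spyOperations) {
    // Any attempt to re-read the table state during the commit-state recheck now fails
    doThrow(new RuntimeException("Catalog check failed"))
        .when(spyOperations).refresh();
}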

Aggregations

Table (org.apache.iceberg.Table) 188
Test (org.junit.Test) 132
Schema (org.apache.iceberg.Schema) 66
TableIdentifier (org.apache.iceberg.catalog.TableIdentifier) 56
Record (org.apache.iceberg.data.Record) 56
PartitionSpec (org.apache.iceberg.PartitionSpec) 51
IOException (java.io.IOException) 27
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema) 27
List (java.util.List) 22
Map (java.util.Map) 20
DataFile (org.apache.iceberg.DataFile) 19
NoSuchTableException (org.apache.iceberg.exceptions.NoSuchTableException) 19
Collectors (java.util.stream.Collectors) 18
BaseTable (org.apache.iceberg.BaseTable) 18
Types (org.apache.iceberg.types.Types) 18
Properties (java.util.Properties) 17
Configuration (org.apache.hadoop.conf.Configuration) 17
Path (org.apache.hadoop.fs.Path) 17
FileFormat (org.apache.iceberg.FileFormat) 16
ArrayList (java.util.ArrayList) 15