Example 56 with Table

Use of org.apache.iceberg.Table in project hive by apache.

In class TestHiveIcebergSchemaEvolution, method testMoveLastNameBeforeCustomerIdInIcebergTable:

@Test
public void testMoveLastNameBeforeCustomerIdInIcebergTable() throws IOException {
    // Create an Iceberg table with the columns customer_id, first_name and last_name, loaded with some initial data.
    Table icebergTable = testTables.createTable(shell, "customers",
        HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, fileFormat,
        HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS);
    // Move the last_name column before the customer_id in the table schema.
    icebergTable.updateSchema().moveBefore("last_name", "customer_id").commit();
    Schema customerSchemaLastNameFirst = new Schema(
        optional(1, "last_name", Types.StringType.get(), "This is last name"),
        optional(2, "customer_id", Types.LongType.get()),
        optional(3, "first_name", Types.StringType.get(), "This is first name"));
    TestHelper.RecordsBuilder customersWithLastNameFirstBuilder =
        TestHelper.RecordsBuilder.newInstance(customerSchemaLastNameFirst)
            .add("Brown", 0L, "Alice")
            .add("Green", 1L, "Bob")
            .add("Pink", 2L, "Trudy");
    List<Record> customersWithLastNameFirst = customersWithLastNameFirstBuilder.build();
    // Run a 'select *' to check that the column order in the result has changed.
    List<Object[]> rows = shell.executeStatement("SELECT * FROM default.customers");
    HiveIcebergTestUtils.validateData(customersWithLastNameFirst,
        HiveIcebergTestUtils.valueForRow(customerSchemaLastNameFirst, rows), 1);
    // Query the data with explicit column names and check that the result matches the data the table was created with.
    rows = shell.executeStatement("SELECT customer_id, first_name, last_name FROM default.customers");
    HiveIcebergTestUtils.validateData(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS,
        HiveIcebergTestUtils.valueForRow(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, rows), 0);
    // Insert data from Hive to check that the last_name column must now come before customer_id in the VALUES list.
    shell.executeStatement("INSERT INTO default.customers values ('Magenta', 3L, 'Lily')");
    customersWithLastNameFirstBuilder.add("Magenta", 3L, "Lily");
    customersWithLastNameFirst = customersWithLastNameFirstBuilder.build();
    rows = shell.executeStatement("SELECT * FROM default.customers");
    HiveIcebergTestUtils.validateData(customersWithLastNameFirst,
        HiveIcebergTestUtils.valueForRow(customerSchemaLastNameFirst, rows), 1);
}
Also used : TestHelper(org.apache.iceberg.mr.TestHelper) Table(org.apache.iceberg.Table) Schema(org.apache.iceberg.Schema) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) Record(org.apache.iceberg.data.Record) Test(org.junit.Test)
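
The moveBefore call above is one of three column-reordering operations on Iceberg's UpdateSchema builder; each commit() produces a new schema version without rewriting any data files. A minimal sketch, assuming an already-loaded Table named table (the column names are illustrative):

// Changes are staged on the UpdateSchema builder and applied atomically by commit().
table.updateSchema().moveFirst("last_name").commit();                 // move to the front
table.updateSchema().moveBefore("last_name", "customer_id").commit(); // move before another column
table.updateSchema().moveAfter("first_name", "customer_id").commit(); // move after another column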

Example 57 with Table

Use of org.apache.iceberg.Table in project hive by apache.

In class TestHiveIcebergSchemaEvolution, method testRemoveColumnFromIcebergTable:

@Test
public void testRemoveColumnFromIcebergTable() throws IOException {
    // Create an Iceberg table with the columns customer_id, first_name and last_name, loaded with some initial data.
    Table icebergTable = testTables.createTable(shell, "customers",
        HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, fileFormat,
        HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS);
    // Remove the first_name column from the table.
    icebergTable.updateSchema().deleteColumn("first_name").commit();
    Schema customerSchemaWithoutFirstName = new Schema(
        optional(1, "customer_id", Types.LongType.get()),
        optional(2, "last_name", Types.StringType.get(), "This is last name"));
    TestHelper.RecordsBuilder customersWithoutFirstNameBuilder =
        TestHelper.RecordsBuilder.newInstance(customerSchemaWithoutFirstName)
            .add(0L, "Brown")
            .add(1L, "Green")
            .add(2L, "Pink");
    List<Record> customersWithoutFirstName = customersWithoutFirstNameBuilder.build();
    // Run a 'select *' from Hive to verify that the result no longer contains the first_name column.
    List<Object[]> rows = shell.executeStatement("SELECT * FROM default.customers");
    HiveIcebergTestUtils.validateData(customersWithoutFirstName,
        HiveIcebergTestUtils.valueForRow(customerSchemaWithoutFirstName, rows), 0);
    // Run a 'select first_name' and check that an exception is thrown.
    AssertHelpers.assertThrows("should throw exception", IllegalArgumentException.class,
        "Invalid table alias or column reference 'first_name'", () -> {
            shell.executeStatement("SELECT first_name FROM default.customers");
        });
    // Insert a row from Hive to check that it can be inserted without the first_name column.
    shell.executeStatement("INSERT INTO default.customers values (4L, 'Magenta')");
    rows = shell.executeStatement("SELECT * FROM default.customers");
    customersWithoutFirstNameBuilder.add(4L, "Magenta");
    customersWithoutFirstName = customersWithoutFirstNameBuilder.build();
    HiveIcebergTestUtils.validateData(customersWithoutFirstName,
        HiveIcebergTestUtils.valueForRow(customerSchemaWithoutFirstName, rows), 0);
}
Also used : TestHelper(org.apache.iceberg.mr.TestHelper) Table(org.apache.iceberg.Table) Schema(org.apache.iceberg.Schema) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) Record(org.apache.iceberg.data.Record) Test(org.junit.Test)
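
deleteColumn can be chained with other schema updates so that a single commit() applies everything atomically and readers never observe an intermediate schema. A hedged sketch against the same hypothetical table variable (the family_name target is illustrative):

// Drop one column and rename another in a single atomic schema change.
table.updateSchema()
    .deleteColumn("first_name")
    .renameColumn("last_name", "family_name")
    .commit();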

Example 58 with Table

Use of org.apache.iceberg.Table in project hive by apache.

In class TestHiveIcebergStorageHandlerNoScan, method testIcebergHMSPropertiesTranslation:

@Test
public void testIcebergHMSPropertiesTranslation() throws Exception {
    Assume.assumeTrue("Iceberg - HMS property translation is only relevant for HiveCatalog", testTableType == TestTables.TestTableType.HIVE_CATALOG);
    TableIdentifier identifier = TableIdentifier.of("default", "customers");
    // Create an HMS table with a property to be translated
    shell.executeStatement(String.format("CREATE EXTERNAL TABLE default.customers " +
        "STORED BY ICEBERG " +
        "TBLPROPERTIES ('%s'='%s', '%s'='%s', '%s'='%s')",
        InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA),
        InputFormatConfig.PARTITION_SPEC, PartitionSpecParser.toJson(SPEC),
        InputFormatConfig.EXTERNAL_TABLE_PURGE, "false"));
    // Check that HMS table prop was translated to equivalent Iceberg prop (purge -> gc.enabled)
    org.apache.iceberg.Table icebergTable = testTables.loadTable(identifier);
    Assert.assertEquals("false", icebergTable.properties().get(GC_ENABLED));
    Assert.assertNull(icebergTable.properties().get(InputFormatConfig.EXTERNAL_TABLE_PURGE));
    // Change Iceberg prop
    icebergTable.updateProperties().set(GC_ENABLED, "true").commit();
    // Check that Iceberg prop was translated to equivalent HMS prop (gc.enabled -> purge)
    Map<String, String> hmsParams = shell.metastore().getTable("default", "customers").getParameters();
    Assert.assertEquals("true", hmsParams.get(InputFormatConfig.EXTERNAL_TABLE_PURGE));
    Assert.assertNull(hmsParams.get(GC_ENABLED));
}
Also used : TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) Table(org.apache.iceberg.Table) Test(org.junit.Test)
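
On the Iceberg side the translation goes through the generic table property API; GC_ENABLED is the "gc.enabled" constant from org.apache.iceberg.TableProperties. A minimal sketch, assuming table is a loaded org.apache.iceberg.Table:

// Stage and commit a property change; with HiveCatalog this is what the test
// later observes in HMS as the external.table.purge parameter.
table.updateProperties().set(TableProperties.GC_ENABLED, "true").commit();
String gcEnabled = table.properties().get(TableProperties.GC_ENABLED); // "true"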

Example 59 with Table

Use of org.apache.iceberg.Table in project hive by apache.

In class TestHiveIcebergStorageHandlerNoScan, method testCreateTableWithFormatV2ThroughTableProperty:

@Test
public void testCreateTableWithFormatV2ThroughTableProperty() {
    TableIdentifier identifier = TableIdentifier.of("default", "customers");
    shell.executeStatement("CREATE EXTERNAL TABLE customers (id int, name string) STORED BY ICEBERG " + testTables.locationForCreateTableSQL(identifier) + " TBLPROPERTIES ('" + InputFormatConfig.CATALOG_NAME + "'='" + testTables.catalogName() + "', " + "'" + TableProperties.FORMAT_VERSION + "'='2')");
    org.apache.iceberg.Table icebergTable = testTables.loadTable(identifier);
    Assert.assertEquals("should create table using format v2", 2, ((BaseTable) icebergTable).operations().current().formatVersion());
}
Also used : TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) Table(org.apache.iceberg.Table) Test(org.junit.Test)
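
formatVersion() is not part of the public Table interface, which is why the assertion casts to BaseTable and reads the current TableMetadata. A minimal sketch of the same lookup, assuming icebergTable was loaded as above:

// Drop down to TableOperations to inspect the metadata's format version.
TableMetadata metadata = ((BaseTable) icebergTable).operations().current();
int formatVersion = metadata.formatVersion(); // 2 for the table created above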

Example 60 with Table

Use of org.apache.iceberg.Table in project hive by apache.

In class TestHiveIcebergStorageHandlerNoScan, method testCreateTableWithUnpartitionedSpec:

@Test
public void testCreateTableWithUnpartitionedSpec() {
    TableIdentifier identifier = TableIdentifier.of("default", "customers");
    // We need the location for HadoopTable-based tests only
    shell.executeStatement("CREATE EXTERNAL TABLE customers " +
        "STORED BY ICEBERG " +
        testTables.locationForCreateTableSQL(identifier) +
        "TBLPROPERTIES ('" + InputFormatConfig.TABLE_SCHEMA + "'='" +
        SchemaParser.toJson(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA) + "', " +
        "'" + InputFormatConfig.PARTITION_SPEC + "'='" +
        PartitionSpecParser.toJson(PartitionSpec.unpartitioned()) + "', " +
        "'" + InputFormatConfig.CATALOG_NAME + "'='" + testTables.catalogName() + "')");
    // Check the Iceberg table partition data
    org.apache.iceberg.Table icebergTable = testTables.loadTable(identifier);
    Assert.assertEquals(SPEC, icebergTable.spec());
}
Also used : TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) Table(org.apache.iceberg.Table) Test(org.junit.Test)
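
The TBLPROPERTIES values in this statement are just the JSON serializations produced by Iceberg's parsers. A hedged sketch of generating them, assuming a Schema instance named schema:

// Produce the JSON strings expected by InputFormatConfig.TABLE_SCHEMA
// and InputFormatConfig.PARTITION_SPEC.
String schemaJson = SchemaParser.toJson(schema);
String specJson = PartitionSpecParser.toJson(PartitionSpec.unpartitioned());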

Aggregations

Table (org.apache.iceberg.Table) 188
Test (org.junit.Test) 132
Schema (org.apache.iceberg.Schema) 66
TableIdentifier (org.apache.iceberg.catalog.TableIdentifier) 56
Record (org.apache.iceberg.data.Record) 56
PartitionSpec (org.apache.iceberg.PartitionSpec) 51
IOException (java.io.IOException) 27
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema) 27
List (java.util.List) 22
Map (java.util.Map) 20
DataFile (org.apache.iceberg.DataFile) 19
NoSuchTableException (org.apache.iceberg.exceptions.NoSuchTableException) 19
Collectors (java.util.stream.Collectors) 18
BaseTable (org.apache.iceberg.BaseTable) 18
Types (org.apache.iceberg.types.Types) 18
Properties (java.util.Properties) 17
Configuration (org.apache.hadoop.conf.Configuration) 17
Path (org.apache.hadoop.fs.Path) 17
FileFormat (org.apache.iceberg.FileFormat) 16
ArrayList (java.util.ArrayList) 15