Search in sources :

Example 36 with TableIdentifier

Use of org.apache.iceberg.catalog.TableIdentifier in the Apache Hive project.

The class TestHiveIcebergStorageHandlerNoScan defines the method testPartitionTransform.

@Test
public void testPartitionTransform() {
    // Schema with one column per partition transform we want to exercise
    // (year, month, day, hour, truncate, bucket, identity).
    Schema tableSchema = new Schema(
        optional(1, "id", Types.LongType.get()),
        optional(2, "year_field", Types.DateType.get()),
        optional(3, "month_field", Types.TimestampType.withZone()),
        optional(4, "day_field", Types.TimestampType.withoutZone()),
        optional(5, "hour_field", Types.TimestampType.withoutZone()),
        optional(6, "truncate_field", Types.StringType.get()),
        optional(7, "bucket_field", Types.StringType.get()),
        optional(8, "identity_field", Types.StringType.get()));
    // The spec we expect Hive's PARTITIONED BY SPEC clause to materialize.
    PartitionSpec expectedSpec = PartitionSpec.builderFor(tableSchema)
        .year("year_field")
        .month("month_field")
        .day("day_field")
        .hour("hour_field")
        .truncate("truncate_field", 2)
        .bucket("bucket_field", 2)
        .identity("identity_field")
        .build();
    TableIdentifier tableId = TableIdentifier.of("default", "part_test");
    // Create the table through Hive DDL using every supported transform syntax.
    shell.executeStatement("CREATE EXTERNAL TABLE " + tableId +
        " PARTITIONED BY SPEC (year(year_field), month(month_field), day(day_field), hour(hour_field), " +
        "truncate(2, truncate_field), bucket(2, bucket_field), identity_field)" +
        " STORED BY ICEBERG " + testTables.locationForCreateTableSQL(tableId) +
        " TBLPROPERTIES ('" + InputFormatConfig.TABLE_SCHEMA + "'='" + SchemaParser.toJson(tableSchema) + "', " +
        "'" + InputFormatConfig.CATALOG_NAME + "'='" + testTables.catalogName() + "')");
    // The Iceberg table loaded back must carry the exact spec we built above.
    Table created = testTables.loadTable(tableId);
    Assert.assertEquals(expectedSpec, created.spec());
}
Also used : TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) BaseTable(org.apache.iceberg.BaseTable) Table(org.apache.iceberg.Table) UpdateSchema(org.apache.iceberg.UpdateSchema) Schema(org.apache.iceberg.Schema) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) PartitionSpec(org.apache.iceberg.PartitionSpec) Test(org.junit.Test)

Example 37 with TableIdentifier

use of org.apache.iceberg.catalog.TableIdentifier in project hive by apache.

the class TestHiveIcebergStorageHandlerNoScan method testAlterTableRenamePartitionColumn.

@Test
public void testAlterTableRenamePartitionColumn() throws Exception {
    TableIdentifier identifier = TableIdentifier.of("default", "customers");
    testTables.createTable(shell, identifier.name(), HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, SPEC, FileFormat.PARQUET, ImmutableList.of());
    shell.executeStatement("ALTER TABLE default.customers SET PARTITION SPEC (last_name)");
    // Rename (and reorder) a column that currently backs a partition field;
    // the partition field name should follow the column rename.
    shell.executeStatement("ALTER TABLE default.customers CHANGE last_name family_name string FIRST");
    List<PartitionField> specFields = testTables.loadTable(identifier).spec().fields();
    Assert.assertEquals(1, specFields.size());
    Assert.assertEquals("family_name", specFields.get(0).name());
    // Add new columns, promote them to partition columns, rename them, then
    // drop one partition column by setting a spec that omits it.
    shell.executeStatement("ALTER TABLE default.customers ADD COLUMNS (p1 string, p2 string)");
    shell.executeStatement("ALTER TABLE default.customers SET PARTITION SPEC (family_name, p1, p2)");
    shell.executeStatement("ALTER TABLE default.customers CHANGE p1 region string");
    shell.executeStatement("ALTER TABLE default.customers CHANGE p2 city string");
    shell.executeStatement("ALTER TABLE default.customers SET PARTITION SPEC (region, city)");
    // DESCRIBE rows 8-10 hold the partition transforms: the removed column is
    // VOID-ed, the surviving ones are IDENTITY under their renamed names.
    List<Object[]> describeRows = shell.executeStatement("DESCRIBE default.customers");
    Assert.assertArrayEquals(new String[] { "family_name", "VOID", null }, describeRows.get(8));
    Assert.assertArrayEquals(new String[] { "region", "IDENTITY", null }, describeRows.get(9));
    Assert.assertArrayEquals(new String[] { "city", "IDENTITY", null }, describeRows.get(10));
}
Also used : TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) PartitionField(org.apache.iceberg.PartitionField) Test(org.junit.Test)

Example 38 with TableIdentifier

use of org.apache.iceberg.catalog.TableIdentifier in project hive by apache.

the class TestHiveIcebergStorageHandlerNoScan method testDropTableWithCorruptedMetadata.

@Test
public void testDropTableWithCorruptedMetadata() throws TException, IOException, InterruptedException {
    Assume.assumeTrue("Only HiveCatalog attempts to load the Iceberg table prior to dropping it.", testTableType == TestTables.TestTableType.HIVE_CATALOG);
    // Create the test table and turn data purging on, which should set
    // external.table.purge=true on the backing HMS table.
    TableIdentifier identifier = TableIdentifier.of("default", "customers");
    testTables.createTable(shell, identifier.name(), HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, FileFormat.PARQUET, ImmutableList.of());
    Table icebergTable = testTables.loadTable(identifier);
    icebergTable.updateProperties().set(GC_ENABLED, "true").commit();
    // Corrupt the metadata: delete the current metadata file recorded in HMS,
    // which makes the Iceberg table unloadable.
    String currentMetadataLocation = shell.metastore().getTable(identifier).getParameters().get(BaseMetastoreTableOperations.METADATA_LOCATION_PROP);
    icebergTable.io().deleteFile(currentMetadataLocation);
    // DROP TABLE on the HMS side must still succeed despite the corruption.
    shell.executeStatement("DROP TABLE " + identifier);
    // And the Iceberg table must be gone afterwards.
    AssertHelpers.assertThrows("should throw exception", NoSuchTableException.class, "Table does not exist", () -> {
        testTables.loadTable(identifier);
    });
}
Also used : TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) BaseTable(org.apache.iceberg.BaseTable) Table(org.apache.iceberg.Table) Test(org.junit.Test)

Example 39 with TableIdentifier

use of org.apache.iceberg.catalog.TableIdentifier in project hive by apache.

the class TestHiveIcebergTruncateTable method testTruncateTable.

/**
 * Shared driver for the truncate-table scenarios: seeds properties, verifies the
 * initial data and stats, runs TRUNCATE (optionally with FORCE), then asserts the
 * table is empty and its snapshot statistics are reset to zero.
 */
private void testTruncateTable(String databaseName, String tableName, Table icebergTable, List<Record> records, Schema schema, boolean externalTablePurge, boolean force) throws TException, InterruptedException {
    TableIdentifier identifier = TableIdentifier.of(databaseName, tableName);
    // Configure purge behavior through the 'external.table.purge' table property.
    shell.executeStatement("ALTER TABLE " + identifier + " SET TBLPROPERTIES('external.table.purge'='" + externalTablePurge + "')");
    // Sanity-check the seeded rows and compute statistics before truncating.
    List<Object[]> queryResult = shell.executeStatement("SELECT * FROM " + identifier);
    HiveIcebergTestUtils.validateData(records, HiveIcebergTestUtils.valueForRow(schema, queryResult), 0);
    shell.executeStatement("ANALYZE TABLE " + identifier + " COMPUTE STATISTICS");
    validateBasicStats(icebergTable, databaseName, tableName);
    // Run 'TRUNCATE <table>' or 'TRUNCATE <table> FORCE'.
    shell.executeStatement("TRUNCATE " + identifier + (force ? " FORCE" : ""));
    // The new snapshot's summary must report zero for every tracked stat,
    // the table must return no rows, and basic stats must validate again.
    Table truncated = testTables.loadTable(identifier);
    Map<String, String> snapshotSummary = truncated.currentSnapshot().summary();
    for (String statsKey : STATS_MAPPING.values()) {
        Assert.assertEquals("0", snapshotSummary.get(statsKey));
    }
    queryResult = shell.executeStatement("SELECT * FROM " + identifier);
    Assert.assertEquals(0, queryResult.size());
    validateBasicStats(truncated, databaseName, tableName);
}
Also used : TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) Table(org.apache.iceberg.Table)

Example 40 with TableIdentifier

use of org.apache.iceberg.catalog.TableIdentifier in project hive by apache.

the class TestHiveIcebergCTAS method testCTASFailureRollback.

@Test
public void testCTASFailureRollback() throws IOException {
    Assume.assumeTrue(HiveIcebergSerDe.CTAS_EXCEPTION_MSG, testTableType == TestTables.TestTableType.HIVE_CATALOG);
    // Provoke an execution failure: configure a Tez output committer class
    // that cannot be loaded at runtime.
    shell.setHiveSessionValue("hive.tez.mapreduce.output.committer.class", "org.apache.NotExistingClass");
    TableIdentifier target = TableIdentifier.of("default", "target");
    testTables.createTable(shell, "source", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, fileFormat, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS);
    // Exercise unpartitioned, single-column and multi-column partitioning.
    String[] partitionClauses = { "", "PARTITIONED BY (last_name)", "PARTITIONED BY (customer_id, last_name)" };
    for (String partitionClause : partitionClauses) {
        AssertHelpers.assertThrows("Should fail while loading non-existent output committer class.", IllegalArgumentException.class, "org.apache.NotExistingClass", () -> shell.executeStatement(String.format("CREATE TABLE target %s STORED BY ICEBERG AS SELECT * FROM source", partitionClause)));
        // The failed CTAS target must have been dropped by the lifecycle hook,
        // otherwise the next iteration's CREATE TABLE would collide.
        Assert.assertThrows(NoSuchTableException.class, () -> testTables.loadTable(target));
    }
}
Also used : TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) Test(org.junit.Test)

Aggregations

TableIdentifier (org.apache.iceberg.catalog.TableIdentifier)87 Test (org.junit.Test)69 Table (org.apache.iceberg.Table)56 PartitionSpec (org.apache.iceberg.PartitionSpec)27 Schema (org.apache.iceberg.Schema)25 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)16 BaseTable (org.apache.iceberg.BaseTable)15 UpdateSchema (org.apache.iceberg.UpdateSchema)15 List (java.util.List)13 NoSuchTableException (org.apache.iceberg.exceptions.NoSuchTableException)13 ArrayList (java.util.ArrayList)11 ImmutableList (org.apache.iceberg.relocated.com.google.common.collect.ImmutableList)11 IOException (java.io.IOException)10 Map (java.util.Map)10 Types (org.apache.iceberg.types.Types)10 HashMap (java.util.HashMap)9 Path (org.apache.hadoop.fs.Path)9 TableProperties (org.apache.iceberg.TableProperties)9 Collections (java.util.Collections)8 Properties (java.util.Properties)8