Example use of org.apache.iceberg.catalog.TableIdentifier in the Apache Hive project.
From class TestHiveIcebergStorageHandlerNoScan, method testPartitionTransform.
/**
 * Creates an external Iceberg table through Hive DDL whose PARTITIONED BY SPEC clause uses the
 * year, month, day, hour, truncate, bucket and identity transforms, then asserts that the
 * partition spec of the loaded table equals the same spec built programmatically.
 */
@Test
public void testPartitionTransform() {
    Schema tableSchema = new Schema(
        optional(1, "id", Types.LongType.get()),
        optional(2, "year_field", Types.DateType.get()),
        optional(3, "month_field", Types.TimestampType.withZone()),
        optional(4, "day_field", Types.TimestampType.withoutZone()),
        optional(5, "hour_field", Types.TimestampType.withoutZone()),
        optional(6, "truncate_field", Types.StringType.get()),
        optional(7, "bucket_field", Types.StringType.get()),
        optional(8, "identity_field", Types.StringType.get()));

    // The spec the DDL below is expected to produce, one transform per partition source column.
    PartitionSpec expectedSpec = PartitionSpec.builderFor(tableSchema)
        .year("year_field")
        .month("month_field")
        .day("day_field")
        .hour("hour_field")
        .truncate("truncate_field", 2)
        .bucket("bucket_field", 2)
        .identity("identity_field")
        .build();

    TableIdentifier identifier = TableIdentifier.of("default", "part_test");

    // Assemble the CREATE TABLE statement piece by piece; the schema and catalog name are passed
    // to the storage handler as table properties.
    StringBuilder createTable = new StringBuilder();
    createTable.append("CREATE EXTERNAL TABLE ").append(identifier);
    createTable.append(" PARTITIONED BY SPEC (year(year_field), month(month_field), day(day_field), hour(hour_field), ");
    createTable.append("truncate(2, truncate_field), bucket(2, bucket_field), identity_field)");
    createTable.append(" STORED BY ICEBERG ").append(testTables.locationForCreateTableSQL(identifier));
    createTable.append(" TBLPROPERTIES ('").append(InputFormatConfig.TABLE_SCHEMA);
    createTable.append("'='").append(SchemaParser.toJson(tableSchema)).append("', ");
    createTable.append("'").append(InputFormatConfig.CATALOG_NAME);
    createTable.append("'='").append(testTables.catalogName()).append("')");
    shell.executeStatement(createTable.toString());

    Table createdTable = testTables.loadTable(identifier);
    Assert.assertEquals(expectedSpec, createdTable.spec());
}
Example use of org.apache.iceberg.catalog.TableIdentifier in the Apache Hive project.
From class TestHiveIcebergStorageHandlerNoScan, method testAlterTableRenamePartitionColumn.
/**
 * Exercises renaming columns that take part in the partition spec.
 *
 * <p>First a single partition column is renamed (and moved to the front) and the partition field
 * name is checked on the loaded table. Then two new columns are added, made partition columns,
 * renamed, and the spec is narrowed to just those two; the result is checked against fixed rows
 * of the DESCRIBE output.
 *
 * @throws Exception if table creation or any statement fails
 */
@Test
public void testAlterTableRenamePartitionColumn() throws Exception {
    TableIdentifier identifier = TableIdentifier.of("default", "customers");
    testTables.createTable(shell, identifier.name(), HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, SPEC,
        FileFormat.PARQUET, ImmutableList.of());
    shell.executeStatement("ALTER TABLE default.customers SET PARTITION SPEC (last_name)");

    // Rename the partition column and move it to the first position in a single statement.
    shell.executeStatement("ALTER TABLE default.customers CHANGE last_name family_name string FIRST");
    List<PartitionField> fieldsAfterRename = testTables.loadTable(identifier).spec().fields();
    Assert.assertEquals(1, fieldsAfterRename.size());
    Assert.assertEquals("family_name", fieldsAfterRename.get(0).name());

    // Add new columns, make them partition columns, rename them, then drop family_name from the spec.
    String[] followUpStatements = {
        "ALTER TABLE default.customers ADD COLUMNS (p1 string, p2 string)",
        "ALTER TABLE default.customers SET PARTITION SPEC (family_name, p1, p2)",
        "ALTER TABLE default.customers CHANGE p1 region string",
        "ALTER TABLE default.customers CHANGE p2 city string",
        "ALTER TABLE default.customers SET PARTITION SPEC (region, city)"
    };
    for (String statement : followUpStatements) {
        shell.executeStatement(statement);
    }

    // Rows 8..10 of the DESCRIBE output are compared with the expected partition-transform rows.
    List<Object[]> describeRows = shell.executeStatement("DESCRIBE default.customers");
    String[][] expectedTransformRows = {
        { "family_name", "VOID", null },
        { "region", "IDENTITY", null },
        { "city", "IDENTITY", null }
    };
    int firstTransformRow = 8;
    for (int offset = 0; offset < expectedTransformRows.length; offset++) {
        Assert.assertArrayEquals(expectedTransformRows[offset], describeRows.get(firstTransformRow + offset));
    }
}
Example use of org.apache.iceberg.catalog.TableIdentifier in the Apache Hive project.
From class TestHiveIcebergStorageHandlerNoScan, method testDropTableWithCorruptedMetadata.
/**
 * Checks that DROP TABLE still works after the metadata file referenced by the HMS table has
 * been deleted, and that the table can no longer be loaded afterwards.
 *
 * <p>Only runs when the test table type is HIVE_CATALOG; other table types are skipped through
 * a JUnit assumption.
 *
 * @throws TException declared for the metastore lookup
 * @throws IOException declared by the original test signature
 * @throws InterruptedException declared for the metastore lookup
 */
@Test
public void testDropTableWithCorruptedMetadata() throws TException, IOException, InterruptedException {
    boolean usesHiveCatalog = testTableType == TestTables.TestTableType.HIVE_CATALOG;
    Assume.assumeTrue("Only HiveCatalog attempts to load the Iceberg table prior to dropping it.", usesHiveCatalog);

    // Set up the table under test.
    TableIdentifier identifier = TableIdentifier.of("default", "customers");
    testTables.createTable(shell, identifier.name(), HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
        FileFormat.PARQUET, ImmutableList.of());

    // Turn on data purging (this should set external.table.purge=true on the HMS table).
    Table icebergTable = testTables.loadTable(identifier);
    icebergTable.updateProperties().set(GC_ENABLED, "true").commit();

    // Corrupt the table: remove the metadata file that the HMS table parameters point to, so the
    // Iceberg table can no longer be loaded.
    String currentMetadataFile = shell.metastore()
        .getTable(identifier)
        .getParameters()
        .get(BaseMetastoreTableOperations.METADATA_LOCATION_PROP);
    icebergTable.io().deleteFile(currentMetadataFile);

    // The HMS table must nonetheless remain droppable.
    String dropStatement = String.format("DROP TABLE %s", identifier);
    shell.executeStatement(dropStatement);

    AssertHelpers.assertThrows("should throw exception", NoSuchTableException.class, "Table does not exist", () -> {
        testTables.loadTable(identifier);
    });
}
Example use of org.apache.iceberg.catalog.TableIdentifier in the Apache Hive project.
From class TestHiveIcebergTruncateTable, method testTruncateTable.
/**
 * Shared driver for the truncate tests: sets 'external.table.purge', verifies the initial data
 * and statistics, runs TRUNCATE (optionally with FORCE), and then verifies that the table is
 * empty and its statistics are zeroed.
 *
 * @param databaseName database part of the table identifier
 * @param tableName table part of the table identifier
 * @param icebergTable table handle used for the statistics check before truncation
 * @param records records expected to be in the table before truncation
 * @param schema schema used to convert the query result rows for comparison with {@code records}
 * @param externalTablePurge value written to the 'external.table.purge' table property
 * @param force whether to append FORCE to the TRUNCATE statement
 * @throws TException declared for the statistics validation
 * @throws InterruptedException declared for the statistics validation
 */
private void testTruncateTable(String databaseName, String tableName, Table icebergTable, List<Record> records, Schema schema, boolean externalTablePurge, boolean force) throws TException, InterruptedException {
    TableIdentifier identifier = TableIdentifier.of(databaseName, tableName);
    String selectAll = "SELECT * FROM " + identifier;

    // Apply the requested 'external.table.purge' setting to the table.
    shell.executeStatement(
        "ALTER TABLE " + identifier + " SET TBLPROPERTIES('external.table.purge'='" + externalTablePurge + "')");

    // Before truncating: the data must match the expected records and the basic stats must validate.
    List<Object[]> initialRows = shell.executeStatement(selectAll);
    HiveIcebergTestUtils.validateData(records, HiveIcebergTestUtils.valueForRow(schema, initialRows), 0);
    shell.executeStatement("ANALYZE TABLE " + identifier + " COMPUTE STATISTICS");
    validateBasicStats(icebergTable, databaseName, tableName);

    // Issue either 'truncate' or 'truncate ... force'.
    String forceSuffix = force ? " FORCE" : "";
    shell.executeStatement("TRUNCATE " + identifier + forceSuffix);

    // After truncating: every mapped statistic in the current snapshot summary must read "0".
    Table truncatedTable = testTables.loadTable(identifier);
    Map<String, String> snapshotSummary = truncatedTable.currentSnapshot().summary();
    for (String statKey : STATS_MAPPING.values()) {
        Assert.assertEquals("0", snapshotSummary.get(statKey));
    }

    // ...and the table must return no rows and still pass the basic stats validation.
    List<Object[]> remainingRows = shell.executeStatement(selectAll);
    Assert.assertEquals(0, remainingRows.size());
    validateBasicStats(truncatedTable, databaseName, tableName);
}
Example use of org.apache.iceberg.catalog.TableIdentifier in the Apache Hive project.
From class TestHiveIcebergCTAS, method testCTASFailureRollback.
/**
 * Verifies that a failed CREATE TABLE ... AS SELECT leaves no target table behind.
 *
 * <p>The failure is provoked by configuring an output committer class name that cannot be
 * loaded. The check is repeated for an unpartitioned target and for two partitioned variants.
 * Only runs when the test table type is HIVE_CATALOG.
 *
 * @throws IOException if creating the source table fails
 */
@Test
public void testCTASFailureRollback() throws IOException {
    boolean usesHiveCatalog = testTableType == TestTables.TestTableType.HIVE_CATALOG;
    Assume.assumeTrue(HiveIcebergSerDe.CTAS_EXCEPTION_MSG, usesHiveCatalog);

    // Point Tez at a committer class it cannot load so that execution fails.
    shell.setHiveSessionValue("hive.tez.mapreduce.output.committer.class", "org.apache.NotExistingClass");

    TableIdentifier target = TableIdentifier.of("default", "target");
    testTables.createTable(shell, "source", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, fileFormat,
        HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS);

    String[] partitioningSchemes = {
        "",
        "PARTITIONED BY (last_name)",
        "PARTITIONED BY (customer_id, last_name)"
    };
    for (int i = 0; i < partitioningSchemes.length; i++) {
        String ctasStatement = String.format(
            "CREATE TABLE target %s STORED BY ICEBERG AS SELECT * FROM source", partitioningSchemes[i]);
        AssertHelpers.assertThrows("Should fail while loading non-existent output committer class.",
            IllegalArgumentException.class, "org.apache.NotExistingClass",
            () -> shell.executeStatement(ctasStatement));

        // The lifecycle hook is expected to have dropped the half-created CTAS table.
        Assert.assertThrows(NoSuchTableException.class, () -> testTables.loadTable(target));
    }
}
Aggregations