Use of org.apache.iceberg.catalog.TableIdentifier in project hive by apache.
The class TestHiveIcebergStorageHandlerNoScan, method testIcebergAndHmsTableProperties.
@Test
public void testIcebergAndHmsTableProperties() throws Exception {
  TableIdentifier identifier = TableIdentifier.of("default", "customers");
  shell.executeStatement(String.format("CREATE EXTERNAL TABLE default.customers " +
      "STORED BY ICEBERG %s" +
      "TBLPROPERTIES ('%s'='%s', '%s'='%s', '%s'='%s', '%s'='%s')",
      // we need the location for HadoopTable based tests only
      testTables.locationForCreateTableSQL(identifier),
      InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA),
      InputFormatConfig.PARTITION_SPEC, PartitionSpecParser.toJson(SPEC),
      "custom_property", "initial_val",
      InputFormatConfig.CATALOG_NAME, testTables.catalogName()));

  // Check the Iceberg table parameters
  org.apache.iceberg.Table icebergTable = testTables.loadTable(identifier);
  Map<String, String> expectedIcebergProperties = new HashMap<>();
  expectedIcebergProperties.put("custom_property", "initial_val");
  expectedIcebergProperties.put("EXTERNAL", "TRUE");
  expectedIcebergProperties.put("storage_handler", HiveIcebergStorageHandler.class.getName());
  expectedIcebergProperties.put(serdeConstants.SERIALIZATION_FORMAT, "1");

  // Check the HMS table parameters
  org.apache.hadoop.hive.metastore.api.Table hmsTable = shell.metastore().getTable("default", "customers");
  Map<String, String> hmsParams = hmsTable.getParameters().entrySet().stream()
      .filter(e -> !IGNORED_PARAMS.contains(e.getKey()))
      .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
  Properties tableProperties = new Properties();
  tableProperties.putAll(hmsParams);

  if (Catalogs.hiveCatalog(shell.getHiveConf(), tableProperties)) {
    expectedIcebergProperties.put(TableProperties.ENGINE_HIVE_ENABLED, "true");
  }
  if (MetastoreUtil.hive3PresentOnClasspath()) {
    expectedIcebergProperties.put("bucketing_version", "2");
  }
  Assert.assertEquals(expectedIcebergProperties, icebergTable.properties());

  if (Catalogs.hiveCatalog(shell.getHiveConf(), tableProperties)) {
    Assert.assertEquals(10, hmsParams.size());
    Assert.assertEquals("initial_val", hmsParams.get("custom_property"));
    Assert.assertEquals("TRUE", hmsParams.get(InputFormatConfig.EXTERNAL_TABLE_PURGE));
    Assert.assertEquals("TRUE", hmsParams.get("EXTERNAL"));
    Assert.assertEquals("true", hmsParams.get(TableProperties.ENGINE_HIVE_ENABLED));
    Assert.assertEquals(HiveIcebergStorageHandler.class.getName(),
        hmsParams.get(hive_metastoreConstants.META_TABLE_STORAGE));
    Assert.assertEquals(BaseMetastoreTableOperations.ICEBERG_TABLE_TYPE_VALUE.toUpperCase(),
        hmsParams.get(BaseMetastoreTableOperations.TABLE_TYPE_PROP));
    Assert.assertEquals(hmsParams.get(BaseMetastoreTableOperations.METADATA_LOCATION_PROP),
        getCurrentSnapshotForHiveCatalogTable(icebergTable));
    Assert.assertNull(hmsParams.get(BaseMetastoreTableOperations.PREVIOUS_METADATA_LOCATION_PROP));
    Assert.assertNotNull(hmsParams.get(hive_metastoreConstants.DDL_TIME));
    Assert.assertNotNull(hmsParams.get(serdeConstants.SERIALIZATION_FORMAT));
  } else {
    Assert.assertEquals(7, hmsParams.size());
    Assert.assertNull(hmsParams.get(TableProperties.ENGINE_HIVE_ENABLED));
  }

  // Check HMS inputformat/outputformat/serde
  Assert.assertEquals(HiveIcebergInputFormat.class.getName(), hmsTable.getSd().getInputFormat());
  Assert.assertEquals(HiveIcebergOutputFormat.class.getName(), hmsTable.getSd().getOutputFormat());
  Assert.assertEquals(HiveIcebergSerDe.class.getName(), hmsTable.getSd().getSerdeInfo().getSerializationLib());

  // Add two new properties to the Iceberg table and update an existing one
  icebergTable.updateProperties()
      .set("new_prop_1", "true")
      .set("new_prop_2", "false")
      .set("custom_property", "new_val")
      .commit();

  // Refresh the HMS table to see if new Iceberg properties got synced into HMS
  hmsParams = shell.metastore().getTable("default", "customers").getParameters().entrySet().stream()
      .filter(e -> !IGNORED_PARAMS.contains(e.getKey()))
      .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));

  if (Catalogs.hiveCatalog(shell.getHiveConf(), tableProperties)) {
    // 2 newly-added properties + previous_metadata_location prop
    Assert.assertEquals(13, hmsParams.size());
    Assert.assertEquals("true", hmsParams.get("new_prop_1"));
    Assert.assertEquals("false", hmsParams.get("new_prop_2"));
    Assert.assertEquals("new_val", hmsParams.get("custom_property"));
    String prevSnapshot = getCurrentSnapshotForHiveCatalogTable(icebergTable);
    icebergTable.refresh();
    String newSnapshot = getCurrentSnapshotForHiveCatalogTable(icebergTable);
    Assert.assertEquals(hmsParams.get(BaseMetastoreTableOperations.PREVIOUS_METADATA_LOCATION_PROP), prevSnapshot);
    Assert.assertEquals(hmsParams.get(BaseMetastoreTableOperations.METADATA_LOCATION_PROP), newSnapshot);
  } else {
    Assert.assertEquals(7, hmsParams.size());
  }

  // Remove some Iceberg props and see if they're removed from HMS table props as well
  if (Catalogs.hiveCatalog(shell.getHiveConf(), tableProperties)) {
    icebergTable.updateProperties().remove("custom_property").remove("new_prop_1").commit();
    hmsParams = shell.metastore().getTable("default", "customers").getParameters();
    Assert.assertFalse(hmsParams.containsKey("custom_property"));
    Assert.assertFalse(hmsParams.containsKey("new_prop_1"));
    Assert.assertTrue(hmsParams.containsKey("new_prop_2"));
  }

  // Append some data and check whether HMS stats are aligned with the snapshot summary
  if (Catalogs.hiveCatalog(shell.getHiveConf(), tableProperties)) {
    List<Record> records = HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS;
    testTables.appendIcebergTable(shell.getHiveConf(), icebergTable, FileFormat.PARQUET, null, records);
    hmsParams = shell.metastore().getTable("default", "customers").getParameters();
    Map<String, String> summary = icebergTable.currentSnapshot().summary();
    Assert.assertEquals(summary.get(SnapshotSummary.TOTAL_DATA_FILES_PROP), hmsParams.get(StatsSetupConst.NUM_FILES));
    Assert.assertEquals(summary.get(SnapshotSummary.TOTAL_RECORDS_PROP), hmsParams.get(StatsSetupConst.ROW_COUNT));
    Assert.assertEquals(summary.get(SnapshotSummary.TOTAL_FILE_SIZE_PROP), hmsParams.get(StatsSetupConst.TOTAL_SIZE));
  }
}
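A note on the API exercised above: Iceberg table property changes go through the UpdateProperties builder, which stages mutations and applies them in a single atomic commit. A minimal sketch, assuming `table` is an already-loaded org.apache.iceberg.Table:

// Stage property mutations and commit them as one metadata update.
table.updateProperties()
    .set("custom_property", "new_val")   // add or overwrite a key
    .remove("new_prop_1")                // delete a key
    .commit();                           // writes new table metadata atomically

With a Hive catalog, each such commit produces a new metadata file, which is why the test expects metadata_location to advance and previous_metadata_location to appear among the HMS parameters.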
Use of org.apache.iceberg.catalog.TableIdentifier in project hive by apache.
The class TestHiveIcebergStorageHandlerNoScan, method testCreateTableWithoutColumnComments.
@Test
public void testCreateTableWithoutColumnComments() {
  TableIdentifier identifier = TableIdentifier.of("default", "without_comment_table");
  shell.executeStatement("CREATE EXTERNAL TABLE without_comment_table (" +
      "t_int INT, " +
      "t_string STRING) " +
      "STORED BY ICEBERG " +
      testTables.locationForCreateTableSQL(identifier) +
      testTables.propertiesForCreateTableSQL(ImmutableMap.of()));
  org.apache.iceberg.Table icebergTable = testTables.loadTable(identifier);

  List<Object[]> rows = shell.executeStatement("DESCRIBE default.without_comment_table");
  Assert.assertEquals(icebergTable.schema().columns().size(), rows.size());
  for (int i = 0; i < icebergTable.schema().columns().size(); i++) {
    Types.NestedField field = icebergTable.schema().columns().get(i);
    Assert.assertNull(field.doc());
    Assert.assertArrayEquals(
        new Object[] { field.name(), HiveSchemaUtil.convert(field.type()).getTypeName(), "from deserializer" },
        rows.get(i));
  }
}
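For context, the null comments asserted here come straight from Iceberg's schema API. A minimal sketch, assuming `icebergTable` is an already-loaded org.apache.iceberg.Table, that prints each column's name, type, and doc:

// doc() returns null for columns created without a comment.
for (Types.NestedField field : icebergTable.schema().columns()) {
    System.out.printf("%s: %s (doc=%s)%n", field.name(), field.type(), field.doc());
}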
Use of org.apache.iceberg.catalog.TableIdentifier in project hive by apache.
The class TestHiveIcebergStorageHandlerNoScan, method testSetPartitionTransformSameField.
@Test
public void testSetPartitionTransformSameField() {
  Schema schema = new Schema(
      optional(1, "id", Types.LongType.get()),
      optional(2, "truncate_field", Types.StringType.get()),
      optional(3, "bucket_field", Types.StringType.get()));
  TableIdentifier identifier = TableIdentifier.of("default", "part_test");
  shell.executeStatement("CREATE EXTERNAL TABLE " + identifier +
      " PARTITIONED BY SPEC (truncate(2, truncate_field), bucket(2, bucket_field))" +
      " STORED BY ICEBERG " + testTables.locationForCreateTableSQL(identifier) +
      "TBLPROPERTIES ('" + InputFormatConfig.TABLE_SCHEMA + "'='" + SchemaParser.toJson(schema) + "', " +
      "'" + InputFormatConfig.CATALOG_NAME + "'='" + testTables.catalogName() + "')");
  PartitionSpec spec = PartitionSpec.builderFor(schema)
      .truncate("truncate_field", 2).bucket("bucket_field", 2).build();
  Table table = testTables.loadTable(identifier);
  Assert.assertEquals(spec, table.spec());
  // Change one, keep one
  shell.executeStatement("ALTER TABLE default.part_test " +
      "SET PARTITION SPEC (truncate(3, truncate_field), bucket(2, bucket_field) )");
  spec = PartitionSpec.builderFor(schema).withSpecId(1)
      .alwaysNull("truncate_field", "truncate_field_trunc")
      .bucket("bucket_field", 2)
      .truncate("truncate_field", 3, "truncate_field_trunc_3").build();
  table.refresh();
  Assert.assertEquals(spec, table.spec());
  // Change one again, keep the other one
  shell.executeStatement("ALTER TABLE default.part_test " +
      "SET PARTITION SPEC (truncate(4, truncate_field), bucket(2, bucket_field) )");
  spec = PartitionSpec.builderFor(schema).withSpecId(2)
      .alwaysNull("truncate_field", "truncate_field_trunc")
      .bucket("bucket_field", 2)
      .alwaysNull("truncate_field", "truncate_field_trunc_3")
      .truncate("truncate_field", 4, "truncate_field_trunc_4").build();
  table.refresh();
  Assert.assertEquals(spec, table.spec());
  // Keep the already changed one, change the other one (change the order of clauses in the spec)
  shell.executeStatement("ALTER TABLE default.part_test " +
      "SET PARTITION SPEC (bucket(3, bucket_field), truncate(4, truncate_field))");
  spec = PartitionSpec.builderFor(schema).withSpecId(3)
      .alwaysNull("truncate_field", "truncate_field_trunc")
      .alwaysNull("bucket_field", "bucket_field_bucket")
      .alwaysNull("truncate_field", "truncate_field_trunc_3")
      .truncate("truncate_field", 4, "truncate_field_trunc_4")
      .bucket("bucket_field", 3, "bucket_field_bucket_3").build();
  table.refresh();
  Assert.assertEquals(spec, table.spec());
}
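The ALTER TABLE ... SET PARTITION SPEC statements above drive Iceberg's partition spec evolution. A hedged sketch of the first evolution step done through the Java API instead of SQL, assuming `table` is a loaded org.apache.iceberg.Table and Expressions is imported from org.apache.iceberg.expressions:

// Replace truncate(2, truncate_field) with truncate(3, truncate_field); on a v1
// table the removed field survives as a void (alwaysNull) transform, which is
// exactly what the expected specs asserted in the test encode.
table.updateSpec()
    .removeField("truncate_field_trunc")
    .addField("truncate_field_trunc_3", Expressions.truncate("truncate_field", 3))
    .commit();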
Use of org.apache.iceberg.catalog.TableIdentifier in project hive by apache.
The class TestHiveIcebergStorageHandlerNoScan, method testAlterTableChangeColumnTypeAndComment.
@Test
public void testAlterTableChangeColumnTypeAndComment() throws TException, InterruptedException {
  TableIdentifier identifier = TableIdentifier.of("default", "customers");
  Schema schema = new Schema(
      optional(1, "customer_id", Types.IntegerType.get()),
      optional(2, "last_name", Types.StringType.get(), "This is last name"));
  testTables.createTable(shell, identifier.name(), schema, SPEC, FileFormat.PARQUET, ImmutableList.of());
  shell.executeStatement("ALTER TABLE default.customers CHANGE COLUMN " +
      "customer_id customer_id bigint COMMENT 'This is an identifier'");

  org.apache.iceberg.Table icebergTable = testTables.loadTable(identifier);
  org.apache.hadoop.hive.metastore.api.Table hmsTable = shell.metastore().getTable("default", "customers");
  List<FieldSchema> icebergSchema = HiveSchemaUtil.convert(icebergTable.schema());
  List<FieldSchema> hmsSchema = hmsTable.getSd().getCols();
  List<FieldSchema> expectedSchema = Lists.newArrayList(
      new FieldSchema("customer_id", "bigint", "This is an identifier"),
      new FieldSchema("last_name", "string", "This is last name"));
  Assert.assertEquals(expectedSchema, icebergSchema);
  if (testTableType != TestTables.TestTableType.HIVE_CATALOG) {
    expectedSchema.stream()
        .filter(fs -> fs.getComment() == null)
        .forEach(fs -> fs.setComment("from deserializer"));
  }
  Assert.assertEquals(expectedSchema, hmsSchema);
}
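The CHANGE COLUMN statement above corresponds to Iceberg's schema evolution API. A minimal sketch of the equivalent change, assuming `icebergTable` is a loaded org.apache.iceberg.Table:

// Widen customer_id from int to bigint (an allowed type promotion) and set
// its doc in the same commit.
icebergTable.updateSchema()
    .updateColumn("customer_id", Types.LongType.get(), "This is an identifier")
    .commit();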
Use of org.apache.iceberg.catalog.TableIdentifier in project hive by apache.
The class TestTables, method createTable.
/**
* Creates a partitioned Hive test table using Hive SQL. The table will be in the 'default' database.
* The table will be populated with the provided List of {@link Record}s using a Hive insert statement.
* @param shell The HiveShell used for Hive table creation
* @param tableName The name of the test table
* @param schema The schema used for the table creation
* @param spec The partition specification for the table
* @param fileFormat The file format used for writing the data
* @param records The records with which the table is populated
* @param formatVersion The version of the spec the table should use (format-version)
* @return The created table
* @throws IOException If there is an error writing data
*/
public Table createTable(TestHiveShell shell, String tableName, Schema schema, PartitionSpec spec,
    FileFormat fileFormat, List<Record> records, Integer formatVersion) {
  TableIdentifier identifier = TableIdentifier.of("default", tableName);
  String tblProps = propertiesForCreateTableSQL(ImmutableMap.of(
      TableProperties.DEFAULT_FILE_FORMAT, fileFormat.toString(),
      InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(schema),
      InputFormatConfig.PARTITION_SPEC, PartitionSpecParser.toJson(spec),
      TableProperties.FORMAT_VERSION, Integer.toString(formatVersion)));
  shell.executeStatement("CREATE EXTERNAL TABLE " + identifier + " STORED BY ICEBERG " +
      locationForCreateTableSQL(identifier) + tblProps);
  if (records != null && !records.isEmpty()) {
    String query = getInsertQuery(records, identifier, false);
    shell.executeStatement(query);
  }
  return loadTable(identifier);
}
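A hypothetical call to this helper, with the schema, spec, and record constants assumed to be defined elsewhere in the test suite (as in the tests above):

// Create a partitioned, format-version 2 Parquet table in 'default',
// populate it via a Hive INSERT, and load it back as an Iceberg Table.
Table table = testTables.createTable(shell, "customers",
    HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, SPEC,
    FileFormat.PARQUET, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, 2);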