Use of org.apache.iceberg.catalog.TableIdentifier in project hive by apache.
The class TestIcebergInputFormats, method testCustomCatalog:
@Test
public void testCustomCatalog() throws IOException {
  String warehouseLocation = temp.newFolder("hadoop_catalog").getAbsolutePath();
  conf.set("warehouse.location", warehouseLocation);
  conf.set(InputFormatConfig.CATALOG_NAME, Catalogs.ICEBERG_DEFAULT_CATALOG_NAME);
  conf.set(InputFormatConfig.catalogPropertyConfigKey(
      Catalogs.ICEBERG_DEFAULT_CATALOG_NAME, CatalogUtil.ICEBERG_CATALOG_TYPE),
      CatalogUtil.ICEBERG_CATALOG_TYPE_HADOOP);
  conf.set(InputFormatConfig.catalogPropertyConfigKey(
      Catalogs.ICEBERG_DEFAULT_CATALOG_NAME, CatalogProperties.WAREHOUSE_LOCATION),
      warehouseLocation);

  Catalog catalog = new HadoopCatalog(conf, conf.get("warehouse.location"));
  TableIdentifier identifier = TableIdentifier.of("db", "t");
  Table table = catalog.createTable(identifier, SCHEMA, SPEC, helper.properties());
  helper.setTable(table);

  List<Record> expectedRecords = helper.generateRandomRecords(1, 0L);
  expectedRecords.get(0).set(2, "2020-03-20");
  helper.appendToTable(Row.of("2020-03-20", 0), expectedRecords);

  builder.readFrom(identifier);
  testInputFormat.create(builder.conf()).validate(expectedRecords);
}
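As context for the configuration calls above: catalogPropertyConfigKey builds per-catalog Hadoop configuration keys of the form iceberg.catalog.&lt;catalog name&gt;.&lt;property&gt;. A minimal sketch of the same Hadoop catalog setup outside the test harness, assuming a placeholder warehouse path and a hypothetical two-column schema in place of the test's SCHEMA constant:

import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.Schema;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.hadoop.HadoopCatalog;
import org.apache.iceberg.types.Types;

public class HadoopCatalogSketch {
  public static void main(String[] args) {
    // "file:/tmp/warehouse" is a placeholder; a HadoopCatalog stores each
    // table under <warehouse>/<namespace>/<table name>.
    Configuration conf = new Configuration();
    HadoopCatalog catalog = new HadoopCatalog(conf, "file:/tmp/warehouse");

    // Hypothetical schema standing in for the test's SCHEMA constant.
    Schema schema = new Schema(
        Types.NestedField.required(1, "id", Types.LongType.get()),
        Types.NestedField.optional(2, "data", Types.StringType.get()));
    catalog.createTable(TableIdentifier.of("db", "t"), schema);
  }
}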
Use of org.apache.iceberg.catalog.TableIdentifier in project hive by apache.
The class TestHiveIcebergStorageHandlerLocalScan, method testCreateTableWithColumnSpecificationPartitioned:
@Test
public void testCreateTableWithColumnSpecificationPartitioned() throws IOException {
  TableIdentifier identifier = TableIdentifier.of("default", "customers");
  PartitionSpec spec = PartitionSpec.builderFor(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA)
      .identity("last_name")
      .build();
  Map<StructLike, List<Record>> data = ImmutableMap.of(
      Row.of("Brown"), Collections.singletonList(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS.get(0)),
      Row.of("Green"), Collections.singletonList(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS.get(1)),
      Row.of("Pink"), Collections.singletonList(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS.get(2)));
  String createSql = "CREATE EXTERNAL TABLE " + identifier +
      " (customer_id BIGINT, first_name STRING COMMENT 'This is first name') " +
      "PARTITIONED BY (last_name STRING COMMENT 'This is last name') " +
      "STORED BY ICEBERG " +
      testTables.locationForCreateTableSQL(identifier) +
      testTables.propertiesForCreateTableSQL(ImmutableMap.of());
  runCreateAndReadTest(identifier, createSql, HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, spec, data);
}
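The Hive PARTITIONED BY clause in the statement above is translated into an Iceberg identity partition on last_name, which is why the expected spec is built with identity(). A minimal sketch of the same spec built against a hypothetical stand-in for CUSTOMER_SCHEMA:

import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.types.Types;

// Hypothetical stand-in for HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA.
Schema customerSchema = new Schema(
    Types.NestedField.required(1, "customer_id", Types.LongType.get()),
    Types.NestedField.optional(2, "first_name", Types.StringType.get()),
    Types.NestedField.optional(3, "last_name", Types.StringType.get()));

// Identity partitioning stores the raw column value as the partition value.
PartitionSpec spec = PartitionSpec.builderFor(customerSchema)
    .identity("last_name")
    .build();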
Use of org.apache.iceberg.catalog.TableIdentifier in project hive by apache.
The class TestHiveIcebergInserts, method testInsertOverwriteBucketPartitionedTableThrowsError:
@Test
public void testInsertOverwriteBucketPartitionedTableThrowsError() {
  TableIdentifier target = TableIdentifier.of("default", "target");
  PartitionSpec spec = PartitionSpec.builderFor(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA)
      .bucket("last_name", 16)
      .identity("customer_id")
      .build();
  testTables.createTable(shell, target.name(), HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
      spec, fileFormat, ImmutableList.of());
  AssertHelpers.assertThrows("IOW should not work on bucket partitioned table",
      IllegalArgumentException.class,
      "Cannot perform insert overwrite query on bucket partitioned Iceberg table",
      () -> shell.executeStatement(
          testTables.getInsertQuery(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, target, true)));
}
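The overwrite is rejected because a bucket partition value is a hash-derived bucket ordinal rather than the column value itself, so an INSERT OVERWRITE cannot be matched to the partitions it would replace. A small sketch of what the bucket transform produces (API shape as in the Iceberg versions these tests build against; treat as an illustration):

import org.apache.iceberg.transforms.Transforms;
import org.apache.iceberg.types.Types;

// Hashes "Brown" into one of 16 buckets; the stored partition value is the
// bucket ordinal (0..15), not the string itself.
Integer bucket = Transforms.bucket(Types.StringType.get(), 16).apply("Brown");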
Use of org.apache.iceberg.catalog.TableIdentifier in project hive by apache.
The class TestHiveIcebergInserts, method testWriteWithDefaultWriteFormat:
@Test
public void testWriteWithDefaultWriteFormat() {
  Assume.assumeTrue("Testing the default file format is enough for a single scenario.",
      testTableType == TestTables.TestTableType.HIVE_CATALOG && fileFormat == FileFormat.ORC);
  TableIdentifier identifier = TableIdentifier.of("default", "customers");

  // Create the Iceberg table without specifying a write format in the table
  // properties; writes should fall back to the default file format.
  shell.executeStatement(String.format(
      "CREATE EXTERNAL TABLE %s (id bigint, name string) STORED BY '%s' %s %s",
      identifier, HiveIcebergStorageHandler.class.getName(),
      testTables.locationForCreateTableSQL(identifier),
      testTables.propertiesForCreateTableSQL(ImmutableMap.of())));
  shell.executeStatement(String.format("INSERT INTO %s VALUES (10, 'Linda')", identifier));

  List<Object[]> results = shell.executeStatement(String.format("SELECT * FROM %s", identifier));
  Assert.assertEquals(1, results.size());
  Assert.assertEquals(10L, results.get(0)[0]);
  Assert.assertEquals("Linda", results.get(0)[1]);
}
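To pin the write format instead of relying on the fallback, the Iceberg table property write.format.default (TableProperties.DEFAULT_FILE_FORMAT in the Java API) can be set at creation time. A hypothetical variant of the CREATE statement above:

// Hypothetical variant: pins the write format explicitly via TBLPROPERTIES
// rather than falling back to the default file format.
shell.executeStatement(String.format(
    "CREATE EXTERNAL TABLE %s (id bigint, name string) STORED BY '%s' %s " +
    "TBLPROPERTIES ('write.format.default'='orc')",
    identifier, HiveIcebergStorageHandler.class.getName(),
    testTables.locationForCreateTableSQL(identifier)));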
Use of org.apache.iceberg.catalog.TableIdentifier in project hive by apache.
The class TestHiveIcebergSchemaEvolution, method testSchemaEvolutionForMigratedTables:
@Test
public void testSchemaEvolutionForMigratedTables() {
  // Create a standard Hive table with some records.
  TableIdentifier tableIdentifier = TableIdentifier.of("default", "customers");
  shell.executeStatement(String.format(
      "CREATE EXTERNAL TABLE customers (id bigint, first_name string, last_name string) STORED AS %s %s",
      fileFormat, testTables.locationForCreateTableSQL(tableIdentifier)));
  shell.executeStatement("INSERT INTO customers VALUES (11, 'Lisa', 'Truman')");

  // Migrate it to Iceberg.
  shell.executeStatement("ALTER TABLE customers SET TBLPROPERTIES " +
      "('storage_handler'='org.apache.iceberg.mr.hive.HiveIcebergStorageHandler')");

  // Attempt schema evolution operations that are illegal on a migrated table.
  AssertHelpers.assertThrows("issuing a replace columns operation on a migrated Iceberg table should throw",
      IllegalArgumentException.class,
      "Cannot perform REPLACE COLUMNS operation on a migrated Iceberg table",
      () -> shell.executeStatement("ALTER TABLE customers REPLACE COLUMNS (id bigint, last_name string)"));
  AssertHelpers.assertThrows("issuing a change column operation on a migrated Iceberg table should throw",
      IllegalArgumentException.class,
      "Cannot perform CHANGE COLUMN operation on a migrated Iceberg table",
      () -> shell.executeStatement("ALTER TABLE customers CHANGE COLUMN id customer_id bigint"));

  // Verify that valid operations still succeed.
  shell.executeStatement("ALTER TABLE customers UPDATE COLUMNS");
  shell.executeStatement("ALTER TABLE customers ADD COLUMNS (date_joined timestamp)");

  // Double-check that the schema change was applied safely.
  shell.executeStatement("INSERT INTO customers VALUES (22, 'Mike', 'Bloomfield', from_unixtime(unix_timestamp()))");
  List<Object[]> result = shell.executeStatement("SELECT * FROM customers ORDER BY id");
  Assert.assertEquals(2, result.size());
  // The first record, inserted before the new column existed, has a null timestamp.
  Assert.assertNull(result.get(0)[3]);
  // The second record has the timestamp filled in.
  Assert.assertNotNull(result.get(1)[3]);
}
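The ADD COLUMNS statement above corresponds to Iceberg's UpdateSchema API, which adds new columns as optional; that is why the pre-existing row reads back a null timestamp. A minimal sketch of the equivalent change made directly through a loaded Table, assuming a catalog has already been configured:

import org.apache.iceberg.Table;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.types.Types;

// Hive's timezone-unaware timestamp maps to Iceberg's timestamp without zone;
// addColumn creates the column as optional, so existing rows return null.
Table table = catalog.loadTable(TableIdentifier.of("default", "customers"));
table.updateSchema()
    .addColumn("date_joined", Types.TimestampType.withoutZone())
    .commit();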