Use of org.apache.iceberg.Table in project hive by apache.
From the class HiveCreateReplaceTableTest, method testReplaceTableTxnTableModifiedConcurrently.
/**
 * Verifies that a replace-table transaction still commits when the table is
 * modified concurrently, and that the replace wins: the property added outside
 * the transaction is discarded while the transaction's own property is kept.
 */
@Test
public void testReplaceTableTxnTableModifiedConcurrently() {
  Table table = catalog.createTable(TABLE_IDENTIFIER, SCHEMA, SPEC, tableLocation, Maps.newHashMap());
  Assert.assertTrue("Table should exist", catalog.tableExists(TABLE_IDENTIFIER));

  Transaction txn = catalog.newReplaceTableTransaction(TABLE_IDENTIFIER, SCHEMA, SPEC, false);

  // update the table concurrently, outside the transaction
  table.updateProperties().set("another-prop", "another-value").commit();

  txn.updateProperties().set("prop", "value").commit();
  txn.commitTransaction();

  // the replace should still succeed
  table = catalog.loadTable(TABLE_IDENTIFIER);
  // Message fixed: the original said "should be updated" while asserting the prop is absent.
  // The replace transaction discards the concurrently added property.
  Assert.assertNull("Replaced table should not carry the concurrently added prop",
      table.properties().get("another-prop"));
  Assert.assertEquals("Table props should match", "value", table.properties().get("prop"));
}
Use of org.apache.iceberg.Table in project hive by apache.
From the class HiveIcebergMetaHook, method getCatalogProperties.
/**
* Calculates the properties we would like to send to the catalog.
* <ul>
* <li>The base of the properties is the properties stored at the Hive Metastore for the given table
* <li>We add the {@link Catalogs#LOCATION} as the table location
* <li>We add the {@link Catalogs#NAME} as TableIdentifier defined by the database name and table name
* <li>We add the serdeProperties of the HMS table
* <li>We remove some parameters that we don't want to push down to the Iceberg table props
* </ul>
* @param hmsTable Table for which we are calculating the properties
* @return The properties we can provide for Iceberg functions, like {@link Catalogs}
*/
/**
 * Calculates the properties we would like to send to the catalog.
 * <ul>
 * <li>The base of the properties is the properties stored at the Hive Metastore for the given table
 * <li>We add the {@link Catalogs#LOCATION} as the table location
 * <li>We add the {@link Catalogs#NAME} as TableIdentifier defined by the database name and table name
 * <li>We add the serdeProperties of the HMS table
 * <li>We remove some parameters that we don't want to push down to the Iceberg table props
 * </ul>
 * @param hmsTable Table for which we are calculating the properties
 * @return The properties we can provide for Iceberg functions, like {@link Catalogs}
 */
private static Properties getCatalogProperties(org.apache.hadoop.hive.metastore.api.Table hmsTable) {
  Properties properties = new Properties();

  hmsTable.getParameters().entrySet().stream()
      .filter(e -> e.getKey() != null && e.getValue() != null)
      .forEach(e -> {
        // translate key names between HMS and Iceberg where needed
        String icebergKey = HiveTableOperations.translateToIcebergProp(e.getKey());
        properties.put(icebergKey, e.getValue());
      });

  if (properties.get(Catalogs.LOCATION) == null &&
      hmsTable.getSd() != null && hmsTable.getSd().getLocation() != null) {
    properties.put(Catalogs.LOCATION, hmsTable.getSd().getLocation());
  }

  if (properties.get(Catalogs.NAME) == null) {
    properties.put(Catalogs.NAME, TableIdentifier.of(hmsTable.getDbName(), hmsTable.getTableName()).toString());
  }

  // The location branch above proves getSd() can be null — guard before dereferencing it here
  // too, otherwise an HMS table without a StorageDescriptor causes an NPE.
  if (hmsTable.getSd() != null) {
    SerDeInfo serdeInfo = hmsTable.getSd().getSerdeInfo();
    if (serdeInfo != null) {
      serdeInfo.getParameters().entrySet().stream()
          .filter(e -> e.getKey() != null && e.getValue() != null)
          .forEach(e -> {
            String icebergKey = HiveTableOperations.translateToIcebergProp(e.getKey());
            properties.put(icebergKey, e.getValue());
          });
    }
  }

  // Remove HMS table parameters we don't want to propagate to Iceberg
  PROPERTIES_TO_REMOVE.forEach(properties::remove);

  return properties;
}
Use of org.apache.iceberg.Table in project hive by apache.
From the class HiveTableTest, method testListTables.
/**
 * Verifies that {@code listTables} returns only Iceberg tables: a plain Hive
 * table created in the same namespace must not show up in the listing.
 */
@Test
public void testListTables() throws TException, IOException {
  List<TableIdentifier> tableIdents = catalog.listTables(TABLE_IDENTIFIER.namespace());
  List<TableIdentifier> expectedIdents = tableIdents.stream()
      .filter(t -> t.namespace().level(0).equals(DB_NAME) && t.name().equals(TABLE_NAME))
      .collect(Collectors.toList());
  Assert.assertEquals(1, expectedIdents.size());
  Assert.assertTrue(catalog.tableExists(TABLE_IDENTIFIER));

  // create a plain (non-Iceberg) hive table in the same namespace
  String hiveTableName = "test_hive_table";
  org.apache.hadoop.hive.metastore.api.Table hiveTable = createHiveTable(hiveTableName);
  metastoreClient.createTable(hiveTable);

  // Message fixed: original read "should only 1 iceberg table ." (missing verb)
  List<TableIdentifier> tableIdents1 = catalog.listTables(TABLE_IDENTIFIER.namespace());
  Assert.assertEquals("There should be only 1 iceberg table", 1, tableIdents1.size());
  Assert.assertTrue(catalog.tableExists(TABLE_IDENTIFIER));

  // clean up the extra hive table
  metastoreClient.dropTable(DB_NAME, hiveTableName);
}
Use of org.apache.iceberg.Table in project hive by apache.
From the class HiveTableTest, method testDropTable.
/**
 * Verifies that dropping a table also removes its data files, manifest list,
 * manifests, and metadata file from the file system.
 */
@Test
public void testDropTable() throws IOException {
  Table table = catalog.loadTable(TABLE_IDENTIFIER);

  GenericRecordBuilder recordBuilder = new GenericRecordBuilder(AvroSchemaUtil.convert(schema, "test"));
  List<GenericData.Record> records = Lists.newArrayList(
      recordBuilder.set("id", 1L).build(),
      recordBuilder.set("id", 2L).build(),
      recordBuilder.set("id", 3L).build());

  String location1 = table.location().replace("file:", "") + "/data/file1.avro";
  try (FileAppender<GenericData.Record> writer =
      Avro.write(Files.localOutput(location1)).schema(schema).named("test").build()) {
    for (GenericData.Record rec : records) {
      writer.add(rec);
    }
  }

  String location2 = table.location().replace("file:", "") + "/data/file2.avro";
  try (FileAppender<GenericData.Record> writer =
      Avro.write(Files.localOutput(location2)).schema(schema).named("test").build()) {
    for (GenericData.Record rec : records) {
      writer.add(rec);
    }
  }

  // Bug fix: the original sized file1 with location2's length and file2 with location1's
  // length (crossed arguments). Each DataFile must report its own file's size.
  DataFile file1 = DataFiles.builder(table.spec())
      .withRecordCount(3)
      .withPath(location1)
      .withFileSizeInBytes(Files.localInput(location1).getLength())
      .build();
  DataFile file2 = DataFiles.builder(table.spec())
      .withRecordCount(3)
      .withPath(location2)
      .withFileSizeInBytes(Files.localInput(location2).getLength())
      .build();

  // add both data files
  table.newAppend().appendFile(file1).appendFile(file2).commit();

  // delete file2
  table.newDelete().deleteFile(file2.path()).commit();

  // capture metadata locations before the drop so we can verify they are gone afterwards
  String manifestListLocation = table.currentSnapshot().manifestListLocation().replace("file:", "");
  List<ManifestFile> manifests = table.currentSnapshot().allManifests();

  Assert.assertTrue("Drop (table and data) should return true and drop the table",
      catalog.dropTable(TABLE_IDENTIFIER));
  Assert.assertFalse("Table should not exist", catalog.tableExists(TABLE_IDENTIFIER));

  Assert.assertFalse("Table data files should not exist", new File(location1).exists());
  Assert.assertFalse("Table data files should not exist", new File(location2).exists());
  Assert.assertFalse("Table manifest list files should not exist", new File(manifestListLocation).exists());
  for (ManifestFile manifest : manifests) {
    Assert.assertFalse("Table manifest files should not exist",
        new File(manifest.path().replace("file:", "")).exists());
  }
  Assert.assertFalse("Table metadata file should not exist",
      new File(((HasTableOperations) table).operations().current().metadataFileLocation().replace("file:", "")).exists());
}
Use of org.apache.iceberg.Table in project hive by apache.
From the class HiveTableTest, method testDropWithoutPurgeLeavesTableData.
/**
 * Verifies that dropping a table with purge disabled removes the catalog entry
 * but leaves the data and metadata files in place on the file system.
 */
@Test
public void testDropWithoutPurgeLeavesTableData() throws IOException {
  Table table = catalog.loadTable(TABLE_IDENTIFIER);

  // write a small avro data file with three records
  GenericRecordBuilder builder = new GenericRecordBuilder(AvroSchemaUtil.convert(schema, "test"));
  List<GenericData.Record> rows = Lists.newArrayList(
      builder.set("id", 1L).build(),
      builder.set("id", 2L).build(),
      builder.set("id", 3L).build());

  String fileLocation = table.location().replace("file:", "") + "/data/file.avro";
  try (FileAppender<GenericData.Record> appender =
      Avro.write(Files.localOutput(fileLocation)).schema(schema).named("test").build()) {
    rows.forEach(appender::add);
  }

  // register the file with the table
  DataFile dataFile = DataFiles.builder(table.spec())
      .withRecordCount(3)
      .withPath(fileLocation)
      .withFileSizeInBytes(Files.localInput(fileLocation).getLength())
      .build();
  table.newAppend().appendFile(dataFile).commit();

  String manifestListLocation = table.currentSnapshot().manifestListLocation().replace("file:", "");

  // drop without purge: catalog entry goes away, files stay
  Assert.assertTrue("Drop should return true and drop the table",
      catalog.dropTable(TABLE_IDENTIFIER, false));
  Assert.assertFalse("Table should not exist", catalog.tableExists(TABLE_IDENTIFIER));
  Assert.assertTrue("Table data files should exist", new File(fileLocation).exists());
  Assert.assertTrue("Table metadata files should exist", new File(manifestListLocation).exists());
}
Aggregations