Use of org.apache.iceberg.catalog.Catalog in project hive by apache.
The class Catalogs, method createTable.
/**
 * Creates an Iceberg table using the catalog specified by the configuration.
 * <p>
 * The properties should contain the following values:
 * <ul>
 * <li>Table identifier ({@link Catalogs#NAME}) or table path ({@link Catalogs#LOCATION}) is required
 * <li>Table schema ({@link InputFormatConfig#TABLE_SCHEMA}) is required
 * <li>Partition specification ({@link InputFormatConfig#PARTITION_SPEC}) is optional. The table will be
 * unpartitioned if not provided
 * </ul><p>
 * Other properties will be handed over to the table creation. The controlling properties above will not be
 * propagated.
 * @param conf a Hadoop conf
 * @param props the controlling properties
 * @return the created Iceberg table
 */
public static Table createTable(Configuration conf, Properties props) {
  String schemaString = props.getProperty(InputFormatConfig.TABLE_SCHEMA);
  Preconditions.checkNotNull(schemaString, "Table schema not set");
  Schema schema = SchemaParser.fromJson(schemaString);

  String specString = props.getProperty(InputFormatConfig.PARTITION_SPEC);
  PartitionSpec spec = PartitionSpec.unpartitioned();
  if (specString != null) {
    spec = PartitionSpecParser.fromJson(schema, specString);
  }

  String location = props.getProperty(LOCATION);
  String catalogName = props.getProperty(InputFormatConfig.CATALOG_NAME);

  // Create a table property map without the controlling properties
  Map<String, String> map = Maps.newHashMapWithExpectedSize(props.size());
  for (Object key : props.keySet()) {
    if (!PROPERTIES_TO_REMOVE.contains(key)) {
      map.put(key.toString(), props.get(key).toString());
    }
  }

  Optional<Catalog> catalog = loadCatalog(conf, catalogName);

  if (catalog.isPresent()) {
    String name = props.getProperty(NAME);
    Preconditions.checkNotNull(name, "Table identifier not set");
    return catalog.get().createTable(TableIdentifier.parse(name), schema, spec, location, map);
  }

  Preconditions.checkNotNull(location, "Table location not set");
  return new HadoopTables(conf).create(schema, spec, map, location);
}
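For reference, a minimal caller sketch (not taken from the Hive sources; the identifier, location, field names and the extra table property are all illustrative). It shows how the controlling properties are assembled: the schema and spec travel as JSON strings, and whichever of the catalog path or the HadoopTables fallback applies depends on how catalogs are configured in the conf (for example via the iceberg.catalog.* keys shown in the tests below):

import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.PartitionSpecParser;
import org.apache.iceberg.Schema;
import org.apache.iceberg.SchemaParser;
import org.apache.iceberg.Table;
import org.apache.iceberg.mr.Catalogs;
import org.apache.iceberg.mr.InputFormatConfig;
import org.apache.iceberg.types.Types;
import static org.apache.iceberg.types.Types.NestedField.required;

public class CreateTableSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();

    // The controlling properties carry the schema and the partition spec as JSON
    Schema schema = new Schema(
        required(1, "id", Types.LongType.get()),
        required(2, "data", Types.StringType.get()));
    PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("data", 16).build();

    Properties props = new Properties();
    props.put(Catalogs.NAME, "db.example_table");                           // table identifier (illustrative)
    props.put(Catalogs.LOCATION, "file:///tmp/warehouse/db/example_table"); // used by the HadoopTables fallback
    props.put(InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(schema));
    props.put(InputFormatConfig.PARTITION_SPEC, PartitionSpecParser.toJson(spec));
    props.put("write.format.default", "parquet");                           // handed over to the table, not a controlling property

    Table table = Catalogs.createTable(conf, props);
    System.out.println("Created table at " + table.location());
  }
}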
Use of org.apache.iceberg.catalog.Catalog in project hive by apache.
The class TestIcebergInputFormats, method testCustomCatalog.
@Test
public void testCustomCatalog() throws IOException {
  String warehouseLocation = temp.newFolder("hadoop_catalog").getAbsolutePath();
  conf.set("warehouse.location", warehouseLocation);
  conf.set(InputFormatConfig.CATALOG_NAME, Catalogs.ICEBERG_DEFAULT_CATALOG_NAME);
  conf.set(InputFormatConfig.catalogPropertyConfigKey(
      Catalogs.ICEBERG_DEFAULT_CATALOG_NAME, CatalogUtil.ICEBERG_CATALOG_TYPE),
      CatalogUtil.ICEBERG_CATALOG_TYPE_HADOOP);
  conf.set(InputFormatConfig.catalogPropertyConfigKey(
      Catalogs.ICEBERG_DEFAULT_CATALOG_NAME, CatalogProperties.WAREHOUSE_LOCATION),
      warehouseLocation);

  Catalog catalog = new HadoopCatalog(conf, conf.get("warehouse.location"));
  TableIdentifier identifier = TableIdentifier.of("db", "t");
  Table table = catalog.createTable(identifier, SCHEMA, SPEC, helper.properties());
  helper.setTable(table);

  List<Record> expectedRecords = helper.generateRandomRecords(1, 0L);
  expectedRecords.get(0).set(2, "2020-03-20");
  helper.appendToTable(Row.of("2020-03-20", 0), expectedRecords);

  builder.readFrom(identifier);
  testInputFormat.create(builder.conf()).validate(expectedRecords);
}
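The catalogPropertyConfigKey helper expands to keys of the form iceberg.catalog.&lt;catalogName&gt;.&lt;property&gt;, so the two generated conf.set calls above are equivalent to the literal keys below (a sketch assuming Catalogs.ICEBERG_DEFAULT_CATALOG_NAME resolves to "default_iceberg"; verify the constants against your Hive/Iceberg version):

// Equivalent literal configuration, assuming the standard iceberg.catalog. prefix
// and the "default_iceberg" default catalog name
conf.set("iceberg.catalog.default_iceberg.type", "hadoop");
conf.set("iceberg.catalog.default_iceberg.warehouse", warehouseLocation);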
Use of org.apache.iceberg.catalog.Catalog in project hive by apache.
The class TestCatalogs, method testLoadCatalogHadoop.
@Test
public void testLoadCatalogHadoop() {
  String catalogName = "barCatalog";
  conf.set(InputFormatConfig.catalogPropertyConfigKey(
      catalogName, CatalogUtil.ICEBERG_CATALOG_TYPE),
      CatalogUtil.ICEBERG_CATALOG_TYPE_HADOOP);
  conf.set(InputFormatConfig.catalogPropertyConfigKey(
      catalogName, CatalogProperties.WAREHOUSE_LOCATION),
      "/tmp/mylocation");

  Optional<Catalog> hadoopCatalog = Catalogs.loadCatalog(conf, catalogName);

  Assert.assertTrue(hadoopCatalog.isPresent());
  Assertions.assertThat(hadoopCatalog.get()).isInstanceOf(HadoopCatalog.class);
  Assert.assertEquals("HadoopCatalog{name=barCatalog, location=/tmp/mylocation}",
      hadoopCatalog.get().toString());

  Properties properties = new Properties();
  properties.put(InputFormatConfig.CATALOG_NAME, catalogName);
  Assert.assertFalse(Catalogs.hiveCatalog(conf, properties));
}
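The same mechanism loads other catalog types. A hedged sketch for a Hive-type catalog, mirroring the Hadoop case above (the catalog name is illustrative, imports as in the test plus org.apache.iceberg.hive.HiveCatalog, and a reachable metastore, e.g. via hive.metastore.uris, may additionally be required):

String hiveCatalogName = "fooCatalog";  // illustrative name
conf.set(InputFormatConfig.catalogPropertyConfigKey(
    hiveCatalogName, CatalogUtil.ICEBERG_CATALOG_TYPE),
    CatalogUtil.ICEBERG_CATALOG_TYPE_HIVE);

Optional<Catalog> hiveCatalog = Catalogs.loadCatalog(conf, hiveCatalogName);
Assert.assertTrue(hiveCatalog.isPresent());
Assertions.assertThat(hiveCatalog.get()).isInstanceOf(HiveCatalog.class);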
Use of org.apache.iceberg.catalog.Catalog in project hive by apache.
The class TestHiveCatalog, method testCreateTableWithCaching.
@Test
public void testCreateTableWithCaching() throws Exception {
  Schema schema = new Schema(
      required(1, "id", Types.IntegerType.get(), "unique ID"),
      required(2, "data", Types.StringType.get()));
  PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("data", 16).build();
  TableIdentifier tableIdent = TableIdentifier.of(DB_NAME, "tbl");
  String location = temp.newFolder("tbl").toString();
  ImmutableMap<String, String> properties = ImmutableMap.of("key1", "value1", "key2", "value2");
  Catalog cachingCatalog = CachingCatalog.wrap(catalog);

  try {
    Table table = cachingCatalog.createTable(tableIdent, schema, spec, location, properties);
    Assert.assertEquals(location, table.location());
    Assert.assertEquals(2, table.schema().columns().size());
    Assert.assertEquals(1, table.spec().fields().size());
    Assert.assertEquals("value1", table.properties().get("key1"));
    Assert.assertEquals("value2", table.properties().get("key2"));
  } finally {
    cachingCatalog.dropTable(tableIdent);
  }
}
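CachingCatalog.wrap decorates any Catalog so that table loads are memoized per identifier. A brief sketch of the effect (the identity assertion reflects an assumption about CachingCatalog's default caching behavior, which the test above does not verify):

Catalog cached = CachingCatalog.wrap(catalog);
Table first = cached.loadTable(tableIdent);
Table second = cached.loadTable(tableIdent);
// Both loads are expected to return the same memoized instance,
// avoiding a second round trip to the underlying (Hive) catalog
Assert.assertSame(first, second);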
Use of org.apache.iceberg.catalog.Catalog in project hive by apache.
The class Catalogs, method dropTable.
/**
 * Drops an Iceberg table using the catalog specified by the configuration.
 * <p>
 * The table identifier ({@link Catalogs#NAME}) or table path ({@link Catalogs#LOCATION}) should be specified by
 * the controlling properties.
 * @param conf a Hadoop conf
 * @param props the controlling properties
 * @return true if the table was dropped successfully
 */
public static boolean dropTable(Configuration conf, Properties props) {
  String location = props.getProperty(LOCATION);
  String catalogName = props.getProperty(InputFormatConfig.CATALOG_NAME);

  Optional<Catalog> catalog = loadCatalog(conf, catalogName);

  if (catalog.isPresent()) {
    String name = props.getProperty(NAME);
    Preconditions.checkNotNull(name, "Table identifier not set");
    return catalog.get().dropTable(TableIdentifier.parse(name));
  }

  Preconditions.checkNotNull(location, "Table location not set");
  return new HadoopTables(conf).dropTable(location);
}
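A matching caller sketch for dropTable (the identifier is illustrative; as in createTable, the catalog-based path is taken only when loadCatalog resolves a catalog, otherwise the location-based HadoopTables path applies):

Properties props = new Properties();
props.put(Catalogs.NAME, "db.example_table");  // or Catalogs.LOCATION for path-based tables
boolean dropped = Catalogs.dropTable(new Configuration(), props);
System.out.println("Dropped: " + dropped);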