Use of org.apache.iceberg.PartitionSpec in project hive by apache.
From the class Catalogs, method createTable.
/**
 * Creates an Iceberg table using the catalog specified by the configuration.
 * <p>
 * The properties should contain the following values:
 * <ul>
 * <li>Table identifier ({@link Catalogs#NAME}) or table path ({@link Catalogs#LOCATION}) is required
 * <li>Table schema ({@link InputFormatConfig#TABLE_SCHEMA}) is required
 * <li>Partition specification ({@link InputFormatConfig#PARTITION_SPEC}) is optional; the table will be
 * unpartitioned if it is not provided
 * </ul><p>
 * Other properties will be handed over to the table creation. The controlling properties above will not be
 * propagated.
 * @param conf a Hadoop configuration
 * @param props the controlling properties
 * @return the created Iceberg table
 */
public static Table createTable(Configuration conf, Properties props) {
  String schemaString = props.getProperty(InputFormatConfig.TABLE_SCHEMA);
  Preconditions.checkNotNull(schemaString, "Table schema not set");
  Schema schema = SchemaParser.fromJson(schemaString);

  String specString = props.getProperty(InputFormatConfig.PARTITION_SPEC);
  PartitionSpec spec = PartitionSpec.unpartitioned();
  if (specString != null) {
    spec = PartitionSpecParser.fromJson(schema, specString);
  }

  String location = props.getProperty(LOCATION);
  String catalogName = props.getProperty(InputFormatConfig.CATALOG_NAME);

  // Create a table property map without the controlling properties
  Map<String, String> map = Maps.newHashMapWithExpectedSize(props.size());
  for (Object key : props.keySet()) {
    if (!PROPERTIES_TO_REMOVE.contains(key)) {
      map.put(key.toString(), props.get(key).toString());
    }
  }

  Optional<Catalog> catalog = loadCatalog(conf, catalogName);
  if (catalog.isPresent()) {
    String name = props.getProperty(NAME);
    Preconditions.checkNotNull(name, "Table identifier not set");
    return catalog.get().createTable(TableIdentifier.parse(name), schema, spec, location, map);
  }

  Preconditions.checkNotNull(location, "Table location not set");
  return new HadoopTables(conf).create(schema, spec, map, location);
}
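For orientation, a minimal caller sketch showing how the controlling properties line up with the checks above. This is not from the Hive source; the "default.sales" identifier and the two-column schema are made up for illustration.

// Hedged usage sketch: the table name and schema below are illustrative.
Configuration conf = new Configuration();
Schema schema = new Schema(
    optional(1, "id", Types.LongType.get()),
    optional(2, "region", Types.StringType.get()));
PartitionSpec spec = PartitionSpec.builderFor(schema).identity("region").build();

Properties props = new Properties();
props.setProperty(Catalogs.NAME, "default.sales");
props.setProperty(InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(schema));
props.setProperty(InputFormatConfig.PARTITION_SPEC, PartitionSpecParser.toJson(spec));
Table table = Catalogs.createTable(conf, props);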
Use of org.apache.iceberg.PartitionSpec in project hive by apache.
From the class TestHiveIcebergStatistics, method testStatsWithPartitionedInsert.
@Test
public void testStatsWithPartitionedInsert() {
  TableIdentifier identifier = TableIdentifier.of("default", "customers");
  PartitionSpec spec = PartitionSpec.builderFor(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA)
      .identity("last_name")
      .build();
  shell.setHiveSessionValue(HiveConf.ConfVars.HIVESTATSAUTOGATHER.varname, true);
  testTables.createTable(shell, identifier.name(), HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, spec,
      fileFormat, ImmutableList.of());
  if (testTableType != TestTables.TestTableType.HIVE_CATALOG) {
    // If the location is set and we have to gather stats, then we have to update the table stats now
    shell.executeStatement("ANALYZE TABLE " + identifier + " COMPUTE STATISTICS FOR COLUMNS");
  }
  String insert = testTables.getInsertQuery(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, identifier, false);
  shell.executeStatement(insert);
  checkColStat(identifier.name(), "customer_id", true);
  checkColStat(identifier.name(), "first_name", true);
  checkColStatMinMaxValue(identifier.name(), "customer_id", 0, 2);
}
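As a quick cross-check at the Iceberg level, the written table can be reloaded to confirm the identity partitioning on last_name. This is an illustrative addition, not part of the original test; it assumes testTables.loadTable behaves as in the other tests on this page.

// Illustrative follow-up check on the Iceberg table itself.
Table icebergTable = testTables.loadTable(TableIdentifier.of("default", "customers"));
Assert.assertEquals(1, icebergTable.spec().fields().size());
Assert.assertEquals("last_name", icebergTable.spec().fields().get(0).name());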
Use of org.apache.iceberg.PartitionSpec in project hive by apache.
From the class TestHiveIcebergStorageHandlerNoScan, method testPartitionEvolution.
@Test
public void testPartitionEvolution() {
  Schema schema = new Schema(
      optional(1, "id", Types.LongType.get()),
      optional(2, "ts", Types.TimestampType.withZone()));
  TableIdentifier identifier = TableIdentifier.of("default", "part_test");
  shell.executeStatement("CREATE EXTERNAL TABLE " + identifier + " STORED BY ICEBERG " +
      testTables.locationForCreateTableSQL(identifier) +
      " TBLPROPERTIES ('" + InputFormatConfig.TABLE_SCHEMA + "'='" + SchemaParser.toJson(schema) + "', " +
      "'" + InputFormatConfig.CATALOG_NAME + "'='" + testTables.catalogName() + "')");

  shell.executeStatement("ALTER TABLE " + identifier + " SET PARTITION SPEC (month(ts))");
  PartitionSpec spec = PartitionSpec.builderFor(schema)
      .withSpecId(1)
      .month("ts")
      .build();
  Table table = testTables.loadTable(identifier);
  Assert.assertEquals(spec, table.spec());

  shell.executeStatement("ALTER TABLE " + identifier + " SET PARTITION SPEC (day(ts))");
  spec = PartitionSpec.builderFor(schema)
      .withSpecId(2)
      .alwaysNull("ts", "ts_month")
      .day("ts")
      .build();
  table.refresh();
  Assert.assertEquals(spec, table.spec());
}
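The same evolution can also be driven through Iceberg's Java API instead of HiveQL. A minimal sketch, assuming a freshly created, unpartitioned table handle like the one above:

// Sketch: the same two evolutions via Iceberg's UpdatePartitionSpec API
// (uses org.apache.iceberg.expressions.Expressions).
table.updateSpec()
    .addField(Expressions.month("ts"))    // spec id 1: month(ts)
    .commit();
table.updateSpec()
    .removeField("ts_month")              // removed fields survive as alwaysNull/void in V1 specs
    .addField(Expressions.day("ts"))      // spec id 2: day(ts)
    .commit();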
Use of org.apache.iceberg.PartitionSpec in project hive by apache.
From the class TestHiveIcebergStorageHandlerNoScan, method testCreatePartitionedTableWithPropertiesAndWithColumnSpecification.
@Test
public void testCreatePartitionedTableWithPropertiesAndWithColumnSpecification() {
  PartitionSpec spec = PartitionSpec.builderFor(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA)
      .identity("last_name")
      .build();
  AssertHelpers.assertThrows("should throw exception", IllegalArgumentException.class,
      "Provide only one of the following", () -> {
        shell.executeStatement("CREATE EXTERNAL TABLE customers (customer_id BIGINT) " +
            "PARTITIONED BY (first_name STRING) " +
            "STORED BY ICEBERG " +
            testTables.locationForCreateTableSQL(TableIdentifier.of("default", "customers")) +
            testTables.propertiesForCreateTableSQL(
                ImmutableMap.of(InputFormatConfig.PARTITION_SPEC, PartitionSpecParser.toJson(spec))));
      });
}
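Each of the two rejected clauses works on its own; it is only their combination that throws. As a hedged sketch (not from the original test; the column list is adjusted so the spec's last_name source column exists), the property-only variant would look like this:

// Sketch: spec passed only through table properties, with no PARTITIONED BY clause.
shell.executeStatement("CREATE EXTERNAL TABLE customers " +
    "(customer_id BIGINT, first_name STRING, last_name STRING) " +
    "STORED BY ICEBERG " +
    testTables.locationForCreateTableSQL(TableIdentifier.of("default", "customers")) +
    testTables.propertiesForCreateTableSQL(
        ImmutableMap.of(InputFormatConfig.PARTITION_SPEC, PartitionSpecParser.toJson(spec))));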
Use of org.apache.iceberg.PartitionSpec in project hive by apache.
From the class TestHiveIcebergStorageHandlerNoScan, method testSetPartitionTransformSameField.
@Test
public void testSetPartitionTransformSameField() {
  Schema schema = new Schema(
      optional(1, "id", Types.LongType.get()),
      optional(2, "truncate_field", Types.StringType.get()),
      optional(3, "bucket_field", Types.StringType.get()));
  TableIdentifier identifier = TableIdentifier.of("default", "part_test");
  shell.executeStatement("CREATE EXTERNAL TABLE " + identifier +
      " PARTITIONED BY SPEC (truncate(2, truncate_field), bucket(2, bucket_field))" +
      " STORED BY ICEBERG " +
      testTables.locationForCreateTableSQL(identifier) +
      "TBLPROPERTIES ('" + InputFormatConfig.TABLE_SCHEMA + "'='" + SchemaParser.toJson(schema) + "', " +
      "'" + InputFormatConfig.CATALOG_NAME + "'='" + testTables.catalogName() + "')");

  PartitionSpec spec = PartitionSpec.builderFor(schema)
      .truncate("truncate_field", 2)
      .bucket("bucket_field", 2)
      .build();
  Table table = testTables.loadTable(identifier);
  Assert.assertEquals(spec, table.spec());

  // Change one, keep one
  shell.executeStatement("ALTER TABLE default.part_test " +
      "SET PARTITION SPEC (truncate(3, truncate_field), bucket(2, bucket_field))");
  spec = PartitionSpec.builderFor(schema)
      .withSpecId(1)
      .alwaysNull("truncate_field", "truncate_field_trunc")
      .bucket("bucket_field", 2)
      .truncate("truncate_field", 3, "truncate_field_trunc_3")
      .build();
  table.refresh();
  Assert.assertEquals(spec, table.spec());

  // Change one again, keep the other one
  shell.executeStatement("ALTER TABLE default.part_test " +
      "SET PARTITION SPEC (truncate(4, truncate_field), bucket(2, bucket_field))");
  spec = PartitionSpec.builderFor(schema)
      .withSpecId(2)
      .alwaysNull("truncate_field", "truncate_field_trunc")
      .bucket("bucket_field", 2)
      .alwaysNull("truncate_field", "truncate_field_trunc_3")
      .truncate("truncate_field", 4, "truncate_field_trunc_4")
      .build();
  table.refresh();
  Assert.assertEquals(spec, table.spec());

  // Keep the already changed field, change the other one (and swap the order of clauses in the spec)
  shell.executeStatement("ALTER TABLE default.part_test " +
      "SET PARTITION SPEC (bucket(3, bucket_field), truncate(4, truncate_field))");
  spec = PartitionSpec.builderFor(schema)
      .withSpecId(3)
      .alwaysNull("truncate_field", "truncate_field_trunc")
      .alwaysNull("bucket_field", "bucket_field_bucket")
      .alwaysNull("truncate_field", "truncate_field_trunc_3")
      .truncate("truncate_field", 4, "truncate_field_trunc_4")
      .bucket("bucket_field", 3, "bucket_field_bucket_3")
      .build();
  table.refresh();
  Assert.assertEquals(spec, table.spec());
}
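A note on the alwaysNull(...) calls above: when a V1 partition spec drops a field, Iceberg keeps the old field as a void transform under its original name, which is why each evolved spec re-lists its retired fields before the live ones. An illustrative way to see that shape on the final spec (the printed transform names reflect the assertions above and are an assumption about toString output):

// Sketch: print the final spec's fields; "void" marks retired partition fields.
for (PartitionField field : table.spec().fields()) {
  System.out.println(field.name() + " -> " + field.transform());
}
// Expected shape after the last ALTER, per the assertions above:
//   truncate_field_trunc   -> void
//   bucket_field_bucket    -> void
//   truncate_field_trunc_3 -> void
//   truncate_field_trunc_4 -> truncate[4]
//   bucket_field_bucket_3  -> bucket[3]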