Search in sources :

Example 36 with PartitionSpec

use of org.apache.iceberg.PartitionSpec in project hive by apache.

the class TestHiveCatalog method testTableName.

/**
 * Creates a bucketed table, loads it back from the catalog and checks the reported name, then does
 * the same for the identifier obtained by appending "snapshots" to the table identifier.
 * The table is dropped afterwards regardless of the outcome.
 */
@Test
public void testTableName() {
    Schema tableSchema = new Schema(
        required(1, "id", Types.IntegerType.get(), "unique ID"),
        required(2, "data", Types.StringType.get()));
    PartitionSpec bucketedSpec = PartitionSpec.builderFor(tableSchema)
        .bucket("data", 16)
        .build();
    TableIdentifier baseId = TableIdentifier.of(DB_NAME, "tbl");

    try {
        catalog.buildTable(baseId, tableSchema)
            .withPartitionSpec(bucketedSpec)
            .create();

        // Name of the table itself.
        Table loaded = catalog.loadTable(baseId);
        Assert.assertEquals("Name must match", "hive.hivedb.tbl", loaded.name());

        // Name of the table addressed through the extra "snapshots" identifier level.
        TableIdentifier snapshotsId = TableIdentifier.of(DB_NAME, "tbl", "snapshots");
        Table loadedSnapshots = catalog.loadTable(snapshotsId);
        Assert.assertEquals("Name must match", "hive.hivedb.tbl.snapshots", loadedSnapshots.name());
    } finally {
        catalog.dropTable(baseId);
    }
}
Also used : TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) Table(org.apache.iceberg.Table) Schema(org.apache.iceberg.Schema) PartitionSpec(org.apache.iceberg.PartitionSpec) Test(org.junit.Test)

Example 37 with PartitionSpec

use of org.apache.iceberg.PartitionSpec in project hive by apache.

the class IcebergTableUtil method updateSpec.

/**
 * Rewrites the partition spec of {@code table} from the partition transform specs looked up via
 * {@code SessionStateUtil.getResource(configuration, hive_metastoreConstants.PARTITION_TRANSFORM_SPEC)}.
 * <p>
 * Every field of the table's current spec is removed, one field is added for each transform spec with
 * a handled transform type, and the update is then committed. The table is left untouched when
 * {@code spec(configuration, table.schema())} returns {@code null} or when no transform spec list is
 * available.
 *
 * @param configuration configuration used to look up the partition transform specs
 * @param table the table whose partition spec is updated
 */
public static void updateSpec(Configuration configuration, Table table) {
    // get the new partition transform spec
    PartitionSpec newPartitionSpec = spec(configuration, table.schema());
    if (newPartitionSpec == null) {
        LOG.debug("Iceberg Partition spec is not updated due to empty partition spec definition.");
        return;
    }

    // Look the transform list up before staging any change, so that a missing resource ends in a
    // logged no-op instead of a NullPointerException when the list is iterated below.
    List<PartitionTransformSpec> partitionTransformSpecList = SessionStateUtil
        .getResource(configuration, hive_metastoreConstants.PARTITION_TRANSFORM_SPEC)
        .map(o -> (List<PartitionTransformSpec>) o)
        .orElse(null);
    if (partitionTransformSpecList == null) {
        LOG.debug("Iceberg Partition spec is not updated because no partition transform spec list is available.");
        return;
    }

    // delete every field from the old partition spec
    UpdatePartitionSpec updatePartitionSpec = table.updateSpec().caseSensitive(false);
    table.spec().fields().forEach(field -> updatePartitionSpec.removeField(field.name()));

    // add one field per requested transform
    for (PartitionTransformSpec transformSpec : partitionTransformSpecList) {
        switch (transformSpec.getTransformType()) {
            case IDENTITY:
                updatePartitionSpec.addField(transformSpec.getColumnName());
                break;
            case YEAR:
                updatePartitionSpec.addField(Expressions.year(transformSpec.getColumnName()));
                break;
            case MONTH:
                updatePartitionSpec.addField(Expressions.month(transformSpec.getColumnName()));
                break;
            case DAY:
                updatePartitionSpec.addField(Expressions.day(transformSpec.getColumnName()));
                break;
            case HOUR:
                updatePartitionSpec.addField(Expressions.hour(transformSpec.getColumnName()));
                break;
            case TRUNCATE:
                updatePartitionSpec.addField(
                    Expressions.truncate(transformSpec.getColumnName(), transformSpec.getTransformParam().get()));
                break;
            case BUCKET:
                updatePartitionSpec.addField(
                    Expressions.bucket(transformSpec.getColumnName(), transformSpec.getTransformParam().get()));
                break;
            default:
                // any other transform type adds no field
                break;
        }
    }
    updatePartitionSpec.commit();
}
Also used : PartitionTransformSpec(org.apache.hadoop.hive.ql.parse.PartitionTransformSpec) Properties(java.util.Properties) Logger(org.slf4j.Logger) Table(org.apache.iceberg.Table) Catalogs(org.apache.iceberg.mr.Catalogs) LoggerFactory(org.slf4j.LoggerFactory) Schema(org.apache.iceberg.Schema) List(java.util.List) UpdatePartitionSpec(org.apache.iceberg.UpdatePartitionSpec) Configuration(org.apache.hadoop.conf.Configuration) PartitionSpec(org.apache.iceberg.PartitionSpec) SessionStateUtil(org.apache.hadoop.hive.ql.session.SessionStateUtil) QueryState(org.apache.hadoop.hive.ql.QueryState) Expressions(org.apache.iceberg.expressions.Expressions) org.apache.hadoop.hive.metastore.api.hive_metastoreConstants(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants) UpdatePartitionSpec(org.apache.iceberg.UpdatePartitionSpec) List(java.util.List) UpdatePartitionSpec(org.apache.iceberg.UpdatePartitionSpec) PartitionSpec(org.apache.iceberg.PartitionSpec) PartitionTransformSpec(org.apache.hadoop.hive.ql.parse.PartitionTransformSpec)

Example 38 with PartitionSpec

use of org.apache.iceberg.PartitionSpec in project hive by apache.

the class TestHiveIcebergStorageHandlerNoScan method testSetPartitionTransformCaseSensitive.

/**
 * Creates a table partitioned by truncate(2, ...) and bucket(2, ...), checks the loaded spec, then
 * issues an ALTER TABLE ... SET PARTITION SPEC statement written with mixed-case transform and column
 * names and checks the table spec after a refresh.
 */
@Test
public void testSetPartitionTransformCaseSensitive() {
    Schema schema = new Schema(
        optional(1, "id", Types.LongType.get()),
        optional(2, "truncate_field", Types.StringType.get()),
        optional(3, "bucket_field", Types.StringType.get()));
    TableIdentifier tableId = TableIdentifier.of("default", "part_test");

    String createTableSql = "CREATE EXTERNAL TABLE " + tableId
        + " PARTITIONED BY SPEC (truncate(2, truncate_field), bucket(2, bucket_field))"
        + " STORED BY ICEBERG "
        + testTables.locationForCreateTableSQL(tableId)
        + "TBLPROPERTIES ('" + InputFormatConfig.TABLE_SCHEMA + "'='" + SchemaParser.toJson(schema) + "', "
        + "'" + InputFormatConfig.CATALOG_NAME + "'='" + testTables.catalogName() + "')";
    shell.executeStatement(createTableSql);

    // Spec expected right after creation.
    PartitionSpec initialSpec = PartitionSpec.builderFor(schema)
        .truncate("truncate_field", 2)
        .bucket("bucket_field", 2)
        .build();
    Table table = testTables.loadTable(tableId);
    Assert.assertEquals(initialSpec, table.spec());

    String alterSpecSql = "ALTER TABLE default.part_test "
        + "SET PARTITION SPEC (truncaTe(3, truncate_Field), buCket(3, bUckeT_field))";
    shell.executeStatement(alterSpecSql);

    // Spec expected after the ALTER: the two original fields as always-null, plus the two new ones.
    PartitionSpec alteredSpec = PartitionSpec.builderFor(schema)
        .withSpecId(1)
        .alwaysNull("truncate_field", "truncate_field_trunc")
        .alwaysNull("bucket_field", "bucket_field_bucket")
        .truncate("truncate_field", 3, "truncate_field_trunc_3")
        .bucket("bucket_field", 3, "bucket_field_bucket_3")
        .build();
    table.refresh();
    Assert.assertEquals(alteredSpec, table.spec());
}
Also used : TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) BaseTable(org.apache.iceberg.BaseTable) Table(org.apache.iceberg.Table) UpdateSchema(org.apache.iceberg.UpdateSchema) Schema(org.apache.iceberg.Schema) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) PartitionSpec(org.apache.iceberg.PartitionSpec) Test(org.junit.Test)

Example 39 with PartitionSpec

use of org.apache.iceberg.PartitionSpec in project hive by apache.

the class TestHiveIcebergTruncateTable method testTruncateTablePartitionedIcebergTable.

/**
 * Sets up an Iceberg table partitioned by identity on "last_name" with five initial customer
 * records and hands it to {@code testTruncateTable}. Per the original author's note, the intent is
 * to run a truncate table command and then check that the data is deleted and the table
 * statistics are reset to 0.
 */
@Test
public void testTruncateTablePartitionedIcebergTable() throws IOException, TException, InterruptedException {
    String dbName = "default";
    String tblName = "customers";

    PartitionSpec lastNameSpec = PartitionSpec.builderFor(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA)
        .identity("last_name")
        .build();

    // Initial rows written into the table.
    List<Record> initialRecords = TestHelper.RecordsBuilder
        .newInstance(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA)
        .add(0L, "Alice", "Brown")
        .add(1L, "Bob", "Brown")
        .add(2L, "Trudy", "Green")
        .add(3L, "John", "Pink")
        .add(4L, "Jane", "Pink")
        .build();

    Table partitionedTable = testTables.createTable(
        shell, tblName, HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, lastNameSpec, fileFormat, initialRecords);

    testTruncateTable(
        dbName, tblName, partitionedTable, initialRecords,
        HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, true, false);
}
Also used : Table(org.apache.iceberg.Table) Record(org.apache.iceberg.data.Record) PartitionSpec(org.apache.iceberg.PartitionSpec) Test(org.junit.Test)

Example 40 with PartitionSpec

use of org.apache.iceberg.PartitionSpec in project hive by apache.

the class TestHiveIcebergStorageHandlerNoScan method testCommandsWithPartitionClauseThrow.

/**
 * Creates an empty table partitioned by identity on "last_name" and asserts that each of several
 * statements carrying a PARTITION clause fails with an IllegalArgumentException whose message
 * contains "Using partition spec in query is unsupported".
 */
@Test
public void testCommandsWithPartitionClauseThrow() {
    TableIdentifier targetId = TableIdentifier.of("default", "target");
    PartitionSpec lastNameSpec = PartitionSpec.builderFor(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA)
        .identity("last_name")
        .build();
    testTables.createTable(
        shell, targetId.name(), HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, lastNameSpec,
        FileFormat.PARQUET, ImmutableList.of());

    String[] partitionedStatements = {
        "INSERT INTO target PARTITION (last_name='Johnson') VALUES (1, 'Rob')",
        "INSERT OVERWRITE TABLE target PARTITION (last_name='Johnson') SELECT * FROM target WHERE FALSE",
        "DESCRIBE target PARTITION (last_name='Johnson')",
        "TRUNCATE target PARTITION (last_name='Johnson')"
    };

    for (int i = 0; i < partitionedStatements.length; i++) {
        // Copy into a local so the lambda captures an effectively final value.
        String statement = partitionedStatements[i];
        AssertHelpers.assertThrows(
            "Should throw unsupported operation exception for queries with partition spec",
            IllegalArgumentException.class,
            "Using partition spec in query is unsupported",
            () -> shell.executeStatement(statement));
    }
}
Also used : TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) PartitionSpec(org.apache.iceberg.PartitionSpec) Test(org.junit.Test)

Aggregations

PartitionSpec (org.apache.iceberg.PartitionSpec): 63, Table (org.apache.iceberg.Table): 40, Test (org.junit.Test): 39, Schema (org.apache.iceberg.Schema): 38, TableIdentifier (org.apache.iceberg.catalog.TableIdentifier): 19, Record (org.apache.iceberg.data.Record): 19, List (java.util.List): 10, ArrayList (java.util.ArrayList): 9, FileFormat (org.apache.iceberg.FileFormat): 9, FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 8, IOException (java.io.IOException): 7, ImmutableList (org.apache.iceberg.relocated.com.google.common.collect.ImmutableList): 7, UpdateSchema (org.apache.iceberg.UpdateSchema): 6, Path (org.apache.hadoop.fs.Path): 5, BaseTable (org.apache.iceberg.BaseTable): 5, DataFile (org.apache.iceberg.DataFile): 5, PartitionField (org.apache.iceberg.PartitionField): 4, Types (org.apache.iceberg.types.Types): 4, HdfsContext (com.facebook.presto.hive.HdfsContext): 3, PrestoException (com.facebook.presto.spi.PrestoException): 3