Search in sources :

Example 36 with Table

use of org.apache.iceberg.Table in project hive by apache.

the class TestHiveIcebergStorageHandlerNoScan method testIcebergAndHmsTableProperties.

/**
 * Creates an Iceberg-backed Hive table with a custom property and checks the table properties seen
 * through the Iceberg API and through the Hive Metastore (HMS), then checks how the HMS parameters
 * react to Iceberg property updates, property removals and a data append.
 *
 * <p>The property-removal and statistics checks only run when {@code Catalogs.hiveCatalog(...)}
 * returns true for the table; otherwise only the parameter counts are verified.
 *
 * @throws Exception if any of the shell, metastore or table operations used by the test fails
 */
@Test
public void testIcebergAndHmsTableProperties() throws Exception {
    TableIdentifier identifier = TableIdentifier.of("default", "customers");
    shell.executeStatement(String.format("CREATE EXTERNAL TABLE default.customers " + "STORED BY ICEBERG %s" + "TBLPROPERTIES ('%s'='%s', '%s'='%s', '%s'='%s', '%s'='%s')", // we need the location for HadoopTable based tests only
    testTables.locationForCreateTableSQL(identifier), InputFormatConfig.TABLE_SCHEMA, SchemaParser.toJson(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA), InputFormatConfig.PARTITION_SPEC, PartitionSpecParser.toJson(SPEC), "custom_property", "initial_val", InputFormatConfig.CATALOG_NAME, testTables.catalogName()));
    // Check the Iceberg table parameters
    org.apache.iceberg.Table icebergTable = testTables.loadTable(identifier);
    Map<String, String> expectedIcebergProperties = new HashMap<>();
    expectedIcebergProperties.put("custom_property", "initial_val");
    expectedIcebergProperties.put("EXTERNAL", "TRUE");
    expectedIcebergProperties.put("storage_handler", HiveIcebergStorageHandler.class.getName());
    expectedIcebergProperties.put(serdeConstants.SERIALIZATION_FORMAT, "1");
    // Check the HMS table parameters
    org.apache.hadoop.hive.metastore.api.Table hmsTable = shell.metastore().getTable("default", "customers");
    // Parameters listed in IGNORED_PARAMS are excluded, so the size assertions below count only the rest
    Map<String, String> hmsParams = hmsTable.getParameters().entrySet().stream().filter(e -> !IGNORED_PARAMS.contains(e.getKey())).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
    // Catalogs.hiveCatalog takes a Properties object, so the filtered HMS parameters are copied into one
    Properties tableProperties = new Properties();
    tableProperties.putAll(hmsParams);
    if (Catalogs.hiveCatalog(shell.getHiveConf(), tableProperties)) {
        expectedIcebergProperties.put(TableProperties.ENGINE_HIVE_ENABLED, "true");
    }
    if (MetastoreUtil.hive3PresentOnClasspath()) {
        expectedIcebergProperties.put("bucketing_version", "2");
    }
    Assert.assertEquals(expectedIcebergProperties, icebergTable.properties());
    if (Catalogs.hiveCatalog(shell.getHiveConf(), tableProperties)) {
        Assert.assertEquals(10, hmsParams.size());
        Assert.assertEquals("initial_val", hmsParams.get("custom_property"));
        Assert.assertEquals("TRUE", hmsParams.get(InputFormatConfig.EXTERNAL_TABLE_PURGE));
        Assert.assertEquals("TRUE", hmsParams.get("EXTERNAL"));
        Assert.assertEquals("true", hmsParams.get(TableProperties.ENGINE_HIVE_ENABLED));
        Assert.assertEquals(HiveIcebergStorageHandler.class.getName(), hmsParams.get(hive_metastoreConstants.META_TABLE_STORAGE));
        Assert.assertEquals(BaseMetastoreTableOperations.ICEBERG_TABLE_TYPE_VALUE.toUpperCase(), hmsParams.get(BaseMetastoreTableOperations.TABLE_TYPE_PROP));
        // JUnit order is (expected, actual): the HMS parameter is the value under test
        Assert.assertEquals(getCurrentSnapshotForHiveCatalogTable(icebergTable), hmsParams.get(BaseMetastoreTableOperations.METADATA_LOCATION_PROP));
        Assert.assertNull(hmsParams.get(BaseMetastoreTableOperations.PREVIOUS_METADATA_LOCATION_PROP));
        Assert.assertNotNull(hmsParams.get(hive_metastoreConstants.DDL_TIME));
        Assert.assertNotNull(hmsParams.get(serdeConstants.SERIALIZATION_FORMAT));
    } else {
        Assert.assertEquals(7, hmsParams.size());
        Assert.assertNull(hmsParams.get(TableProperties.ENGINE_HIVE_ENABLED));
    }
    // Check HMS inputformat/outputformat/serde
    Assert.assertEquals(HiveIcebergInputFormat.class.getName(), hmsTable.getSd().getInputFormat());
    Assert.assertEquals(HiveIcebergOutputFormat.class.getName(), hmsTable.getSd().getOutputFormat());
    Assert.assertEquals(HiveIcebergSerDe.class.getName(), hmsTable.getSd().getSerdeInfo().getSerializationLib());
    // Add two new properties to the Iceberg table and update an existing one
    icebergTable.updateProperties().set("new_prop_1", "true").set("new_prop_2", "false").set("custom_property", "new_val").commit();
    // Refresh the HMS table to see if new Iceberg properties got synced into HMS
    hmsParams = shell.metastore().getTable("default", "customers").getParameters().entrySet().stream().filter(e -> !IGNORED_PARAMS.contains(e.getKey())).collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
    if (Catalogs.hiveCatalog(shell.getHiveConf(), tableProperties)) {
        // 2 newly-added properties + previous_metadata_location prop
        Assert.assertEquals(13, hmsParams.size());
        Assert.assertEquals("true", hmsParams.get("new_prop_1"));
        Assert.assertEquals("false", hmsParams.get("new_prop_2"));
        Assert.assertEquals("new_val", hmsParams.get("custom_property"));
        // Capture the value before and after refresh(); the statement order matters here
        String prevSnapshot = getCurrentSnapshotForHiveCatalogTable(icebergTable);
        icebergTable.refresh();
        String newSnapshot = getCurrentSnapshotForHiveCatalogTable(icebergTable);
        Assert.assertEquals(prevSnapshot, hmsParams.get(BaseMetastoreTableOperations.PREVIOUS_METADATA_LOCATION_PROP));
        Assert.assertEquals(newSnapshot, hmsParams.get(BaseMetastoreTableOperations.METADATA_LOCATION_PROP));
    } else {
        Assert.assertEquals(7, hmsParams.size());
    }
    // Remove some Iceberg props and see if they're removed from HMS table props as well
    if (Catalogs.hiveCatalog(shell.getHiveConf(), tableProperties)) {
        icebergTable.updateProperties().remove("custom_property").remove("new_prop_1").commit();
        // Unfiltered parameters are sufficient: only presence/absence of specific keys is checked
        hmsParams = shell.metastore().getTable("default", "customers").getParameters();
        Assert.assertFalse(hmsParams.containsKey("custom_property"));
        Assert.assertFalse(hmsParams.containsKey("new_prop_1"));
        Assert.assertTrue(hmsParams.containsKey("new_prop_2"));
    }
    // append some data and check whether HMS stats are aligned with snapshot summary
    if (Catalogs.hiveCatalog(shell.getHiveConf(), tableProperties)) {
        List<Record> records = HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS;
        testTables.appendIcebergTable(shell.getHiveConf(), icebergTable, FileFormat.PARQUET, null, records);
        hmsParams = shell.metastore().getTable("default", "customers").getParameters();
        Map<String, String> summary = icebergTable.currentSnapshot().summary();
        Assert.assertEquals(summary.get(SnapshotSummary.TOTAL_DATA_FILES_PROP), hmsParams.get(StatsSetupConst.NUM_FILES));
        Assert.assertEquals(summary.get(SnapshotSummary.TOTAL_RECORDS_PROP), hmsParams.get(StatsSetupConst.ROW_COUNT));
        Assert.assertEquals(summary.get(SnapshotSummary.TOTAL_FILE_SIZE_PROP), hmsParams.get(StatsSetupConst.TOTAL_SIZE));
    }
}
Also used : TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) Types(org.apache.iceberg.types.Types) UpdateSchema(org.apache.iceberg.UpdateSchema) FileSystem(org.apache.hadoop.fs.FileSystem) URISyntaxException(java.net.URISyntaxException) HiveSchemaUtil(org.apache.iceberg.hive.HiveSchemaUtil) Catalogs(org.apache.iceberg.mr.Catalogs) NestedField.optional(org.apache.iceberg.types.Types.NestedField.optional) PartitionField(org.apache.iceberg.PartitionField) Lists(org.apache.iceberg.relocated.com.google.common.collect.Lists) StatsSetupConst(org.apache.hadoop.hive.common.StatsSetupConst) Map(java.util.Map) NoSuchTableException(org.apache.iceberg.exceptions.NoSuchTableException) After(org.junit.After) Path(org.apache.hadoop.fs.Path) URI(java.net.URI) Parameterized(org.junit.runners.Parameterized) AssertHelpers(org.apache.iceberg.AssertHelpers) CommitFailedException(org.apache.iceberg.exceptions.CommitFailedException) AfterClass(org.junit.AfterClass) BaseTable(org.apache.iceberg.BaseTable) Collection(java.util.Collection) org.apache.hadoop.hive.serde.serdeConstants(org.apache.hadoop.hive.serde.serdeConstants) InputFormatConfig(org.apache.iceberg.mr.InputFormatConfig) Set(java.util.Set) ImmutableList(org.apache.iceberg.relocated.com.google.common.collect.ImmutableList) Schema(org.apache.iceberg.Schema) Collectors(java.util.stream.Collectors) PartitionSpecParser(org.apache.iceberg.PartitionSpecParser) SchemaParser(org.apache.iceberg.SchemaParser) Type(org.apache.iceberg.types.Type) Util(org.apache.iceberg.hadoop.Util) List(java.util.List) MetastoreUtil(org.apache.iceberg.hive.MetastoreUtil) PartitionSpec(org.apache.iceberg.PartitionSpec) TableProperties(org.apache.iceberg.TableProperties) ImmutableSet(org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet) BeforeClass(org.junit.BeforeClass) ImmutableMap(org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap) RunWith(org.junit.runner.RunWith) 
Parameters(org.junit.runners.Parameterized.Parameters) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) GC_ENABLED(org.apache.iceberg.TableProperties.GC_ENABLED) BaseMetastoreTableOperations(org.apache.iceberg.BaseMetastoreTableOperations) Assume(org.junit.Assume) Before(org.junit.Before) Properties(java.util.Properties) TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) Table(org.apache.iceberg.Table) Parameter(org.junit.runners.Parameterized.Parameter) EnvironmentContext(org.apache.hadoop.hive.metastore.api.EnvironmentContext) TException(org.apache.thrift.TException) IOException(java.io.IOException) Test(org.junit.Test) FileFormat(org.apache.iceberg.FileFormat) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) SnapshotSummary(org.apache.iceberg.SnapshotSummary) Record(org.apache.iceberg.data.Record) Rule(org.junit.Rule) Assert(org.junit.Assert) Collections(java.util.Collections) org.apache.hadoop.hive.metastore.api.hive_metastoreConstants(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants) TemporaryFolder(org.junit.rules.TemporaryFolder) HashMap(java.util.HashMap) Table(org.apache.iceberg.Table) TableProperties(org.apache.iceberg.TableProperties) Properties(java.util.Properties) Record(org.apache.iceberg.data.Record) Map(java.util.Map) ImmutableMap(org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) Test(org.junit.Test)

Example 37 with Table

use of org.apache.iceberg.Table in project hive by apache.

the class TestHiveIcebergStorageHandlerNoScan method testCreateTableWithoutColumnComments.

/**
 * Creates a table whose columns carry no comments and checks that every Iceberg field has a null doc
 * while the matching DESCRIBE row reports "from deserializer" in its third position.
 */
@Test
public void testCreateTableWithoutColumnComments() {
    TableIdentifier identifier = TableIdentifier.of("default", "without_comment_table");
    String createSql = "CREATE EXTERNAL TABLE without_comment_table (t_int INT,  t_string STRING) STORED BY ICEBERG "
        + testTables.locationForCreateTableSQL(identifier)
        + testTables.propertiesForCreateTableSQL(ImmutableMap.of());
    shell.executeStatement(createSql);

    org.apache.iceberg.Table icebergTable = testTables.loadTable(identifier);
    List<Object[]> describeRows = shell.executeStatement("DESCRIBE default.without_comment_table");

    // DESCRIBE must return exactly one row per Iceberg column
    List<Types.NestedField> columns = icebergTable.schema().columns();
    Assert.assertEquals(columns.size(), describeRows.size());

    int index = 0;
    for (Types.NestedField column : columns) {
        Assert.assertNull(column.doc());
        Object[] expectedRow = new Object[] {
            column.name(),
            HiveSchemaUtil.convert(column.type()).getTypeName(),
            "from deserializer"
        };
        Assert.assertArrayEquals(expectedRow, describeRows.get(index));
        index++;
    }
}
Also used : TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) Types(org.apache.iceberg.types.Types) Table(org.apache.iceberg.Table) Test(org.junit.Test)

Example 38 with Table

use of org.apache.iceberg.Table in project hive by apache.

the class TestHiveIcebergStorageHandlerNoScan method testSetPartitionTransformSameField.

/**
 * Repeatedly alters the partition spec of a table that is partitioned by a truncate and a bucket
 * transform, changing the transform parameter of one source field at a time, and checks after each
 * ALTER that the refreshed table spec equals the expected spec built by hand.
 */
@Test
public void testSetPartitionTransformSameField() {
    TableIdentifier identifier = TableIdentifier.of("default", "part_test");
    Schema schema = new Schema(
        optional(1, "id", Types.LongType.get()),
        optional(2, "truncate_field", Types.StringType.get()),
        optional(3, "bucket_field", Types.StringType.get()));

    String createSql = "CREATE EXTERNAL TABLE " + identifier
        + " PARTITIONED BY SPEC (truncate(2, truncate_field), bucket(2, bucket_field))"
        + " STORED BY ICEBERG " + testTables.locationForCreateTableSQL(identifier)
        + "TBLPROPERTIES ('" + InputFormatConfig.TABLE_SCHEMA + "'='" + SchemaParser.toJson(schema) + "', "
        + "'" + InputFormatConfig.CATALOG_NAME + "'='" + testTables.catalogName() + "')";
    shell.executeStatement(createSql);

    PartitionSpec initialSpec = PartitionSpec.builderFor(schema)
        .truncate("truncate_field", 2)
        .bucket("bucket_field", 2)
        .build();
    Table table = testTables.loadTable(identifier);
    Assert.assertEquals(initialSpec, table.spec());

    // Change one, keep one
    shell.executeStatement("ALTER TABLE default.part_test SET PARTITION SPEC (truncate(3, truncate_field), bucket(2, bucket_field) )");
    PartitionSpec afterFirstAlter = PartitionSpec.builderFor(schema)
        .withSpecId(1)
        .alwaysNull("truncate_field", "truncate_field_trunc")
        .bucket("bucket_field", 2)
        .truncate("truncate_field", 3, "truncate_field_trunc_3")
        .build();
    table.refresh();
    Assert.assertEquals(afterFirstAlter, table.spec());

    // Change one again, keep the other one
    shell.executeStatement("ALTER TABLE default.part_test SET PARTITION SPEC (truncate(4, truncate_field), bucket(2, bucket_field) )");
    PartitionSpec afterSecondAlter = PartitionSpec.builderFor(schema)
        .withSpecId(2)
        .alwaysNull("truncate_field", "truncate_field_trunc")
        .bucket("bucket_field", 2)
        .alwaysNull("truncate_field", "truncate_field_trunc_3")
        .truncate("truncate_field", 4, "truncate_field_trunc_4")
        .build();
    table.refresh();
    Assert.assertEquals(afterSecondAlter, table.spec());

    // Keep the already changed, change the other one (change the order of clauses in the spec)
    shell.executeStatement("ALTER TABLE default.part_test SET PARTITION SPEC (bucket(3, bucket_field), truncate(4, truncate_field))");
    PartitionSpec afterThirdAlter = PartitionSpec.builderFor(schema)
        .withSpecId(3)
        .alwaysNull("truncate_field", "truncate_field_trunc")
        .alwaysNull("bucket_field", "bucket_field_bucket")
        .alwaysNull("truncate_field", "truncate_field_trunc_3")
        .truncate("truncate_field", 4, "truncate_field_trunc_4")
        .bucket("bucket_field", 3, "bucket_field_bucket_3")
        .build();
    table.refresh();
    Assert.assertEquals(afterThirdAlter, table.spec());
}
Also used : TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) BaseTable(org.apache.iceberg.BaseTable) Table(org.apache.iceberg.Table) UpdateSchema(org.apache.iceberg.UpdateSchema) Schema(org.apache.iceberg.Schema) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) PartitionSpec(org.apache.iceberg.PartitionSpec) Test(org.junit.Test)

Example 39 with Table

use of org.apache.iceberg.Table in project hive by apache.

the class TestHiveIcebergStorageHandlerNoScan method testAlterTableChangeColumnTypeAndComment.

/**
 * Changes the type and comment of a column through Hive DDL and checks that both the Iceberg schema
 * (converted to Hive field schemas) and the HMS storage-descriptor columns match the expectation.
 *
 * @throws TException declared by the metastore lookup
 * @throws InterruptedException declared by the metastore lookup
 */
@Test
public void testAlterTableChangeColumnTypeAndComment() throws TException, InterruptedException {
    TableIdentifier identifier = TableIdentifier.of("default", "customers");
    Schema schema = new Schema(
        optional(1, "customer_id", Types.IntegerType.get()),
        optional(2, "last_name", Types.StringType.get(), "This is last name"));
    testTables.createTable(shell, identifier.name(), schema, SPEC, FileFormat.PARQUET, ImmutableList.of());

    shell.executeStatement("ALTER TABLE default.customers CHANGE COLUMN customer_id customer_id bigint COMMENT 'This is an identifier'");

    org.apache.iceberg.Table icebergTable = testTables.loadTable(identifier);
    org.apache.hadoop.hive.metastore.api.Table hmsTable = shell.metastore().getTable("default", "customers");
    List<FieldSchema> columnsFromIceberg = HiveSchemaUtil.convert(icebergTable.schema());
    List<FieldSchema> columnsFromHms = hmsTable.getSd().getCols();

    FieldSchema expectedId = new FieldSchema("customer_id", "bigint", "This is an identifier");
    FieldSchema expectedLastName = new FieldSchema("last_name", "string", "This is last name");
    List<FieldSchema> expectedColumns = Lists.newArrayList(expectedId, expectedLastName);
    Assert.assertEquals(expectedColumns, columnsFromIceberg);

    // For table types other than HIVE_CATALOG, expected columns without a comment get "from deserializer"
    if (testTableType != TestTables.TestTableType.HIVE_CATALOG) {
        for (FieldSchema column : expectedColumns) {
            if (column.getComment() == null) {
                column.setComment("from deserializer");
            }
        }
    }
    Assert.assertEquals(expectedColumns, columnsFromHms);
}
Also used : TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) Types(org.apache.iceberg.types.Types) UpdateSchema(org.apache.iceberg.UpdateSchema) FileSystem(org.apache.hadoop.fs.FileSystem) URISyntaxException(java.net.URISyntaxException) HiveSchemaUtil(org.apache.iceberg.hive.HiveSchemaUtil) Catalogs(org.apache.iceberg.mr.Catalogs) NestedField.optional(org.apache.iceberg.types.Types.NestedField.optional) PartitionField(org.apache.iceberg.PartitionField) Lists(org.apache.iceberg.relocated.com.google.common.collect.Lists) StatsSetupConst(org.apache.hadoop.hive.common.StatsSetupConst) Map(java.util.Map) NoSuchTableException(org.apache.iceberg.exceptions.NoSuchTableException) After(org.junit.After) Path(org.apache.hadoop.fs.Path) URI(java.net.URI) Parameterized(org.junit.runners.Parameterized) AssertHelpers(org.apache.iceberg.AssertHelpers) CommitFailedException(org.apache.iceberg.exceptions.CommitFailedException) AfterClass(org.junit.AfterClass) BaseTable(org.apache.iceberg.BaseTable) Collection(java.util.Collection) org.apache.hadoop.hive.serde.serdeConstants(org.apache.hadoop.hive.serde.serdeConstants) InputFormatConfig(org.apache.iceberg.mr.InputFormatConfig) Set(java.util.Set) ImmutableList(org.apache.iceberg.relocated.com.google.common.collect.ImmutableList) Schema(org.apache.iceberg.Schema) Collectors(java.util.stream.Collectors) PartitionSpecParser(org.apache.iceberg.PartitionSpecParser) SchemaParser(org.apache.iceberg.SchemaParser) Type(org.apache.iceberg.types.Type) Util(org.apache.iceberg.hadoop.Util) List(java.util.List) MetastoreUtil(org.apache.iceberg.hive.MetastoreUtil) PartitionSpec(org.apache.iceberg.PartitionSpec) TableProperties(org.apache.iceberg.TableProperties) ImmutableSet(org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet) BeforeClass(org.junit.BeforeClass) ImmutableMap(org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap) RunWith(org.junit.runner.RunWith) 
Parameters(org.junit.runners.Parameterized.Parameters) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) GC_ENABLED(org.apache.iceberg.TableProperties.GC_ENABLED) BaseMetastoreTableOperations(org.apache.iceberg.BaseMetastoreTableOperations) Assume(org.junit.Assume) Before(org.junit.Before) Properties(java.util.Properties) TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) Table(org.apache.iceberg.Table) Parameter(org.junit.runners.Parameterized.Parameter) EnvironmentContext(org.apache.hadoop.hive.metastore.api.EnvironmentContext) TException(org.apache.thrift.TException) IOException(java.io.IOException) Test(org.junit.Test) FileFormat(org.apache.iceberg.FileFormat) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) SnapshotSummary(org.apache.iceberg.SnapshotSummary) Record(org.apache.iceberg.data.Record) Rule(org.junit.Rule) Assert(org.junit.Assert) Collections(java.util.Collections) org.apache.hadoop.hive.metastore.api.hive_metastoreConstants(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants) TemporaryFolder(org.junit.rules.TemporaryFolder) Table(org.apache.iceberg.Table) UpdateSchema(org.apache.iceberg.UpdateSchema) Schema(org.apache.iceberg.Schema) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) Test(org.junit.Test)

Example 40 with Table

use of org.apache.iceberg.Table in project hive by apache.

the class TestTables method createTable.

/**
 * Creates a non-partitioned Hive test table in the 'default' database. The Iceberg table is created and
 * populated with the provided {@link Record}s first; the matching Hive table is created afterwards, but
 * only when a Hive CREATE statement is available for this table type.
 * @param shell The HiveShell used for Hive table creation
 * @param tableName The name of the test table
 * @param schema The schema used for the table creation
 * @param fileFormat The file format used for writing the data
 * @param records The records with which the table is populated
 * @param formatVersion The version of the spec the table should use (format-version)
 * @return The created table
 * @throws IOException If there is an error writing data
 */
public Table createTable(TestHiveShell shell, String tableName, Schema schema, FileFormat fileFormat, List<Record> records, int formatVersion) throws IOException {
    String versionValue = Integer.toString(formatVersion);
    Map<String, String> properties = ImmutableMap.of(TableProperties.FORMAT_VERSION, versionValue);
    Table created = createIcebergTable(shell.getHiveConf(), tableName, schema, fileFormat, properties, records);
    String hiveDdl = createHiveTableSQL(TableIdentifier.of("default", tableName), properties);
    // A null statement means no separate Hive-side CREATE is required
    if (hiveDdl == null) {
        return created;
    }
    shell.executeStatement(hiveDdl);
    return created;
}
Also used : Table(org.apache.iceberg.Table)

Aggregations

Table (org.apache.iceberg.Table)188 Test (org.junit.Test)132 Schema (org.apache.iceberg.Schema)66 TableIdentifier (org.apache.iceberg.catalog.TableIdentifier)56 Record (org.apache.iceberg.data.Record)56 PartitionSpec (org.apache.iceberg.PartitionSpec)51 IOException (java.io.IOException)27 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)27 List (java.util.List)22 Map (java.util.Map)20 DataFile (org.apache.iceberg.DataFile)19 NoSuchTableException (org.apache.iceberg.exceptions.NoSuchTableException)19 Collectors (java.util.stream.Collectors)18 BaseTable (org.apache.iceberg.BaseTable)18 Types (org.apache.iceberg.types.Types)18 Properties (java.util.Properties)17 Configuration (org.apache.hadoop.conf.Configuration)17 Path (org.apache.hadoop.fs.Path)17 FileFormat (org.apache.iceberg.FileFormat)16 ArrayList (java.util.ArrayList)15