Search in sources :

Example 31 with Table

use of org.apache.iceberg.Table in project hive by apache.

the class TestHiveIcebergStatistics method testAnalyzeTableComputeStatistics.

/**
 * Creates the {@code customers} table populated with the shared customer records, issues
 * {@code ANALYZE TABLE ... COMPUTE STATISTICS} for it, and delegates the verification to
 * {@code validateBasicStats}.
 */
@Test
public void testAnalyzeTableComputeStatistics() throws IOException, TException, InterruptedException {
    final String database = "default";
    final String customersName = "customers";
    Table customers = testTables.createTable(shell, customersName, HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, fileFormat, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS);
    // The statement addresses the table by its database-qualified name.
    String qualifiedName = database + "." + customersName;
    String analyzeStatement = "ANALYZE TABLE " + qualifiedName + " COMPUTE STATISTICS";
    shell.executeStatement(analyzeStatement);
    validateBasicStats(customers, database, customersName);
}
Also used : Table(org.apache.iceberg.Table) Test(org.junit.Test)

Example 32 with Table

use of org.apache.iceberg.Table in project hive by apache.

the class TestHiveIcebergStorageHandlerNoScan method testCreateTableStoredByIcebergWithSerdeProperties.

/**
 * Creates a table whose default file format is supplied through {@code WITH SERDEPROPERTIES}
 * and asserts that the loaded Iceberg table exposes {@code "orc"} under
 * {@link TableProperties#DEFAULT_FILE_FORMAT}.
 */
@Test
public void testCreateTableStoredByIcebergWithSerdeProperties() {
    TableIdentifier customersId = TableIdentifier.of("default", "customers");
    String ddlTemplate = "CREATE EXTERNAL TABLE customers (customer_id BIGINT, first_name STRING, last_name " + "STRING) STORED BY iceberg WITH SERDEPROPERTIES('%s'='%s') %s TBLPROPERTIES ('%s'='%s')";
    // Resolve the helper-provided fragments in the same order they appear in the template.
    String locationClause = testTables.locationForCreateTableSQL(customersId);
    String catalogName = testTables.catalogName();
    String ddl = String.format(ddlTemplate, TableProperties.DEFAULT_FILE_FORMAT, "orc", locationClause, InputFormatConfig.CATALOG_NAME, catalogName);
    shell.executeStatement(ddl);

    Table created = testTables.loadTable(customersId);
    Assert.assertNotNull(created);
    String storedFormat = created.properties().get(TableProperties.DEFAULT_FILE_FORMAT);
    Assert.assertEquals("orc", storedFormat);
}
Also used : TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) BaseTable(org.apache.iceberg.BaseTable) Table(org.apache.iceberg.Table) Test(org.junit.Test)

Example 33 with Table

use of org.apache.iceberg.Table in project hive by apache.

the class TestHiveIcebergStorageHandlerNoScan method testCreateTableWithColumnComments.

/**
 * Creates {@code comment_table} with comments on two of its three columns and compares every row
 * returned by {@code DESCRIBE} with the matching Iceberg schema column: name, Hive type name, and
 * the column doc (or {@code "from deserializer"} when the column has no doc).
 */
@Test
public void testCreateTableWithColumnComments() {
    TableIdentifier commentTableId = TableIdentifier.of("default", "comment_table");
    String ddl = "CREATE EXTERNAL TABLE comment_table (" + "t_int INT COMMENT 'int column',  " + "t_string STRING COMMENT 'string column', " + "t_string_2 STRING) " + "STORED BY ICEBERG " + testTables.locationForCreateTableSQL(commentTableId) + testTables.propertiesForCreateTableSQL(ImmutableMap.of());
    shell.executeStatement(ddl);

    org.apache.iceberg.Table icebergTable = testTables.loadTable(commentTableId);
    List<Object[]> describedRows = shell.executeStatement("DESCRIBE default.comment_table");
    List<Types.NestedField> columns = icebergTable.schema().columns();
    Assert.assertEquals(columns.size(), describedRows.size());

    int rowIndex = 0;
    for (Types.NestedField column : columns) {
        // Columns declared without a COMMENT are expected to show this placeholder text.
        String expectedComment = column.doc();
        if (expectedComment == null) {
            expectedComment = "from deserializer";
        }
        Object[] expectedRow = new Object[] { column.name(), HiveSchemaUtil.convert(column.type()).getTypeName(), expectedComment };
        Assert.assertArrayEquals(expectedRow, describedRows.get(rowIndex));
        rowIndex++;
    }
}
Also used : TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) Types(org.apache.iceberg.types.Types) Table(org.apache.iceberg.Table) Test(org.junit.Test)

Example 34 with Table

use of org.apache.iceberg.Table in project hive by apache.

the class TestHiveIcebergStorageHandlerNoScan method testPartitionEvolution.

/**
 * Changes the partition spec of {@code default.part_test} twice through
 * {@code ALTER TABLE ... SET PARTITION SPEC} and, after each change, compares the table's spec
 * with an expected {@link PartitionSpec} built by hand: first {@code month(ts)} with spec id 1,
 * then {@code day(ts)} with spec id 2.
 */
@Test
public void testPartitionEvolution() {
    Schema tableSchema = new Schema(optional(1, "id", Types.LongType.get()), optional(2, "ts", Types.TimestampType.withZone()));
    TableIdentifier partTestId = TableIdentifier.of("default", "part_test");

    String createPrefix = "CREATE EXTERNAL TABLE " + partTestId + " STORED BY ICEBERG " + testTables.locationForCreateTableSQL(partTestId);
    String tableProperties = " TBLPROPERTIES ('" + InputFormatConfig.TABLE_SCHEMA + "'='" + SchemaParser.toJson(tableSchema) + "', " + "'" + InputFormatConfig.CATALOG_NAME + "'='" + testTables.catalogName() + "')";
    shell.executeStatement(createPrefix + tableProperties);

    // First evolution: partition by month.
    shell.executeStatement("ALTER TABLE " + partTestId + " SET PARTITION SPEC (month(ts))");
    PartitionSpec monthSpec = PartitionSpec.builderFor(tableSchema).withSpecId(1).month("ts").build();
    Table partTest = testTables.loadTable(partTestId);
    Assert.assertEquals(monthSpec, partTest.spec());

    // Second evolution: partition by day. The expected spec carries an alwaysNull entry named
    // "ts_month" in front of the day transform.
    shell.executeStatement("ALTER TABLE " + partTestId + " SET PARTITION SPEC (day(ts))");
    PartitionSpec daySpec = PartitionSpec.builderFor(tableSchema).withSpecId(2).alwaysNull("ts", "ts_month").day("ts").build();
    partTest.refresh();
    Assert.assertEquals(daySpec, partTest.spec());
}
Also used : TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) BaseTable(org.apache.iceberg.BaseTable) Table(org.apache.iceberg.Table) UpdateSchema(org.apache.iceberg.UpdateSchema) Schema(org.apache.iceberg.Schema) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) PartitionSpec(org.apache.iceberg.PartitionSpec) Test(org.junit.Test)

Example 35 with Table

use of org.apache.iceberg.Table in project hive by apache.

the class TestHiveIcebergStorageHandlerNoScan method testDeleteBackingTable.

/**
 * Creates {@code customers} with {@link InputFormatConfig#EXTERNAL_TABLE_PURGE} set to
 * {@code 'FALSE'}, drops it, and checks what is left behind.
 *
 * <p>When {@code Catalogs.hiveCatalog} returns true for the table's HMS parameters, loading the
 * table must fail with {@link NoSuchTableException} while the table location still holds one
 * entry and its {@code metadata} directory still holds one entry. Otherwise the table must still
 * be loadable after the drop.
 */
@Test
public void testDeleteBackingTable() throws TException, IOException, InterruptedException {
    TableIdentifier customersId = TableIdentifier.of("default", "customers");
    // NOTE(review): there is no space between the location clause and "TBLPROPERTIES" here, so this
    // presumably relies on locationForCreateTableSQL ending in whitespace (or being empty) - confirm.
    String createDdl = "CREATE EXTERNAL TABLE customers " + "STORED BY ICEBERG " + testTables.locationForCreateTableSQL(customersId) + "TBLPROPERTIES ('" + InputFormatConfig.TABLE_SCHEMA + "'='" + SchemaParser.toJson(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA) + "', " + "'" + InputFormatConfig.EXTERNAL_TABLE_PURGE + "'='FALSE', " + "'" + InputFormatConfig.CATALOG_NAME + "'='" + testTables.catalogName() + "')";
    shell.executeStatement(createDdl);

    org.apache.hadoop.hive.metastore.api.Table hmsTable = shell.metastore().getTable("default", "customers");

    // Copy the HMS parameters, minus the ignored ones, into the Properties handed to Catalogs.
    Properties catalogProperties = new Properties();
    hmsTable.getParameters().forEach((key, value) -> {
        if (!IGNORED_PARAMS.contains(key)) {
            catalogProperties.put(key, value);
        }
    });

    if (Catalogs.hiveCatalog(shell.getHiveConf(), catalogProperties)) {
        // Remember where the table lives before it is dropped.
        Path tableLocation = new Path(hmsTable.getSd().getLocation());
        shell.executeStatement("DROP TABLE customers");
        // Loading the dropped table must now fail. The block lambda is kept deliberately.
        AssertHelpers.assertThrows("should throw exception", NoSuchTableException.class, "Table does not exist", () -> {
            testTables.loadTable(customersId);
        });
        // The files must be kept: one entry under the table location, one under metadata.
        FileSystem fileSystem = Util.getFs(tableLocation, shell.getHiveConf());
        Assert.assertEquals(1, fileSystem.listStatus(tableLocation).length);
        Assert.assertEquals(1, fileSystem.listStatus(new Path(tableLocation, "metadata")).length);
    } else {
        shell.executeStatement("DROP TABLE customers");
        // The table must still be loadable after the drop.
        testTables.loadTable(customersId);
    }
}
Also used : TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) Types(org.apache.iceberg.types.Types) UpdateSchema(org.apache.iceberg.UpdateSchema) FileSystem(org.apache.hadoop.fs.FileSystem) URISyntaxException(java.net.URISyntaxException) HiveSchemaUtil(org.apache.iceberg.hive.HiveSchemaUtil) Catalogs(org.apache.iceberg.mr.Catalogs) NestedField.optional(org.apache.iceberg.types.Types.NestedField.optional) PartitionField(org.apache.iceberg.PartitionField) Lists(org.apache.iceberg.relocated.com.google.common.collect.Lists) StatsSetupConst(org.apache.hadoop.hive.common.StatsSetupConst) Map(java.util.Map) NoSuchTableException(org.apache.iceberg.exceptions.NoSuchTableException) After(org.junit.After) Path(org.apache.hadoop.fs.Path) URI(java.net.URI) Parameterized(org.junit.runners.Parameterized) AssertHelpers(org.apache.iceberg.AssertHelpers) CommitFailedException(org.apache.iceberg.exceptions.CommitFailedException) AfterClass(org.junit.AfterClass) BaseTable(org.apache.iceberg.BaseTable) Collection(java.util.Collection) org.apache.hadoop.hive.serde.serdeConstants(org.apache.hadoop.hive.serde.serdeConstants) InputFormatConfig(org.apache.iceberg.mr.InputFormatConfig) Set(java.util.Set) ImmutableList(org.apache.iceberg.relocated.com.google.common.collect.ImmutableList) Schema(org.apache.iceberg.Schema) Collectors(java.util.stream.Collectors) PartitionSpecParser(org.apache.iceberg.PartitionSpecParser) SchemaParser(org.apache.iceberg.SchemaParser) Type(org.apache.iceberg.types.Type) Util(org.apache.iceberg.hadoop.Util) List(java.util.List) MetastoreUtil(org.apache.iceberg.hive.MetastoreUtil) PartitionSpec(org.apache.iceberg.PartitionSpec) TableProperties(org.apache.iceberg.TableProperties) ImmutableSet(org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet) BeforeClass(org.junit.BeforeClass) ImmutableMap(org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap) RunWith(org.junit.runner.RunWith) 
Parameters(org.junit.runners.Parameterized.Parameters) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) GC_ENABLED(org.apache.iceberg.TableProperties.GC_ENABLED) BaseMetastoreTableOperations(org.apache.iceberg.BaseMetastoreTableOperations) Assume(org.junit.Assume) Before(org.junit.Before) Properties(java.util.Properties) TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) Table(org.apache.iceberg.Table) Parameter(org.junit.runners.Parameterized.Parameter) EnvironmentContext(org.apache.hadoop.hive.metastore.api.EnvironmentContext) TException(org.apache.thrift.TException) IOException(java.io.IOException) Test(org.junit.Test) FileFormat(org.apache.iceberg.FileFormat) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) SnapshotSummary(org.apache.iceberg.SnapshotSummary) Record(org.apache.iceberg.data.Record) Rule(org.junit.Rule) Assert(org.junit.Assert) Collections(java.util.Collections) org.apache.hadoop.hive.metastore.api.hive_metastoreConstants(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants) TemporaryFolder(org.junit.rules.TemporaryFolder) Path(org.apache.hadoop.fs.Path) FileSystem(org.apache.hadoop.fs.FileSystem) TableProperties(org.apache.iceberg.TableProperties) Properties(java.util.Properties) Test(org.junit.Test)

Aggregations

Table (org.apache.iceberg.Table)188 Test (org.junit.Test)132 Schema (org.apache.iceberg.Schema)66 TableIdentifier (org.apache.iceberg.catalog.TableIdentifier)56 Record (org.apache.iceberg.data.Record)56 PartitionSpec (org.apache.iceberg.PartitionSpec)51 IOException (java.io.IOException)27 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)27 List (java.util.List)22 Map (java.util.Map)20 DataFile (org.apache.iceberg.DataFile)19 NoSuchTableException (org.apache.iceberg.exceptions.NoSuchTableException)19 Collectors (java.util.stream.Collectors)18 BaseTable (org.apache.iceberg.BaseTable)18 Types (org.apache.iceberg.types.Types)18 Properties (java.util.Properties)17 Configuration (org.apache.hadoop.conf.Configuration)17 Path (org.apache.hadoop.fs.Path)17 FileFormat (org.apache.iceberg.FileFormat)16 ArrayList (java.util.ArrayList)15