
Example 16 with Schema

use of org.apache.iceberg.Schema in project hive by apache.

the class TestHiveIcebergStorageHandlerNoScan method testPartitionEvolution.

@Test
public void testPartitionEvolution() {
    Schema schema = new Schema(optional(1, "id", Types.LongType.get()), optional(2, "ts", Types.TimestampType.withZone()));
    TableIdentifier identifier = TableIdentifier.of("default", "part_test");
    shell.executeStatement("CREATE EXTERNAL TABLE " + identifier + " STORED BY ICEBERG " + testTables.locationForCreateTableSQL(identifier) + " TBLPROPERTIES ('" + InputFormatConfig.TABLE_SCHEMA + "'='" + SchemaParser.toJson(schema) + "', " + "'" + InputFormatConfig.CATALOG_NAME + "'='" + testTables.catalogName() + "')");
    shell.executeStatement("ALTER TABLE " + identifier + " SET PARTITION SPEC (month(ts))");
    PartitionSpec spec = PartitionSpec.builderFor(schema).withSpecId(1).month("ts").build();
    Table table = testTables.loadTable(identifier);
    Assert.assertEquals(spec, table.spec());
    shell.executeStatement("ALTER TABLE " + identifier + " SET PARTITION SPEC (day(ts))");
    spec = PartitionSpec.builderFor(schema).withSpecId(2).alwaysNull("ts", "ts_month").day("ts").build();
    table.refresh();
    Assert.assertEquals(spec, table.spec());
}
Also used : TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) BaseTable(org.apache.iceberg.BaseTable) Table(org.apache.iceberg.Table) UpdateSchema(org.apache.iceberg.UpdateSchema) Schema(org.apache.iceberg.Schema) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) PartitionSpec(org.apache.iceberg.PartitionSpec) Test(org.junit.Test)
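
The HiveQL spec changes above map onto Iceberg's UpdatePartitionSpec API. A minimal sketch of the programmatic equivalent (assuming Iceberg 0.11 or later, where Table.updateSpec() is available, and reusing the table handle loaded in the test):

import org.apache.iceberg.expressions.Expressions;

// Equivalent of "ALTER TABLE ... SET PARTITION SPEC (month(ts))"
table.updateSpec()
    .addField(Expressions.month("ts"))
    .commit();

// Moving to day(ts) removes the month field and adds the day field; on a
// format-v1 table the removed field is what shows up as alwaysNull("ts", "ts_month")
// in the expected spec asserted above.
table.updateSpec()
    .removeField(Expressions.month("ts"))
    .addField(Expressions.day("ts"))
    .commit();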

Example 17 with Schema

use of org.apache.iceberg.Schema in project hive by apache.

the class TestHiveIcebergStorageHandlerNoScan method testSetPartitionTransformSameField.

@Test
public void testSetPartitionTransformSameField() {
    Schema schema = new Schema(optional(1, "id", Types.LongType.get()), optional(2, "truncate_field", Types.StringType.get()), optional(3, "bucket_field", Types.StringType.get()));
    TableIdentifier identifier = TableIdentifier.of("default", "part_test");
    shell.executeStatement("CREATE EXTERNAL TABLE " + identifier + " PARTITIONED BY SPEC (truncate(2, truncate_field), bucket(2, bucket_field))" + " STORED BY ICEBERG " + testTables.locationForCreateTableSQL(identifier) + "TBLPROPERTIES ('" + InputFormatConfig.TABLE_SCHEMA + "'='" + SchemaParser.toJson(schema) + "', " + "'" + InputFormatConfig.CATALOG_NAME + "'='" + testTables.catalogName() + "')");
    PartitionSpec spec = PartitionSpec.builderFor(schema).truncate("truncate_field", 2).bucket("bucket_field", 2).build();
    Table table = testTables.loadTable(identifier);
    Assert.assertEquals(spec, table.spec());
    // Change one, keep one
    shell.executeStatement("ALTER TABLE default.part_test " + "SET PARTITION SPEC (truncate(3, truncate_field), bucket(2, bucket_field) )");
    // On a v1 table the replaced partition field is not dropped: it survives as a
    // void (alwaysNull) transform under its original partition field name.
    spec = PartitionSpec.builderFor(schema).withSpecId(1)
        .alwaysNull("truncate_field", "truncate_field_trunc")
        .bucket("bucket_field", 2)
        .truncate("truncate_field", 3, "truncate_field_trunc_3")
        .build();
    table.refresh();
    Assert.assertEquals(spec, table.spec());
    // Change one again, keep the other one
    shell.executeStatement("ALTER TABLE default.part_test " + "SET PARTITION SPEC (truncate(4, truncate_field), bucket(2, bucket_field) )");
    spec = PartitionSpec.builderFor(schema).withSpecId(2)
        .alwaysNull("truncate_field", "truncate_field_trunc")
        .bucket("bucket_field", 2)
        .alwaysNull("truncate_field", "truncate_field_trunc_3")
        .truncate("truncate_field", 4, "truncate_field_trunc_4")
        .build();
    table.refresh();
    Assert.assertEquals(spec, table.spec());
    // Keep the already changed, change the other one (change the order of clauses in the spec)
    shell.executeStatement("ALTER TABLE default.part_test " + "SET PARTITION SPEC (bucket(3, bucket_field), truncate(4, truncate_field))");
    spec = PartitionSpec.builderFor(schema).withSpecId(3)
        .alwaysNull("truncate_field", "truncate_field_trunc")
        .alwaysNull("bucket_field", "bucket_field_bucket")
        .alwaysNull("truncate_field", "truncate_field_trunc_3")
        .truncate("truncate_field", 4, "truncate_field_trunc_4")
        .bucket("bucket_field", 3, "bucket_field_bucket_3")
        .build();
    table.refresh();
    Assert.assertEquals(spec, table.spec());
}
Also used : TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) BaseTable(org.apache.iceberg.BaseTable) Table(org.apache.iceberg.Table) UpdateSchema(org.apache.iceberg.UpdateSchema) Schema(org.apache.iceberg.Schema) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) PartitionSpec(org.apache.iceberg.PartitionSpec) Test(org.junit.Test)
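
The same evolution can be written against the Java API. A sketch of the first "change one, keep one" step (assuming Iceberg 0.11+; partition fields that are not explicitly removed carry over unchanged):

import org.apache.iceberg.expressions.Expressions;

// Equivalent of "SET PARTITION SPEC (truncate(3, truncate_field), bucket(2, bucket_field))":
// only the truncate field changes; bucket(2, bucket_field) is left in place.
table.updateSpec()
    .removeField("truncate_field_trunc") // the old truncate(2, truncate_field) field
    .addField(Expressions.truncate("truncate_field", 3))
    .commit();

On format-v1 tables each removed field survives as a void transform, which is why the expected specs above rebuild the old fields with alwaysNull before adding the replacement transform.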

Example 18 with Schema

use of org.apache.iceberg.Schema in project hive by apache.

the class TestHiveIcebergStorageHandlerNoScan method testAlterTableChangeColumnTypeAndComment.

@Test
public void testAlterTableChangeColumnTypeAndComment() throws TException, InterruptedException {
    TableIdentifier identifier = TableIdentifier.of("default", "customers");
    Schema schema = new Schema(optional(1, "customer_id", Types.IntegerType.get()), optional(2, "last_name", Types.StringType.get(), "This is last name"));
    testTables.createTable(shell, identifier.name(), schema, SPEC, FileFormat.PARQUET, ImmutableList.of());
    shell.executeStatement("ALTER TABLE default.customers CHANGE COLUMN " + "customer_id customer_id bigint COMMENT 'This is an identifier'");
    org.apache.iceberg.Table icebergTable = testTables.loadTable(identifier);
    org.apache.hadoop.hive.metastore.api.Table hmsTable = shell.metastore().getTable("default", "customers");
    List<FieldSchema> icebergSchema = HiveSchemaUtil.convert(icebergTable.schema());
    List<FieldSchema> hmsSchema = hmsTable.getSd().getCols();
    List<FieldSchema> expectedSchema = Lists.newArrayList(
        new FieldSchema("customer_id", "bigint", "This is an identifier"),
        new FieldSchema("last_name", "string", "This is last name"));
    Assert.assertEquals(expectedSchema, icebergSchema);
    if (testTableType != TestTables.TestTableType.HIVE_CATALOG) {
        // Outside the Hive catalog the metastore reports columns through the deserializer,
        // which substitutes the placeholder "from deserializer" for missing column comments.
        expectedSchema.stream()
            .filter(fs -> fs.getComment() == null)
            .forEach(fs -> fs.setComment("from deserializer"));
    }
    Assert.assertEquals(expectedSchema, hmsSchema);
}
Also used : TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) Types(org.apache.iceberg.types.Types) UpdateSchema(org.apache.iceberg.UpdateSchema) FileSystem(org.apache.hadoop.fs.FileSystem) URISyntaxException(java.net.URISyntaxException) HiveSchemaUtil(org.apache.iceberg.hive.HiveSchemaUtil) Catalogs(org.apache.iceberg.mr.Catalogs) NestedField.optional(org.apache.iceberg.types.Types.NestedField.optional) PartitionField(org.apache.iceberg.PartitionField) Lists(org.apache.iceberg.relocated.com.google.common.collect.Lists) StatsSetupConst(org.apache.hadoop.hive.common.StatsSetupConst) Map(java.util.Map) NoSuchTableException(org.apache.iceberg.exceptions.NoSuchTableException) After(org.junit.After) Path(org.apache.hadoop.fs.Path) URI(java.net.URI) Parameterized(org.junit.runners.Parameterized) AssertHelpers(org.apache.iceberg.AssertHelpers) CommitFailedException(org.apache.iceberg.exceptions.CommitFailedException) AfterClass(org.junit.AfterClass) BaseTable(org.apache.iceberg.BaseTable) Collection(java.util.Collection) org.apache.hadoop.hive.serde.serdeConstants(org.apache.hadoop.hive.serde.serdeConstants) InputFormatConfig(org.apache.iceberg.mr.InputFormatConfig) Set(java.util.Set) ImmutableList(org.apache.iceberg.relocated.com.google.common.collect.ImmutableList) Schema(org.apache.iceberg.Schema) Collectors(java.util.stream.Collectors) PartitionSpecParser(org.apache.iceberg.PartitionSpecParser) SchemaParser(org.apache.iceberg.SchemaParser) Type(org.apache.iceberg.types.Type) Util(org.apache.iceberg.hadoop.Util) List(java.util.List) MetastoreUtil(org.apache.iceberg.hive.MetastoreUtil) PartitionSpec(org.apache.iceberg.PartitionSpec) TableProperties(org.apache.iceberg.TableProperties) ImmutableSet(org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet) BeforeClass(org.junit.BeforeClass) ImmutableMap(org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap) RunWith(org.junit.runner.RunWith) Parameters(org.junit.runners.Parameterized.Parameters) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) GC_ENABLED(org.apache.iceberg.TableProperties.GC_ENABLED) BaseMetastoreTableOperations(org.apache.iceberg.BaseMetastoreTableOperations) Assume(org.junit.Assume) Before(org.junit.Before) Properties(java.util.Properties) Table(org.apache.iceberg.Table) Parameter(org.junit.runners.Parameterized.Parameter) EnvironmentContext(org.apache.hadoop.hive.metastore.api.EnvironmentContext) TException(org.apache.thrift.TException) IOException(java.io.IOException) Test(org.junit.Test) FileFormat(org.apache.iceberg.FileFormat) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) SnapshotSummary(org.apache.iceberg.SnapshotSummary) Record(org.apache.iceberg.data.Record) Rule(org.junit.Rule) Assert(org.junit.Assert) Collections(java.util.Collections) org.apache.hadoop.hive.metastore.api.hive_metastoreConstants(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants) TemporaryFolder(org.junit.rules.TemporaryFolder)
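
The CHANGE COLUMN statement corresponds to Iceberg's UpdateSchema API. A short sketch (reusing the icebergTable handle loaded above; int to long is one of the type promotions Iceberg permits):

// Equivalent of "ALTER TABLE ... CHANGE COLUMN customer_id customer_id bigint COMMENT '...'"
icebergTable.updateSchema()
    .updateColumn("customer_id", Types.LongType.get(), "This is an identifier")
    .commit();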

Example 19 with Schema

use of org.apache.iceberg.Schema in project hive by apache.

the class TestIcebergInputFormats method testProjection.

@Test
public void testProjection() throws Exception {
    helper.createTable();
    List<Record> inputRecords = helper.generateRandomRecords(1, 0L);
    helper.appendToTable(Row.of("2020-03-20", 0), inputRecords);
    Schema projection = TypeUtil.select(SCHEMA, ImmutableSet.of(1));
    builder.project(projection);
    List<Record> outputRecords = testInputFormat.create(builder.conf()).getRecords();
    Assert.assertEquals(inputRecords.size(), outputRecords.size());
    Assert.assertEquals(projection.asStruct(), outputRecords.get(0).struct());
}
Also used : Schema(org.apache.iceberg.Schema) Record(org.apache.iceberg.data.Record) Test(org.junit.Test)
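
TypeUtil.select prunes a schema by field id, not by column name. A self-contained sketch with a hypothetical two-column schema (the test's SCHEMA constant is defined elsewhere in the class):

import org.apache.iceberg.Schema;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
import org.apache.iceberg.types.TypeUtil;
import org.apache.iceberg.types.Types;
import static org.apache.iceberg.types.Types.NestedField.required;

Schema full = new Schema(
    required(1, "data", Types.StringType.get()),
    required(2, "id", Types.LongType.get()));

// Keep only field id 1; readers then return records whose struct matches
// this pruned schema, which is what the assertions above verify.
Schema projected = TypeUtil.select(full, ImmutableSet.of(1));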

Example 20 with Schema

use of org.apache.iceberg.Schema in project hive by apache.

the class TestHiveIcebergComplexTypeWrites method testWriteMapOfMapsInTable.

@Test
public void testWriteMapOfMapsInTable() throws IOException {
    Schema schema = new Schema(
        required(1, "id", Types.LongType.get()),
        required(2, "mapofmaps", Types.MapType.ofRequired(3, 4, Types.StringType.get(),
            Types.MapType.ofRequired(5, 6, Types.StringType.get(), Types.StringType.get()))));
    List<Record> records = TestHelper.generateRandomRecords(schema, 5, 0L);
    testComplexTypeWrite(schema, records);
}
Also used : Schema(org.apache.iceberg.Schema) Record(org.apache.iceberg.data.Record) GenericRecord(org.apache.iceberg.data.GenericRecord) Test(org.junit.Test)
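
For reference, a record matching this map-of-maps schema can also be built by hand with GenericRecord (a sketch; the test itself generates its records randomly via TestHelper):

import org.apache.iceberg.data.GenericRecord;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;

GenericRecord record = GenericRecord.create(schema);
record.setField("id", 1L);
// The nested map values are plain java.util.Map instances.
record.setField("mapofmaps", ImmutableMap.of(
    "outer", ImmutableMap.of("inner", "value")));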

Aggregations

Schema (org.apache.iceberg.Schema) 126
Test (org.junit.Test) 93
Record (org.apache.iceberg.data.Record) 68
Table (org.apache.iceberg.Table) 55
PartitionSpec (org.apache.iceberg.PartitionSpec) 39
GenericRecord (org.apache.iceberg.data.GenericRecord) 36
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema) 30
List (java.util.List) 21
TableIdentifier (org.apache.iceberg.catalog.TableIdentifier) 20
IOException (java.io.IOException) 16
Types (org.apache.iceberg.types.Types) 16
ArrayList (java.util.ArrayList) 15
Map (java.util.Map) 14
HashMap (java.util.HashMap) 13
FileFormat (org.apache.iceberg.FileFormat) 13
UpdateSchema (org.apache.iceberg.UpdateSchema) 12
Path (org.apache.hadoop.fs.Path) 11
Collectors (java.util.stream.Collectors) 10
ImmutableList (org.apache.iceberg.relocated.com.google.common.collect.ImmutableList) 10
TestHelper (org.apache.iceberg.mr.TestHelper) 9