Search in sources :

Example 71 with Schema

use of org.apache.iceberg.Schema in project hive by apache.

the class TestHiveIcebergStorageHandlerNoScan method testAlterTableReplaceColumns.

/**
 * Creates a four-column {@code default.customers} table, runs {@code ALTER TABLE ... REPLACE COLUMNS} with a
 * column list that omits {@code first_name}, and asserts that both the Iceberg schema and the HMS column list
 * match the three remaining columns.
 *
 * @throws TException declared by the test; presumably raised by the metastore lookup
 * @throws InterruptedException declared by the test; presumably raised by the metastore lookup
 */
@Test
public void testAlterTableReplaceColumns() throws TException, InterruptedException {
    TableIdentifier tableId = TableIdentifier.of("default", "customers");

    // Initial layout: three scalar columns plus a nested "address" struct without a column comment.
    Types.StructType addressType = Types.StructType.of(
            optional(5, "city", Types.StringType.get()),
            optional(6, "street", Types.StringType.get()));
    Schema initialSchema = new Schema(
            optional(1, "customer_id", Types.IntegerType.get()),
            optional(2, "first_name", Types.StringType.get(), "This is first name"),
            optional(3, "last_name", Types.StringType.get(), "This is last name"),
            optional(4, "address", addressType, null));
    testTables.createTable(shell, tableId.name(), initialSchema, SPEC, FileFormat.PARQUET, ImmutableList.of());

    // The replacement column list leaves out first_name.
    String replaceColumnsDdl = "ALTER TABLE default.customers REPLACE COLUMNS "
            + "(customer_id int, last_name string COMMENT 'This is last name', "
            + "address struct<city:string,street:string>)";
    shell.executeStatement(replaceColumnsDdl);

    // Read the resulting columns back from both the Iceberg table and the Hive metastore.
    org.apache.iceberg.Table reloadedIcebergTable = testTables.loadTable(tableId);
    org.apache.hadoop.hive.metastore.api.Table metastoreTable = shell.metastore().getTable("default", "customers");
    List<FieldSchema> columnsFromIceberg = HiveSchemaUtil.convert(reloadedIcebergTable.schema());
    List<FieldSchema> columnsFromHms = metastoreTable.getSd().getCols();

    // first_name column is dropped
    List<FieldSchema> remainingColumns = Lists.newArrayList(
            new FieldSchema("customer_id", "int", null),
            new FieldSchema("last_name", "string", "This is last name"),
            new FieldSchema("address", "struct<city:string,street:string>", null));

    Assert.assertEquals(remainingColumns, columnsFromIceberg);
    Assert.assertEquals(remainingColumns, columnsFromHms);
}
Also used : TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) Table(org.apache.iceberg.Table) UpdateSchema(org.apache.iceberg.UpdateSchema) Schema(org.apache.iceberg.Schema) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) Test(org.junit.Test)

Example 72 with Schema

use of org.apache.iceberg.Schema in project hive by apache.

the class TestHiveIcebergV2 method testReadAndWriteFormatV2Partitioned_EqDelete_OnlyEqColumnsSupplied.

/**
 * Creates a {@code customers} table partitioned by {@code customer_id}, inserts one extra row, commits an
 * equality delete file on ({@code customer_id}, {@code first_name}) whose delete records carry only two
 * columns, and asserts that the subsequent {@code SELECT} returns just the Alice and Trudy rows.
 *
 * <p>The test is skipped when running vectorized or with the ORC file format.
 *
 * @throws IOException declared by the test; presumably raised while writing the delete file
 */
@Test
public void testReadAndWriteFormatV2Partitioned_EqDelete_OnlyEqColumnsSupplied() throws IOException {
    boolean unsupportedCombination = isVectorized || fileFormat == FileFormat.ORC;
    Assume.assumeFalse(
            "Reading V2 tables with delete files are only supported currently in "
                    + "non-vectorized mode and only Parquet/Avro",
            unsupportedCombination);

    PartitionSpec byCustomerId = PartitionSpec
            .builderFor(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA)
            .identity("customer_id")
            .build();
    Table customersTable = testTables.createTable(
            shell,
            "customers",
            HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
            byCustomerId,
            fileFormat,
            HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS,
            2);

    // add one more row to the same partition
    shell.executeStatement("insert into customers values (1, 'Bob', 'Hoover')");

    // delete all rows with id=1 and first_name=Bob; the delete records only hold the two equality columns
    Schema deleteRecordSchema = new Schema(
            optional(1, "id", Types.LongType.get()),
            optional(2, "name", Types.StringType.get()));
    List<Record> deleteRecords = TestHelper.RecordsBuilder.newInstance(deleteRecordSchema)
            .add(1L, "Bob")
            .build();
    List<String> equalityColumns = ImmutableList.of("customer_id", "first_name");
    DeleteFile equalityDeleteFile = HiveIcebergTestUtils.createEqualityDeleteFile(
            customersTable, "dummyPath", equalityColumns, fileFormat, deleteRecords);
    customersTable.newRowDelta().addDeletes(equalityDeleteFile).commit();

    List<Object[]> remainingRows = shell.executeStatement("SELECT * FROM customers ORDER BY customer_id");

    Assert.assertEquals(2, remainingRows.size());
    Assert.assertArrayEquals(new Object[] { 0L, "Alice", "Brown" }, remainingRows.get(0));
    Assert.assertArrayEquals(new Object[] { 2L, "Trudy", "Pink" }, remainingRows.get(1));
}
Also used : Table(org.apache.iceberg.Table) Schema(org.apache.iceberg.Schema) Record(org.apache.iceberg.data.Record) PartitionSpec(org.apache.iceberg.PartitionSpec) DeleteFile(org.apache.iceberg.DeleteFile) Test(org.junit.Test)

Example 73 with Schema

use of org.apache.iceberg.Schema in project hive by apache.

the class TestHiveIcebergComplexTypeWrites method testWriteArrayOfArraysInTable.

/**
 * Builds a schema with a required {@code id} column and a required list-of-string-lists column, generates random
 * records for it, and passes both to {@code testComplexTypeWrite}.
 *
 * @throws IOException declared by the test; presumably propagated from {@code testComplexTypeWrite}
 */
@Test
public void testWriteArrayOfArraysInTable() throws IOException {
    Types.ListType stringList = Types.ListType.ofRequired(4, Types.StringType.get());
    Types.ListType listOfStringLists = Types.ListType.ofRequired(3, stringList);
    Schema nestedListSchema = new Schema(
            required(1, "id", Types.LongType.get()),
            required(2, "arrayofarrays", listOfStringLists));

    // Arguments 3 and 1L are presumably the record count and random seed; confirm against TestHelper.
    List<Record> randomRecords = TestHelper.generateRandomRecords(nestedListSchema, 3, 1L);

    testComplexTypeWrite(nestedListSchema, randomRecords);
}
Also used : Schema(org.apache.iceberg.Schema) Record(org.apache.iceberg.data.Record) GenericRecord(org.apache.iceberg.data.GenericRecord) Test(org.junit.Test)

Example 74 with Schema

use of org.apache.iceberg.Schema in project hive by apache.

the class TestHiveIcebergComplexTypeWrites method testWriteMapOfArraysInTable.

/**
 * Builds a schema with a required {@code id} column and a required map from strings to string lists, generates
 * random records for it, and passes both to {@code testComplexTypeWrite}.
 *
 * @throws IOException declared by the test; presumably propagated from {@code testComplexTypeWrite}
 */
@Test
public void testWriteMapOfArraysInTable() throws IOException {
    Types.ListType stringList = Types.ListType.ofRequired(5, Types.StringType.get());
    Types.MapType stringToStringList = Types.MapType.ofRequired(3, 4, Types.StringType.get(), stringList);
    Schema mapOfListsSchema = new Schema(
            required(1, "id", Types.LongType.get()),
            required(2, "mapofarrays", stringToStringList));

    // Arguments 5 and 0L are presumably the record count and random seed; confirm against TestHelper.
    List<Record> randomRecords = TestHelper.generateRandomRecords(mapOfListsSchema, 5, 0L);

    testComplexTypeWrite(mapOfListsSchema, randomRecords);
}
Also used : Schema(org.apache.iceberg.Schema) Record(org.apache.iceberg.data.Record) GenericRecord(org.apache.iceberg.data.GenericRecord) Test(org.junit.Test)

Example 75 with Schema

use of org.apache.iceberg.Schema in project hive by apache.

the class TestHiveIcebergComplexTypeWrites method testWriteMapOfStructsInTable.

/**
 * Builds a schema with a required {@code id} column and a required map from strings to a three-field struct,
 * generates random records for it, and passes both to {@code testComplexTypeWrite}.
 *
 * @throws IOException declared by the test; presumably propagated from {@code testComplexTypeWrite}
 */
@Test
public void testWriteMapOfStructsInTable() throws IOException {
    Types.StructType valueStruct = Types.StructType.of(
            required(5, "something", Types.StringType.get()),
            required(6, "someone", Types.StringType.get()),
            required(7, "somewhere", Types.StringType.get()));
    Types.MapType stringToStruct = Types.MapType.ofRequired(3, 4, Types.StringType.get(), valueStruct);
    Schema mapOfStructsSchema = new Schema(
            required(1, "id", Types.LongType.get()),
            required(2, "mapofstructs", stringToStruct));

    // Arguments 5 and 0L are presumably the record count and random seed; confirm against TestHelper.
    List<Record> randomRecords = TestHelper.generateRandomRecords(mapOfStructsSchema, 5, 0L);

    testComplexTypeWrite(mapOfStructsSchema, randomRecords);
}
Also used : Schema(org.apache.iceberg.Schema) Record(org.apache.iceberg.data.Record) GenericRecord(org.apache.iceberg.data.GenericRecord) Test(org.junit.Test)

Aggregations

Schema (org.apache.iceberg.Schema)126 Test (org.junit.Test)93 Record (org.apache.iceberg.data.Record)68 Table (org.apache.iceberg.Table)55 PartitionSpec (org.apache.iceberg.PartitionSpec)39 GenericRecord (org.apache.iceberg.data.GenericRecord)36 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)30 List (java.util.List)21 TableIdentifier (org.apache.iceberg.catalog.TableIdentifier)20 IOException (java.io.IOException)16 Types (org.apache.iceberg.types.Types)16 ArrayList (java.util.ArrayList)15 Map (java.util.Map)14 HashMap (java.util.HashMap)13 FileFormat (org.apache.iceberg.FileFormat)13 UpdateSchema (org.apache.iceberg.UpdateSchema)12 Path (org.apache.hadoop.fs.Path)11 Collectors (java.util.stream.Collectors)10 ImmutableList (org.apache.iceberg.relocated.com.google.common.collect.ImmutableList)10 TestHelper (org.apache.iceberg.mr.TestHelper)9