
Example 96 with Record

Use of org.apache.iceberg.data.Record in project hive by apache.

From class TestHiveIcebergSchemaEvolution, method testAddColumnToIcebergTable.

@Test
public void testAddColumnToIcebergTable() throws IOException {
    // Create an Iceberg table with the columns customer_id, first_name and last_name, and populate it with some initial data.
    Table icebergTable = testTables.createTable(shell, "customers", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
        fileFormat, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS);
    // Add a new column (age long) to the Iceberg table.
    icebergTable.updateSchema().addColumn("age", Types.LongType.get()).commit();
    if (testTableType != TestTables.TestTableType.HIVE_CATALOG) {
        // We need to update columns for non-Hive catalogs
        shell.executeStatement("ALTER TABLE customers UPDATE COLUMNS");
    }
    Schema customerSchemaWithAge = new Schema(
        optional(1, "customer_id", Types.LongType.get()),
        optional(2, "first_name", Types.StringType.get(), "This is first name"),
        optional(3, "last_name", Types.StringType.get(), "This is last name"),
        optional(4, "age", Types.LongType.get()));
    // Also add a new entry to the table where the age column is set.
    icebergTable = testTables.loadTable(TableIdentifier.of("default", "customers"));
    List<Record> newCustomerWithAge = TestHelper.RecordsBuilder.newInstance(customerSchemaWithAge)
        .add(3L, "James", "Red", 34L)
        .add(4L, "Lily", "Blue", null)
        .build();
    testTables.appendIcebergTable(shell.getHiveConf(), icebergTable, fileFormat, null, newCustomerWithAge);
    // Do a 'select *' from Hive and check if the age column appears in the result.
    // It should be null for the old data and should be filled for the data added after the column addition.
    TestHelper.RecordsBuilder customersWithAgeBuilder = TestHelper.RecordsBuilder.newInstance(customerSchemaWithAge)
        .add(0L, "Alice", "Brown", null)
        .add(1L, "Bob", "Green", null)
        .add(2L, "Trudy", "Pink", null)
        .add(3L, "James", "Red", 34L)
        .add(4L, "Lily", "Blue", null);
    List<Record> customersWithAge = customersWithAgeBuilder.build();
    List<Object[]> rows = shell.executeStatement("SELECT * FROM default.customers");
    HiveIcebergTestUtils.validateData(customersWithAge, HiveIcebergTestUtils.valueForRow(customerSchemaWithAge, rows), 0);
    // Do a 'select customer_id, age' from Hive to check if the new column can be queried from Hive.
    // The customer_id is needed because of the result sorting.
    Schema customerSchemaWithAgeOnly = new Schema(
        optional(1, "customer_id", Types.LongType.get()),
        optional(4, "age", Types.LongType.get()));
    TestHelper.RecordsBuilder customerWithAgeOnlyBuilder = TestHelper.RecordsBuilder.newInstance(customerSchemaWithAgeOnly)
        .add(0L, null)
        .add(1L, null)
        .add(2L, null)
        .add(3L, 34L)
        .add(4L, null);
    List<Record> customersWithAgeOnly = customerWithAgeOnlyBuilder.build();
    rows = shell.executeStatement("SELECT customer_id, age FROM default.customers");
    HiveIcebergTestUtils.validateData(customersWithAgeOnly, HiveIcebergTestUtils.valueForRow(customerSchemaWithAgeOnly, rows), 0);
    // Insert some data with age column from Hive. Insert an entry with null age and an entry with filled age.
    shell.executeStatement("INSERT INTO default.customers values (5L, 'Lily', 'Magenta', NULL), (6L, 'Roni', 'Purple', 23L)");
    customersWithAgeBuilder.add(5L, "Lily", "Magenta", null).add(6L, "Roni", "Purple", 23L);
    customersWithAge = customersWithAgeBuilder.build();
    rows = shell.executeStatement("SELECT * FROM default.customers");
    HiveIcebergTestUtils.validateData(customersWithAge, HiveIcebergTestUtils.valueForRow(customerSchemaWithAge, rows), 0);
    customerWithAgeOnlyBuilder.add(5L, null).add(6L, 23L);
    customersWithAgeOnly = customerWithAgeOnlyBuilder.build();
    rows = shell.executeStatement("SELECT customer_id, age FROM default.customers");
    HiveIcebergTestUtils.validateData(customersWithAgeOnly, HiveIcebergTestUtils.valueForRow(customerSchemaWithAgeOnly, rows), 0);
}
Also used: TestHelper(org.apache.iceberg.mr.TestHelper) Table(org.apache.iceberg.Table) Schema(org.apache.iceberg.Schema) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) Record(org.apache.iceberg.data.Record) Test(org.junit.Test)
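
Stripped of the Hive test harness, the schema-evolution pattern above reduces to a handful of core Iceberg API calls. The following is a minimal sketch of that flow; the class name, the HadoopTables catalog, the table location, and the field values are hypothetical stand-ins, while updateSchema(), GenericRecord.create() and setField() are the same Iceberg APIs exercised by the test.

import org.apache.iceberg.Table;
import org.apache.iceberg.data.GenericRecord;
import org.apache.iceberg.data.Record;
import org.apache.iceberg.hadoop.HadoopTables;
import org.apache.iceberg.types.Types;

public class SchemaEvolutionSketch {
    public static void main(String[] args) {
        // Hypothetical table location; any Iceberg catalog would do here.
        Table table = new HadoopTables().load("/tmp/warehouse/default/customers");
        // Add the new column; rows written before the change read back as null.
        table.updateSchema().addColumn("age", Types.LongType.get()).commit();
        // Build a record against the refreshed schema, age included.
        Record withAge = GenericRecord.create(table.schema());
        withAge.setField("customer_id", 3L);
        withAge.setField("first_name", "James");
        withAge.setField("last_name", "Red");
        withAge.setField("age", 34L);
    }
}

Actually appending the record still goes through a file writer, which the tests above delegate to testTables.appendIcebergTable and which is omitted here.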

Example 97 with Record

Use of org.apache.iceberg.data.Record in project hive by apache.

From class TestHiveIcebergStorageHandlerLocalScan, method testArrayOfMapsInTable.

@Test
public void testArrayOfMapsInTable() throws IOException {
    Schema schema = new Schema(required(1, "arrayofmaps", Types.ListType.ofRequired(2, Types.MapType.ofRequired(3, 4, Types.StringType.get(), Types.BooleanType.get()))));
    List<Record> records = testTables.createTableWithGeneratedRecords(shell, "arraytable", schema, fileFormat, 1);
    // access an element from a map in an array
    for (int i = 0; i < records.size(); i++) {
        List<?> expectedList = (List<?>) records.get(i).getField("arrayofmaps");
        for (int j = 0; j < expectedList.size(); j++) {
            Map<?, ?> expectedMap = (Map<?, ?>) expectedList.get(j);
            for (Map.Entry<?, ?> entry : expectedMap.entrySet()) {
                List<Object[]> queryResult = shell.executeStatement(String.format(
                    "SELECT arrayofmaps[%d][\"%s\"] FROM default.arraytable LIMIT 1 OFFSET %d", j, entry.getKey(), i));
                Assert.assertEquals(entry.getValue(), queryResult.get(0)[0]);
            }
        }
    }
}
Also used: Schema(org.apache.iceberg.Schema) GenericRecord(org.apache.iceberg.data.GenericRecord) Record(org.apache.iceberg.data.Record) ArrayList(java.util.ArrayList) ImmutableList(org.apache.iceberg.relocated.com.google.common.collect.ImmutableList) List(java.util.List) ImmutableMap(org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) Map(java.util.Map) Test(org.junit.Test)
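
The generated records here hold ordinary Java collections: the arrayofmaps field comes back as a List of Maps, which is why the test can cast and iterate it directly. A record with the same shape can also be assembled by hand; below is a minimal sketch against the schema above, with made-up class name and values.

import org.apache.iceberg.Schema;
import org.apache.iceberg.data.GenericRecord;
import org.apache.iceberg.data.Record;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.types.Types;

import static org.apache.iceberg.types.Types.NestedField.required;

public class ArrayOfMapsSketch {
    public static void main(String[] args) {
        // Same schema as the test: a required list of string-to-boolean maps.
        Schema schema = new Schema(required(1, "arrayofmaps", Types.ListType.ofRequired(2,
            Types.MapType.ofRequired(3, 4, Types.StringType.get(), Types.BooleanType.get()))));
        Record record = GenericRecord.create(schema);
        // The list-of-maps field maps directly onto java.util.List<java.util.Map<...>>.
        record.setField("arrayofmaps", ImmutableList.of(
            ImmutableMap.of("enabled", true, "visible", false),
            ImmutableMap.of("archived", true)));
    }
}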

Example 98 with Record

Use of org.apache.iceberg.data.Record in project hive by apache.

From class TestHiveIcebergStorageHandlerLocalScan, method testMapOfPrimitivesInTable.

@Test
public void testMapOfPrimitivesInTable() throws IOException {
    Schema schema = new Schema(required(1, "mapofprimitives", Types.MapType.ofRequired(2, 3, Types.StringType.get(), Types.IntegerType.get())));
    List<Record> records = testTables.createTableWithGeneratedRecords(shell, "maptable", schema, fileFormat, 1);
    // access a single value from the map
    for (int i = 0; i < records.size(); i++) {
        Map<?, ?> expectedMap = (Map<?, ?>) records.get(i).getField("mapofprimitives");
        for (Map.Entry<?, ?> entry : expectedMap.entrySet()) {
            List<Object[]> queryResult = shell.executeStatement(String.format(
                "SELECT mapofprimitives[\"%s\"] FROM default.maptable LIMIT 1 OFFSET %d", entry.getKey(), i));
            Assert.assertEquals(entry.getValue(), queryResult.get(0)[0]);
        }
    }
}
Also used: Schema(org.apache.iceberg.Schema) GenericRecord(org.apache.iceberg.data.GenericRecord) Record(org.apache.iceberg.data.Record) ImmutableMap(org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) Map(java.util.Map) Test(org.junit.Test)

Example 99 with Record

Use of org.apache.iceberg.data.Record in project hive by apache.

From class TestHiveIcebergStorageHandlerLocalScan, method testMapOfStructsInTable.

@Test
public void testMapOfStructsInTable() throws IOException {
    Schema schema = new Schema(required(1, "mapofstructs", Types.MapType.ofRequired(2, 3, Types.StringType.get(), Types.StructType.of(required(4, "something", Types.DoubleType.get()), required(5, "someone", Types.LongType.get()), required(6, "somewhere", Types.StringType.get())))));
    List<Record> records = testTables.createTableWithGeneratedRecords(shell, "maptable", schema, fileFormat, 1);
    // access a single element from a struct in a map
    for (int i = 0; i < records.size(); i++) {
        Map<?, ?> expectedMap = (Map<?, ?>) records.get(i).getField("mapofstructs");
        for (Map.Entry<?, ?> entry : expectedMap.entrySet()) {
            List<Object[]> queryResult = shell.executeStatement(String.format(
                "SELECT mapofstructs[\"%s\"].something, mapofstructs[\"%s\"].someone, mapofstructs[\"%s\"].somewhere " +
                "FROM default.maptable LIMIT 1 OFFSET %d", entry.getKey(), entry.getKey(), entry.getKey(), i));
            GenericRecord genericRecord = (GenericRecord) entry.getValue();
            Assert.assertEquals(genericRecord.getField("something"), queryResult.get(0)[0]);
            Assert.assertEquals(genericRecord.getField("someone"), queryResult.get(0)[1]);
            Assert.assertEquals(genericRecord.getField("somewhere"), queryResult.get(0)[2]);
        }
    }
}
Also used: Schema(org.apache.iceberg.Schema) GenericRecord(org.apache.iceberg.data.GenericRecord) Record(org.apache.iceberg.data.Record) ImmutableMap(org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) Map(java.util.Map) Test(org.junit.Test)
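
As the cast in the loop shows, struct values inside a generated record are themselves GenericRecord instances, created against the map's value StructType rather than a full Schema. A minimal sketch of building one such entry by hand (the class name, key and field values are made up):

import org.apache.iceberg.Schema;
import org.apache.iceberg.data.GenericRecord;
import org.apache.iceberg.data.Record;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.types.Types;

import static org.apache.iceberg.types.Types.NestedField.required;

public class MapOfStructsSketch {
    public static void main(String[] args) {
        // The struct type used as the map's value type in the test schema.
        Types.StructType struct = Types.StructType.of(
            required(4, "something", Types.DoubleType.get()),
            required(5, "someone", Types.LongType.get()),
            required(6, "somewhere", Types.StringType.get()));
        // GenericRecord.create also accepts a bare StructType, not just a Schema.
        GenericRecord value = GenericRecord.create(struct);
        value.setField("something", 1.5d);
        value.setField("someone", 42L);
        value.setField("somewhere", "Budapest");
        Schema schema = new Schema(required(1, "mapofstructs",
            Types.MapType.ofRequired(2, 3, Types.StringType.get(), struct)));
        Record row = GenericRecord.create(schema);
        row.setField("mapofstructs", ImmutableMap.of("key0", value));
    }
}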

Example 100 with Record

Use of org.apache.iceberg.data.Record in project hive by apache.

From class TestHiveIcebergStorageHandlerLocalScan, method testMapOfArraysInTable.

@Test
public void testMapOfArraysInTable() throws IOException {
    Schema schema = new Schema(required(1, "mapofarrays", Types.MapType.ofRequired(2, 3, Types.StringType.get(), Types.ListType.ofRequired(4, Types.DateType.get()))));
    List<Record> records = testTables.createTableWithGeneratedRecords(shell, "maptable", schema, fileFormat, 1);
    // access a single element from a list in a map
    for (int i = 0; i < records.size(); i++) {
        Map<?, ?> expectedMap = (Map<?, ?>) records.get(i).getField("mapofarrays");
        for (Map.Entry<?, ?> entry : expectedMap.entrySet()) {
            List<?> expectedList = (List<?>) entry.getValue();
            for (int j = 0; j < expectedList.size(); j++) {
                List<Object[]> queryResult = shell.executeStatement(String.format(
                    "SELECT mapofarrays[\"%s\"][%d] FROM maptable LIMIT 1 OFFSET %d", entry.getKey(), j, i));
                Assert.assertEquals(expectedList.get(j).toString(), queryResult.get(0)[0]);
            }
        }
    }
}
Also used: Schema(org.apache.iceberg.Schema) GenericRecord(org.apache.iceberg.data.GenericRecord) Record(org.apache.iceberg.data.Record) ArrayList(java.util.ArrayList) ImmutableList(org.apache.iceberg.relocated.com.google.common.collect.ImmutableList) List(java.util.List) ImmutableMap(org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) Map(java.util.Map) Test(org.junit.Test)

Aggregations

Record (org.apache.iceberg.data.Record): 114
Test (org.junit.Test): 99
Schema (org.apache.iceberg.Schema): 68
Table (org.apache.iceberg.Table): 51
GenericRecord (org.apache.iceberg.data.GenericRecord): 51
PartitionSpec (org.apache.iceberg.PartitionSpec): 19
ArrayList (java.util.ArrayList): 14
List (java.util.List): 13
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 12
HashMap (java.util.HashMap): 11
IcebergBaseTest (org.apache.drill.metastore.iceberg.IcebergBaseTest): 11
TestHelper (org.apache.iceberg.mr.TestHelper): 11
ImmutableList (org.apache.iceberg.relocated.com.google.common.collect.ImmutableList): 10
Types (org.apache.iceberg.types.Types): 10
Map (java.util.Map): 9
IOException (java.io.IOException): 8
ImmutableMap (org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap): 8
FileFormat (org.apache.iceberg.FileFormat): 7
DeleteFile (org.apache.iceberg.DeleteFile): 6
NestedField.optional (org.apache.iceberg.types.Types.NestedField.optional): 6