Search in sources:

Example 31 with Schema

Use of org.apache.iceberg.Schema in the project hive by apache.

Class TestHiveIcebergTypes, method testDecimalTableWithPredicateLiterals.

@Test
public void testDecimalTableWithPredicateLiterals() throws IOException {
    // Single decimal(7, 2) column, populated with three known values.
    Schema schema = new Schema(required(1, "decimal_field", Types.DecimalType.of(7, 2)));
    List<Record> records = TestHelper.RecordsBuilder.newInstance(schema)
        .add(new BigDecimal("85.00"))
        .add(new BigDecimal("100.56"))
        .add(new BigDecimal("100.57"))
        .build();
    testTables.createTable(shell, "dec_test", schema, fileFormat, records);

    // An integer literal in the predicate should still match decimal values.
    List<Object[]> rows = shell.executeStatement("SELECT * FROM default.dec_test where decimal_field >= 85");
    assertSingleColumnRows(rows, "85.00", "100.56", "100.57");
    // A decimal literal with a smaller scale than the schema type definition.
    rows = shell.executeStatement("SELECT * FROM default.dec_test where decimal_field > 99.1");
    assertSingleColumnRows(rows, "100.56", "100.57");
    // A decimal literal with a higher scale than the schema type definition.
    rows = shell.executeStatement("SELECT * FROM default.dec_test where decimal_field > 100.565");
    assertSingleColumnRows(rows, "100.57");
    // A decimal literal with the same scale as the schema type definition.
    rows = shell.executeStatement("SELECT * FROM default.dec_test where decimal_field > 640.34");
    assertSingleColumnRows(rows);
}

/**
 * Asserts that the result set has exactly one value per row and that the values
 * match {@code expectedValues} in order.
 */
private static void assertSingleColumnRows(List<Object[]> rows, Object... expectedValues) {
    Assert.assertEquals(expectedValues.length, rows.size());
    for (int i = 0; i < expectedValues.length; i++) {
        Assert.assertArrayEquals(new Object[] { expectedValues[i] }, rows.get(i));
    }
}
Also used : Schema(org.apache.iceberg.Schema) Record(org.apache.iceberg.data.Record) GenericRecord(org.apache.iceberg.data.GenericRecord) BigDecimal(java.math.BigDecimal) Test(org.junit.Test)

Example 32 with Schema

Use of org.apache.iceberg.Schema in the project hive by apache.

Class TestHiveIcebergComplexTypeWrites, method testWriteMapOfPrimitivesInTable.

@Test
public void testWriteMapOfPrimitivesInTable() throws IOException {
    // Table with an id column plus a required string-to-string map column.
    Schema schema = new Schema(
        required(1, "id", Types.LongType.get()),
        required(2, "mapofprimitives",
            Types.MapType.ofRequired(3, 4, Types.StringType.get(), Types.StringType.get())));
    // Fixed seed (0L) keeps the generated records reproducible across runs.
    List<Record> generated = TestHelper.generateRandomRecords(schema, 5, 0L);
    testComplexTypeWrite(schema, generated);
}
Also used : Schema(org.apache.iceberg.Schema) Record(org.apache.iceberg.data.Record) GenericRecord(org.apache.iceberg.data.GenericRecord) Test(org.junit.Test)

Example 33 with Schema

Use of org.apache.iceberg.Schema in the project hive by apache.

Class TestHiveIcebergSchemaEvolution, method testAddColumnIntoStructToIcebergTable.

@Test
public void testAddColumnIntoStructToIcebergTable() throws IOException {
    // Create an Iceberg table with the columns id and person, where person is a struct, consists of the
    // columns first_name and last_name.
    Schema schema = new Schema(required(1, "id", Types.LongType.get()), required(2, "person", Types.StructType.of(required(3, "first_name", Types.StringType.get()), required(4, "last_name", Types.StringType.get()))));
    List<Record> people = TestHelper.generateRandomRecords(schema, 3, 0L);
    Table icebergTable = testTables.createTable(shell, "people", schema, fileFormat, people);
    // Add a new column (age long) to the Iceberg table into the person struct
    icebergTable.updateSchema().addColumn("person", "age", Types.LongType.get()).commit();
    // The evolved schema exposes 'age' as an optional field of the person struct.
    Schema schemaWithAge = new Schema(required(1, "id", Types.LongType.get()), required(2, "person", Types.StructType.of(required(3, "first_name", Types.StringType.get()), required(4, "last_name", Types.StringType.get()), optional(5, "age", Types.LongType.get()))));
    List<Record> newPeople = TestHelper.generateRandomRecords(schemaWithAge, 2, 10L);
    // Also add a new entry to the table where the age column is set.
    // Reload the table first so the append sees the committed schema change.
    icebergTable = testTables.loadTable(TableIdentifier.of("default", "people"));
    testTables.appendIcebergTable(shell.getHiveConf(), icebergTable, fileFormat, null, newPeople);
    // Build the expected result set sorted by id, matching the 'order by id' in the query below.
    List<Record> sortedExpected = new ArrayList<>(people);
    sortedExpected.addAll(newPeople);
    sortedExpected.sort(Comparator.comparingLong(record -> (Long) record.get(0)));
    List<Object[]> rows = shell.executeStatement("SELECT id, person.first_name, person.last_name, person.age FROM default.people order by id");
    Assert.assertEquals(sortedExpected.size(), rows.size());
    for (int i = 0; i < sortedExpected.size(); i++) {
        Object[] row = rows.get(i);
        Long id = (Long) sortedExpected.get(i).get(0);
        Record person = (Record) sortedExpected.get(i).getField("person");
        String lastName = (String) person.getField("last_name");
        String firstName = (String) person.getField("first_name");
        // Records written before the schema change have no age; casting null is a
        // no-op, so no explicit null-check (and no second getField call) is needed.
        Long age = (Long) person.getField("age");
        Assert.assertEquals(id, (Long) row[0]);
        Assert.assertEquals(firstName, (String) row[1]);
        Assert.assertEquals(lastName, (String) row[2]);
        Assert.assertEquals(age, row[3]);
    }
    // Insert some data with age column from Hive. Insert an entry with null age and an entry with filled age.
    shell.executeStatement("CREATE TABLE dummy_tbl (id bigint, first_name string, last_name string, age bigint)");
    shell.executeStatement("INSERT INTO dummy_tbl VALUES (1, 'Lily', 'Blue', 34), (2, 'Roni', 'Grey', NULL)");
    shell.executeStatement("INSERT INTO default.people SELECT id, named_struct('first_name', first_name, " + "'last_name', last_name, 'age', age) from dummy_tbl");
    rows = shell.executeStatement("SELECT id, person.first_name, person.last_name, person.age FROM default.people " + "where id in (1, 2) order by id");
    Assert.assertEquals(2, rows.size());
    Assert.assertEquals((Long) 1L, (Long) rows.get(0)[0]);
    Assert.assertEquals("Lily", (String) rows.get(0)[1]);
    Assert.assertEquals("Blue", (String) rows.get(0)[2]);
    Assert.assertEquals((Long) 34L, (Long) rows.get(0)[3]);
    Assert.assertEquals((Long) 2L, (Long) rows.get(1)[0]);
    Assert.assertEquals("Roni", (String) rows.get(1)[1]);
    Assert.assertEquals("Grey", (String) rows.get(1)[2]);
    Assert.assertNull(rows.get(1)[3]);
}
Also used : AssertHelpers(org.apache.iceberg.AssertHelpers) Types(org.apache.iceberg.types.Types) TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) Table(org.apache.iceberg.Table) HiveSchemaUtil(org.apache.iceberg.hive.HiveSchemaUtil) TException(org.apache.thrift.TException) NestedField.optional(org.apache.iceberg.types.Types.NestedField.optional) IOException(java.io.IOException) Test(org.junit.Test) TestHelper(org.apache.iceberg.mr.TestHelper) ImmutableList(org.apache.iceberg.relocated.com.google.common.collect.ImmutableList) Schema(org.apache.iceberg.Schema) ArrayList(java.util.ArrayList) BigDecimal(java.math.BigDecimal) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) List(java.util.List) Record(org.apache.iceberg.data.Record) NestedField.required(org.apache.iceberg.types.Types.NestedField.required) Assume(org.junit.Assume) Comparator(java.util.Comparator) Assert(org.junit.Assert) Table(org.apache.iceberg.Table) Schema(org.apache.iceberg.Schema) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) ArrayList(java.util.ArrayList) Record(org.apache.iceberg.data.Record) Test(org.junit.Test)

Example 34 with Schema

Use of org.apache.iceberg.Schema in the project hive by apache.

Class TestHiveIcebergSchemaEvolution, method testColumnReorders.

@Test
public void testColumnReorders() throws IOException {
    // Six columns of mixed types so every reorder is observable in the SELECT * output.
    Schema schema = new Schema(
        required(1, "a", Types.LongType.get()),
        required(2, "b", Types.StringType.get()),
        required(3, "c", Types.StringType.get()),
        required(4, "d", Types.IntegerType.get()),
        required(5, "e", Types.IntegerType.get()),
        required(6, "f", Types.StringType.get()));
    testTables.createTable(shell, "customers", schema, fileFormat, ImmutableList.of());
    shell.executeStatement("INSERT INTO customers VALUES (1, 'foo', 'bar', 33, 44, 'baz'), " + "(2, 'foo2', 'bar2', 55, 66, 'baz2')");

    // Move one position to the right: a,b,c,d,e,f -> b,a,c,d,e,f
    shell.executeStatement("ALTER TABLE customers CHANGE COLUMN a a bigint AFTER b");
    assertReorderedRows(
        new Object[] { "foo", 1L, "bar", 33, 44, "baz" },
        new Object[] { "foo2", 2L, "bar2", 55, 66, "baz2" });

    // Move the first column to the last: b,a,c,d,e,f -> a,c,d,e,f,b
    shell.executeStatement("ALTER TABLE customers CHANGE COLUMN b b string AFTER f");
    assertReorderedRows(
        new Object[] { 1L, "bar", 33, 44, "baz", "foo" },
        new Object[] { 2L, "bar2", 55, 66, "baz2", "foo2" });

    // Move a middle column to the first: a,c,d,e,f,b -> e,a,c,d,f,b
    shell.executeStatement("ALTER TABLE customers CHANGE COLUMN e e int FIRST");
    assertReorderedRows(
        new Object[] { 44, 1L, "bar", 33, "baz", "foo" },
        new Object[] { 66, 2L, "bar2", 55, "baz2", "foo2" });

    // Move one position to the left: e,a,c,d,f,b -> e,a,d,c,f,b
    shell.executeStatement("ALTER TABLE customers CHANGE COLUMN d d int AFTER a");
    assertReorderedRows(
        new Object[] { 44, 1L, 33, "bar", "baz", "foo" },
        new Object[] { 66, 2L, 55, "bar2", "baz2", "foo2" });
}

/**
 * Runs a full select ordered by column {@code a} and verifies that exactly two rows
 * come back, matching the expected column layouts after a reorder.
 */
private void assertReorderedRows(Object[] expectedFirst, Object[] expectedSecond) {
    List<Object[]> result = shell.executeStatement("SELECT * FROM customers ORDER BY a");
    Assert.assertEquals(2, result.size());
    Assert.assertArrayEquals(expectedFirst, result.get(0));
    Assert.assertArrayEquals(expectedSecond, result.get(1));
}
Also used : Schema(org.apache.iceberg.Schema) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) Test(org.junit.Test)

Example 35 with Schema

Use of org.apache.iceberg.Schema in the project hive by apache.

Class TestHiveIcebergSchemaEvolution, method testMoveLastNameBeforeCustomerIdInIcebergTable.

@Test
public void testMoveLastNameBeforeCustomerIdInIcebergTable() throws IOException {
    // Create an Iceberg table with the columns customer_id, first_name and last_name with some initial data.
    Table table = testTables.createTable(shell, "customers", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, fileFormat, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS);
    // Move the last_name column before the customer_id in the table schema.
    table.updateSchema().moveBefore("last_name", "customer_id").commit();
    // Expected schema after the move: last_name first, then customer_id, then first_name.
    Schema reorderedSchema = new Schema(
        optional(1, "last_name", Types.StringType.get(), "This is last name"),
        optional(2, "customer_id", Types.LongType.get()),
        optional(3, "first_name", Types.StringType.get(), "This is first name"));
    TestHelper.RecordsBuilder expectedBuilder = TestHelper.RecordsBuilder.newInstance(reorderedSchema)
        .add("Brown", 0L, "Alice")
        .add("Green", 1L, "Bob")
        .add("Pink", 2L, "Trudy");
    List<Record> expected = expectedBuilder.build();
    // A 'select *' should reflect the new column order.
    List<Object[]> queryResult = shell.executeStatement("SELECT * FROM default.customers");
    HiveIcebergTestUtils.validateData(expected, HiveIcebergTestUtils.valueForRow(reorderedSchema, queryResult), 1);
    // Selecting by column names should still return the original layout.
    queryResult = shell.executeStatement("SELECT customer_id, first_name, last_name FROM default.customers");
    HiveIcebergTestUtils.validateData(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, HiveIcebergTestUtils.valueForRow(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, queryResult), 0);
    // Inserting from Hive must also use the new order: last_name before customer_id in the values list.
    shell.executeStatement("INSERT INTO default.customers values ('Magenta', 3L, 'Lily')");
    expectedBuilder.add("Magenta", 3L, "Lily");
    expected = expectedBuilder.build();
    queryResult = shell.executeStatement("SELECT * FROM default.customers");
    HiveIcebergTestUtils.validateData(expected, HiveIcebergTestUtils.valueForRow(reorderedSchema, queryResult), 1);
}
Also used : TestHelper(org.apache.iceberg.mr.TestHelper) Table(org.apache.iceberg.Table) Schema(org.apache.iceberg.Schema) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) Record(org.apache.iceberg.data.Record) Test(org.junit.Test)

Aggregations

Schema (org.apache.iceberg.Schema)126 Test (org.junit.Test)93 Record (org.apache.iceberg.data.Record)68 Table (org.apache.iceberg.Table)55 PartitionSpec (org.apache.iceberg.PartitionSpec)39 GenericRecord (org.apache.iceberg.data.GenericRecord)36 FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema)30 List (java.util.List)21 TableIdentifier (org.apache.iceberg.catalog.TableIdentifier)20 IOException (java.io.IOException)16 Types (org.apache.iceberg.types.Types)16 ArrayList (java.util.ArrayList)15 Map (java.util.Map)14 HashMap (java.util.HashMap)13 FileFormat (org.apache.iceberg.FileFormat)13 UpdateSchema (org.apache.iceberg.UpdateSchema)12 Path (org.apache.hadoop.fs.Path)11 Collectors (java.util.stream.Collectors)10 ImmutableList (org.apache.iceberg.relocated.com.google.common.collect.ImmutableList)10 TestHelper (org.apache.iceberg.mr.TestHelper)9