
Example 31 with Record

Use of org.apache.iceberg.data.Record in project hive by apache.

From class TestHiveIcebergInserts, method testInsertFromSelectWithOrderBy.

/**
 * Testing map-reduce inserts.
 * @throws IOException If there is an underlying IOException
 */
@Test
public void testInsertFromSelectWithOrderBy() throws IOException {
    Table table = testTables.createTable(shell, "customers", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, fileFormat, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS);
    // The ORDER BY clause forces a shuffle, so we expect both mappers and reducers here
    shell.executeStatement("INSERT INTO customers SELECT * FROM customers ORDER BY customer_id");
    // Check that everything is duplicated as expected
    List<Record> records = new ArrayList<>(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS);
    records.addAll(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS);
    HiveIcebergTestUtils.validateData(table, records, 0);
}
Also used : Table(org.apache.iceberg.Table) ArrayList(java.util.ArrayList) Record(org.apache.iceberg.data.Record) Test(org.junit.Test)
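The test above validates against a List<Record> built from shared fixtures. For reference, a minimal sketch of building such records by hand with Iceberg's GenericRecord factory; the schema and field values below are hypothetical stand-ins, not the actual contents of HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS:

private List<Record> buildCustomerRecords() {
    // Hypothetical two-column schema mirroring the customers table above
    Schema schema = new Schema(
        required(1, "customer_id", Types.LongType.get()),
        required(2, "first_name", Types.StringType.get()));
    // GenericRecord.create returns a mutable Record bound to that schema
    Record alice = GenericRecord.create(schema);
    alice.setField("customer_id", 1L);
    alice.setField("first_name", "Alice");
    // copy(field, value, ...) clones the record with the given fields overridden
    Record bob = alice.copy("customer_id", 2L, "first_name", "Bob");
    return Arrays.asList(alice, bob);
}
Also used : Schema(org.apache.iceberg.Schema) GenericRecord(org.apache.iceberg.data.GenericRecord) Types(org.apache.iceberg.types.Types) NestedField.required(org.apache.iceberg.types.Types.NestedField.required) Arrays(java.util.Arrays) List(java.util.List) Record(org.apache.iceberg.data.Record)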

Example 32 with Record

Use of org.apache.iceberg.data.Record in project hive by apache.

From class TestHiveIcebergOutputCommitter, method testSuccessfulUnpartitionedWrite.

@Test
public void testSuccessfulUnpartitionedWrite() throws IOException {
    HiveIcebergOutputCommitter committer = new HiveIcebergOutputCommitter();
    // Create an unpartitioned table and a job configured with a single task
    Table table = table(temp.getRoot().getPath(), false);
    JobConf conf = jobConf(table, 1);
    List<Record> expected = writeRecords(table.name(), 1, 0, true, false, conf);
    committer.commitJob(new JobContextImpl(conf, JOB_ID));
    // One task writing to an unpartitioned table should produce exactly one file
    HiveIcebergTestUtils.validateFiles(table, conf, JOB_ID, 1);
    HiveIcebergTestUtils.validateData(table, expected, 0);
}
Also used : JobContextImpl(org.apache.hadoop.mapred.JobContextImpl) Table(org.apache.iceberg.Table) Record(org.apache.iceberg.data.Record) JobConf(org.apache.hadoop.mapred.JobConf) Test(org.junit.Test)

Example 33 with Record

Use of org.apache.iceberg.data.Record in project hive by apache.

From class TestHiveIcebergOutputCommitter, method testSuccessfulMultipleTasksUnpartitionedWrite.

@Test
public void testSuccessfulMultipleTasksUnpartitionedWrite() throws IOException {
    HiveIcebergOutputCommitter committer = new HiveIcebergOutputCommitter();
    Table table = table(temp.getRoot().getPath(), false);
    // Same as above, but two tasks write to the unpartitioned table in parallel
    JobConf conf = jobConf(table, 2);
    List<Record> expected = writeRecords(table.name(), 2, 0, true, false, conf);
    committer.commitJob(new JobContextImpl(conf, JOB_ID));
    // Two tasks are expected to produce two data files
    HiveIcebergTestUtils.validateFiles(table, conf, JOB_ID, 2);
    HiveIcebergTestUtils.validateData(table, expected, 0);
}
Also used : JobContextImpl(org.apache.hadoop.mapred.JobContextImpl) Table(org.apache.iceberg.Table) Record(org.apache.iceberg.data.Record) JobConf(org.apache.hadoop.mapred.JobConf) Test(org.junit.Test)
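Both committer tests drive the task-side writes through the test-local writeRecords helper and then call commitJob directly. For orientation, a hedged sketch of the full org.apache.hadoop.mapred.OutputCommitter call sequence a real job would run through (context construction is elided; jobContext and taskAttemptContext are assumed to exist):

// Typical commit protocol order for a single successful task attempt
committer.setupJob(jobContext);
committer.setupTask(taskAttemptContext);
if (committer.needsTaskCommit(taskAttemptContext)) {
    // Publishes the attempt's output so commitJob can pick it up
    committer.commitTask(taskAttemptContext);
}
// For HiveIcebergOutputCommitter this is presumably where the Iceberg
// snapshot gets committed, which is why the tests call it directly
committer.commitJob(jobContext);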

Example 34 with Record

Use of org.apache.iceberg.data.Record in project hive by apache.

From class TestHiveIcebergOutputCommitter, method testSuccessfulPartitionedWrite.

@Test
public void testSuccessfulPartitionedWrite() throws IOException {
    HiveIcebergOutputCommitter committer = new HiveIcebergOutputCommitter();
    // Create a partitioned table this time
    Table table = table(temp.getRoot().getPath(), true);
    JobConf conf = jobConf(table, 1);
    List<Record> expected = writeRecords(table.name(), 1, 0, true, false, conf);
    committer.commitJob(new JobContextImpl(conf, JOB_ID));
    // Expecting 3 files with the fanout writer, and 4 with the ClusteredWriter,
    // where writing to already completed partitions is allowed.
    HiveIcebergTestUtils.validateFiles(table, conf, JOB_ID, 4);
    HiveIcebergTestUtils.validateData(table, expected, 0);
}
Also used : JobContextImpl(org.apache.hadoop.mapred.JobContextImpl) Table(org.apache.iceberg.Table) Record(org.apache.iceberg.data.Record) JobConf(org.apache.hadoop.mapred.JobConf) Test(org.junit.Test)
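The table(path, true) call above creates the partitioned variant through a test-local helper. As a hedged sketch, creating a comparable partitioned Iceberg table with the public API could look like the following; the schema, partition column, and location are assumptions, not the helper's actual values:

private Table createPartitionedTable(Configuration conf, String location) {
    Schema schema = new Schema(
        required(1, "customer_id", Types.LongType.get()),
        required(2, "first_name", Types.StringType.get()));
    // Identity partitioning: one partition per distinct customer_id value
    PartitionSpec spec = PartitionSpec.builderFor(schema)
        .identity("customer_id")
        .build();
    // HadoopTables manages catalog-less tables addressed by a filesystem path
    return new HadoopTables(conf).create(schema, spec, location);
}
Also used : PartitionSpec(org.apache.iceberg.PartitionSpec) HadoopTables(org.apache.iceberg.hadoop.HadoopTables) Configuration(org.apache.hadoop.conf.Configuration)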

Example 35 with Record

Use of org.apache.iceberg.data.Record in project hive by apache.

From class TestHiveIcebergSchemaEvolution, method testAddColumnIntoStructToIcebergTable.

@Test
public void testAddColumnIntoStructToIcebergTable() throws IOException {
    // Create an Iceberg table with the columns id and person, where person is a struct
    // consisting of the columns first_name and last_name.
    Schema schema = new Schema(
        required(1, "id", Types.LongType.get()),
        required(2, "person", Types.StructType.of(
            required(3, "first_name", Types.StringType.get()),
            required(4, "last_name", Types.StringType.get()))));
    List<Record> people = TestHelper.generateRandomRecords(schema, 3, 0L);
    Table icebergTable = testTables.createTable(shell, "people", schema, fileFormat, people);
    // Add a new column (age, of type long) to the person struct of the Iceberg table
    icebergTable.updateSchema().addColumn("person", "age", Types.LongType.get()).commit();
    Schema schemaWithAge = new Schema(
        required(1, "id", Types.LongType.get()),
        required(2, "person", Types.StructType.of(
            required(3, "first_name", Types.StringType.get()),
            required(4, "last_name", Types.StringType.get()),
            optional(5, "age", Types.LongType.get()))));
    List<Record> newPeople = TestHelper.generateRandomRecords(schemaWithAge, 2, 10L);
    // Also append new entries to the table where the age column is set.
    icebergTable = testTables.loadTable(TableIdentifier.of("default", "people"));
    testTables.appendIcebergTable(shell.getHiveConf(), icebergTable, fileFormat, null, newPeople);
    List<Record> sortedExpected = new ArrayList<>(people);
    sortedExpected.addAll(newPeople);
    sortedExpected.sort(Comparator.comparingLong(record -> (Long) record.get(0)));
    List<Object[]> rows = shell.executeStatement("SELECT id, person.first_name, person.last_name, person.age FROM default.people order by id");
    Assert.assertEquals(sortedExpected.size(), rows.size());
    for (int i = 0; i < sortedExpected.size(); i++) {
        Object[] row = rows.get(i);
        Long id = (Long) sortedExpected.get(i).get(0);
        Record person = (Record) sortedExpected.get(i).getField("person");
        String lastName = (String) person.getField("last_name");
        String firstName = (String) person.getField("first_name");
        Long age = null;
        if (person.getField("age") != null) {
            age = (Long) person.getField("age");
        }
        Assert.assertEquals(id, (Long) row[0]);
        Assert.assertEquals(firstName, (String) row[1]);
        Assert.assertEquals(lastName, (String) row[2]);
        Assert.assertEquals(age, row[3]);
    }
    // Insert data with the age column from Hive: one entry with a filled age and one with a null age.
    shell.executeStatement("CREATE TABLE dummy_tbl (id bigint, first_name string, last_name string, age bigint)");
    shell.executeStatement("INSERT INTO dummy_tbl VALUES (1, 'Lily', 'Blue', 34), (2, 'Roni', 'Grey', NULL)");
    shell.executeStatement("INSERT INTO default.people SELECT id, named_struct('first_name', first_name, " + "'last_name', last_name, 'age', age) from dummy_tbl");
    rows = shell.executeStatement("SELECT id, person.first_name, person.last_name, person.age FROM default.people " + "where id in (1, 2) order by id");
    Assert.assertEquals(2, rows.size());
    Assert.assertEquals((Long) 1L, (Long) rows.get(0)[0]);
    Assert.assertEquals("Lily", (String) rows.get(0)[1]);
    Assert.assertEquals("Blue", (String) rows.get(0)[2]);
    Assert.assertEquals((Long) 34L, (Long) rows.get(0)[3]);
    Assert.assertEquals((Long) 2L, (Long) rows.get(1)[0]);
    Assert.assertEquals("Roni", (String) rows.get(1)[1]);
    Assert.assertEquals("Grey", (String) rows.get(1)[2]);
    Assert.assertNull(rows.get(1)[3]);
}
Also used : AssertHelpers(org.apache.iceberg.AssertHelpers) Types(org.apache.iceberg.types.Types) TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) Table(org.apache.iceberg.Table) HiveSchemaUtil(org.apache.iceberg.hive.HiveSchemaUtil) TException(org.apache.thrift.TException) NestedField.optional(org.apache.iceberg.types.Types.NestedField.optional) IOException(java.io.IOException) Test(org.junit.Test) TestHelper(org.apache.iceberg.mr.TestHelper) ImmutableList(org.apache.iceberg.relocated.com.google.common.collect.ImmutableList) Schema(org.apache.iceberg.Schema) ArrayList(java.util.ArrayList) BigDecimal(java.math.BigDecimal) FieldSchema(org.apache.hadoop.hive.metastore.api.FieldSchema) List(java.util.List) Record(org.apache.iceberg.data.Record) NestedField.required(org.apache.iceberg.types.Types.NestedField.required) Assume(org.junit.Assume) Comparator(java.util.Comparator) Assert(org.junit.Assert)
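The assertions above read the evolved table back through Hive SQL. As a hedged alternative for the same kind of check, the records can be read directly with Iceberg's generic reader; the projection and filter below are illustrative assumptions:

private void printPeople(Table table) throws IOException {
    // IcebergGenerics scans the table as generic Records; select() projects
    // columns and where() pushes the filter down into the scan
    try (CloseableIterable<Record> rows = IcebergGenerics.read(table)
            .select("id", "person")
            .where(Expressions.greaterThanOrEqual("id", 0L))
            .build()) {
        for (Record row : rows) {
            System.out.println(row.getField("id") + " -> " + row.getField("person"));
        }
    }
}
Also used : IcebergGenerics(org.apache.iceberg.data.IcebergGenerics) CloseableIterable(org.apache.iceberg.io.CloseableIterable) Expressions(org.apache.iceberg.expressions.Expressions)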

Aggregations

Record (org.apache.iceberg.data.Record) 114
Test (org.junit.Test) 99
Schema (org.apache.iceberg.Schema) 68
Table (org.apache.iceberg.Table) 51
GenericRecord (org.apache.iceberg.data.GenericRecord) 51
PartitionSpec (org.apache.iceberg.PartitionSpec) 19
ArrayList (java.util.ArrayList) 14
List (java.util.List) 13
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema) 12
HashMap (java.util.HashMap) 11
IcebergBaseTest (org.apache.drill.metastore.iceberg.IcebergBaseTest) 11
TestHelper (org.apache.iceberg.mr.TestHelper) 11
ImmutableList (org.apache.iceberg.relocated.com.google.common.collect.ImmutableList) 10
Types (org.apache.iceberg.types.Types) 10
Map (java.util.Map) 9
IOException (java.io.IOException) 8
ImmutableMap (org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap) 8
FileFormat (org.apache.iceberg.FileFormat) 7
DeleteFile (org.apache.iceberg.DeleteFile) 6
NestedField.optional (org.apache.iceberg.types.Types.NestedField.optional) 6