Example 41 with Table

use of org.apache.iceberg.Table in project hive by apache.

the class TestHiveIcebergV2 method testReadAndWriteFormatV2UnpartitionedWithEqDelete.

@Test
public void testReadAndWriteFormatV2UnpartitionedWithEqDelete() throws IOException {
    Assume.assumeFalse("Reading V2 tables with delete files are only supported currently in " + "non-vectorized mode and only Parquet/Avro", isVectorized || fileFormat == FileFormat.ORC);
    Table tbl = testTables.createTable(shell, "customers", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, PartitionSpec.unpartitioned(), fileFormat, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, 2);
    // delete one of the rows
    List<Record> toDelete = TestHelper.RecordsBuilder.newInstance(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA).add(1L, "Bob", null).build();
    DeleteFile deleteFile = HiveIcebergTestUtils.createEqualityDeleteFile(tbl, "dummyPath", ImmutableList.of("customer_id", "first_name"), fileFormat, toDelete);
    tbl.newRowDelta().addDeletes(deleteFile).commit();
    List<Object[]> objects = shell.executeStatement("SELECT * FROM customers ORDER BY customer_id");
    // only the other two rows are present
    Assert.assertEquals(2, objects.size());
    Assert.assertArrayEquals(new Object[] { 0L, "Alice", "Brown" }, objects.get(0));
    Assert.assertArrayEquals(new Object[] { 2L, "Trudy", "Pink" }, objects.get(1));
}
Also used : Table(org.apache.iceberg.Table) Record(org.apache.iceberg.data.Record) DeleteFile(org.apache.iceberg.DeleteFile) Test(org.junit.Test)
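For reference, a minimal standalone sketch of what an equality delete expresses, outside the Hive test harness (the schema and values below are illustrative, mirroring the CUSTOMER_SCHEMA shape):

import org.apache.iceberg.Schema;
import org.apache.iceberg.data.GenericRecord;
import org.apache.iceberg.data.Record;
import org.apache.iceberg.types.Types;
import static org.apache.iceberg.types.Types.NestedField.required;

public class EqualityDeleteSketch {
    public static void main(String[] args) {
        // Illustrative schema matching the customers table used above.
        Schema schema = new Schema(
            required(1, "customer_id", Types.LongType.get()),
            required(2, "first_name", Types.StringType.get()),
            required(3, "last_name", Types.StringType.get()));
        // An equality delete row removes every data row whose values match on
        // the configured equality fields (customer_id, first_name); columns
        // outside that set are ignored, which is why the test can pass null
        // for last_name.
        Record deleteRow = GenericRecord.create(schema);
        deleteRow.setField("customer_id", 1L);
        deleteRow.setField("first_name", "Bob");
        System.out.println("Equality delete matches rows where: " + deleteRow);
    }
}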

Example 42 with Table

use of org.apache.iceberg.Table in project hive by apache.

the class TestHiveIcebergV2 method testReadAndWriteFormatV2Partitioned_PosDelete_RowSupplied.

@Test
public void testReadAndWriteFormatV2Partitioned_PosDelete_RowSupplied() throws IOException {
    Assume.assumeFalse("Reading V2 tables with delete files are only supported currently in " + "non-vectorized mode and only Parquet/Avro", isVectorized || fileFormat == FileFormat.ORC);
    PartitionSpec spec = PartitionSpec.builderFor(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA).identity("customer_id").build();
    Table tbl = testTables.createTable(shell, "customers", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, spec, fileFormat, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, 2);
    // add some more data to the same partition
    shell.executeStatement("insert into customers values (0, 'Laura', 'Yellow'), (0, 'John', 'Green'), " + "(0, 'Blake', 'Blue')");
    tbl.refresh();
    // delete the first and third rows from the newly-added data file
    DataFile dataFile = StreamSupport.stream(tbl.currentSnapshot().addedFiles().spliterator(), false).filter(file -> file.partition().get(0, Long.class) == 0L).filter(file -> file.recordCount() == 3).findAny().orElseThrow(() -> new RuntimeException("Did not find the desired data file in the test table"));
    List<Record> rowsToDel = TestHelper.RecordsBuilder.newInstance(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA).add(0L, "Laura", "Yellow").add(0L, "Blake", "Blue").build();
    List<PositionDelete<Record>> deletes = ImmutableList.of(positionDelete(dataFile.path(), 0L, rowsToDel.get(0)), positionDelete(dataFile.path(), 2L, rowsToDel.get(1)));
    DeleteFile deleteFile = HiveIcebergTestUtils.createPositionalDeleteFile(tbl, "dummyPath", fileFormat, ImmutableMap.of("customer_id", 0L), deletes);
    tbl.newRowDelta().addDeletes(deleteFile).commit();
    List<Object[]> objects = shell.executeStatement("SELECT * FROM customers ORDER BY customer_id, first_name");
    Assert.assertEquals(4, objects.size());
    Assert.assertArrayEquals(new Object[] { 0L, "Alice", "Brown" }, objects.get(0));
    Assert.assertArrayEquals(new Object[] { 0L, "John", "Green" }, objects.get(1));
    Assert.assertArrayEquals(new Object[] { 1L, "Bob", "Green" }, objects.get(2));
    Assert.assertArrayEquals(new Object[] { 2L, "Trudy", "Pink" }, objects.get(3));
}
Also used : DataFile(org.apache.iceberg.DataFile) Types(org.apache.iceberg.types.Types) ImmutableMap(org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap) Table(org.apache.iceberg.Table) NestedField.optional(org.apache.iceberg.types.Types.NestedField.optional) IOException(java.io.IOException) Test(org.junit.Test) TestHelper(org.apache.iceberg.mr.TestHelper) ImmutableList(org.apache.iceberg.relocated.com.google.common.collect.ImmutableList) Schema(org.apache.iceberg.Schema) FileFormat(org.apache.iceberg.FileFormat) List(java.util.List) Record(org.apache.iceberg.data.Record) PartitionSpec(org.apache.iceberg.PartitionSpec) StreamSupport(java.util.stream.StreamSupport) DeleteFile(org.apache.iceberg.DeleteFile) Assume(org.junit.Assume) Assert(org.junit.Assert) PositionDelete(org.apache.iceberg.deletes.PositionDelete)
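The positionDelete(...) helper above presumably wraps org.apache.iceberg.deletes.PositionDelete; a minimal sketch of that core type, with a placeholder file path:

import org.apache.iceberg.data.Record;
import org.apache.iceberg.deletes.PositionDelete;

public class PositionDeleteSketch {
    public static void main(String[] args) {
        // A position delete targets exactly one row, addressed by the pair
        // (data file path, zero-based row ordinal within that file). The
        // third argument optionally carries the deleted row's content.
        PositionDelete<Record> delete = PositionDelete.create();
        delete.set("file:/tmp/warehouse/customers/data-00000.parquet", 2L, null);
        System.out.println("delete " + delete.path() + " @ row " + delete.pos());
    }
}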

Example 43 with Table

use of org.apache.iceberg.Table in project hive by apache.

the class TestIcebergInputFormats method testCustomCatalog.

@Test
public void testCustomCatalog() throws IOException {
    String warehouseLocation = temp.newFolder("hadoop_catalog").getAbsolutePath();
    conf.set("warehouse.location", warehouseLocation);
    conf.set(InputFormatConfig.CATALOG_NAME, Catalogs.ICEBERG_DEFAULT_CATALOG_NAME);
    conf.set(InputFormatConfig.catalogPropertyConfigKey(Catalogs.ICEBERG_DEFAULT_CATALOG_NAME,
        CatalogUtil.ICEBERG_CATALOG_TYPE), CatalogUtil.ICEBERG_CATALOG_TYPE_HADOOP);
    conf.set(InputFormatConfig.catalogPropertyConfigKey(Catalogs.ICEBERG_DEFAULT_CATALOG_NAME,
        CatalogProperties.WAREHOUSE_LOCATION), warehouseLocation);
    Catalog catalog = new HadoopCatalog(conf, conf.get("warehouse.location"));
    TableIdentifier identifier = TableIdentifier.of("db", "t");
    Table table = catalog.createTable(identifier, SCHEMA, SPEC, helper.properties());
    helper.setTable(table);
    List<Record> expectedRecords = helper.generateRandomRecords(1, 0L);
    expectedRecords.get(0).set(2, "2020-03-20");
    helper.appendToTable(Row.of("2020-03-20", 0), expectedRecords);
    builder.readFrom(identifier);
    testInputFormat.create(builder.conf()).validate(expectedRecords);
}
Also used : TableIdentifier(org.apache.iceberg.catalog.TableIdentifier) Table(org.apache.iceberg.Table) Record(org.apache.iceberg.data.Record) HadoopCatalog(org.apache.iceberg.hadoop.HadoopCatalog) Catalog(org.apache.iceberg.catalog.Catalog) Test(org.junit.Test)
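Stripped of the InputFormatConfig plumbing, the catalog setup in this test reduces to creating a HadoopCatalog over a warehouse directory; a minimal sketch (the warehouse path and schema are placeholders):

import org.apache.hadoop.conf.Configuration;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;
import org.apache.iceberg.catalog.TableIdentifier;
import org.apache.iceberg.hadoop.HadoopCatalog;
import org.apache.iceberg.types.Types;
import static org.apache.iceberg.types.Types.NestedField.required;

public class HadoopCatalogSketch {
    public static void main(String[] args) {
        // HadoopCatalog lays out db/table directories under the warehouse path.
        HadoopCatalog catalog = new HadoopCatalog(new Configuration(), "/tmp/hadoop_catalog");
        Schema schema = new Schema(required(1, "id", Types.LongType.get()));
        Table table = catalog.createTable(
            TableIdentifier.of("db", "t"), schema, PartitionSpec.unpartitioned());
        System.out.println("Created table at " + table.location());
    }
}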

Example 44 with Table

use of org.apache.iceberg.Table in project hive by apache.

the class TestHiveIcebergTimeTravel method testAsOfWithJoins.

@Test
public void testAsOfWithJoins() throws IOException, InterruptedException {
    Table table = prepareTableWithVersions(4);
    List<Object[]> rows = shell.executeStatement("SELECT * FROM " +
        "customers FOR SYSTEM_TIME AS OF '" + timestampAfterSnapshot(table, 0) + "' fv, " +
        "customers FOR SYSTEM_TIME AS OF '" + timestampAfterSnapshot(table, 1) + "' sv " +
        "WHERE fv.first_name=sv.first_name");
    Assert.assertEquals(4, rows.size());
    rows = shell.executeStatement("SELECT * FROM " +
        "customers FOR SYSTEM_TIME AS OF '" + timestampAfterSnapshot(table, 1) + "' sv, " +
        "customers FOR SYSTEM_TIME AS OF '" + timestampAfterSnapshot(table, 2) + "' tv " +
        "WHERE sv.first_name=tv.first_name");
    Assert.assertEquals(8, rows.size());
    rows = shell.executeStatement("SELECT * FROM " +
        "customers FOR SYSTEM_TIME AS OF '" + timestampAfterSnapshot(table, 2) + "' sv, " +
        "customers lv " +
        "WHERE sv.first_name=lv.first_name");
    Assert.assertEquals(14, rows.size());
    rows = shell.executeStatement("SELECT * FROM " +
        "customers FOR SYSTEM_TIME AS OF '" + timestampAfterSnapshot(table, 1) + "' sv, " +
        "customers FOR SYSTEM_VERSION AS OF " + table.history().get(2).snapshotId() + " tv " +
        "WHERE sv.first_name=tv.first_name");
    Assert.assertEquals(8, rows.size());
}
Also used : Table(org.apache.iceberg.Table) Test(org.junit.Test)
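Both AS OF variants resolve against the table's snapshot history: FOR SYSTEM_VERSION AS OF takes a snapshot id, FOR SYSTEM_TIME AS OF takes a timestamp at or after a commit. A short sketch of listing the targets these queries can name:

import org.apache.iceberg.HistoryEntry;
import org.apache.iceberg.Table;

public class SnapshotHistorySketch {
    // Prints each snapshot id with its commit time in milliseconds since
    // the epoch; history entries are ordered oldest to newest.
    static void printHistory(Table table) {
        for (HistoryEntry entry : table.history()) {
            System.out.println(entry.snapshotId() + " committed at " + entry.timestampMillis());
        }
    }
}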

Example 45 with Table

use of org.apache.iceberg.Table in project hive by apache.

the class TestHiveIcebergTimeTravel method prepareTableWithVersions.

/**
 * Creates the 'customers' table with the default records, then creates extra snapshots by inserting one more
 * row into the table for each additional version.
 * @param versions The number of history elements we want to create
 * @return The table created
 * @throws IOException When there is a problem during table creation
 * @throws InterruptedException When there is a problem during adding new data to the table
 */
private Table prepareTableWithVersions(int versions) throws IOException, InterruptedException {
    Table table = testTables.createTable(shell, "customers", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
        fileFormat, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS);
    for (int i = 0; i < versions - 1; ++i) {
        // Just wait a little so we definitely will not have the same timestamp for the snapshots
        Thread.sleep(100);
        shell.executeStatement("INSERT INTO customers values(" + (i + HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS.size()) + ",'Alice','Green_" + i + "')");
    }
    table.refresh();
    return table;
}
Also used : Table(org.apache.iceberg.Table)
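The timestampAfterSnapshot helper called by the time-travel tests is not shown on this page; a plausible stand-in, assuming it returns a timestamp string falling just after the given history entry's commit:

import java.sql.Timestamp;
import org.apache.iceberg.Table;

public class TimestampHelperSketch {
    // Hypothetical reconstruction, not the actual Hive test helper: shift the
    // commit time of the index-th snapshot forward by one millisecond so the
    // resulting literal is strictly after that snapshot but still before the
    // next one (the tests sleep 100 ms between inserts).
    static String timestampAfterSnapshot(Table table, int index) {
        long commitMillis = table.history().get(index).timestampMillis();
        return new Timestamp(commitMillis + 1).toString();
    }
}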

Aggregations

Table (org.apache.iceberg.Table) 188
Test (org.junit.Test) 132
Schema (org.apache.iceberg.Schema) 66
TableIdentifier (org.apache.iceberg.catalog.TableIdentifier) 56
Record (org.apache.iceberg.data.Record) 56
PartitionSpec (org.apache.iceberg.PartitionSpec) 51
IOException (java.io.IOException) 27
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema) 27
List (java.util.List) 22
Map (java.util.Map) 20
DataFile (org.apache.iceberg.DataFile) 19
NoSuchTableException (org.apache.iceberg.exceptions.NoSuchTableException) 19
Collectors (java.util.stream.Collectors) 18
BaseTable (org.apache.iceberg.BaseTable) 18
Types (org.apache.iceberg.types.Types) 18
Properties (java.util.Properties) 17
Configuration (org.apache.hadoop.conf.Configuration) 17
Path (org.apache.hadoop.fs.Path) 17
FileFormat (org.apache.iceberg.FileFormat) 16
ArrayList (java.util.ArrayList) 15