
Example 71 with Table

use of org.apache.iceberg.Table in project hive by apache.

the class TestInputFormatReaderDeletes method rowSet.

@Override
public StructLikeSet rowSet(String name, Table table, String... columns) {
    InputFormatConfig.ConfigBuilder builder =
        new InputFormatConfig.ConfigBuilder(conf).readFrom(table.location());
    Schema projected = table.schema().select(columns);
    StructLikeSet set = StructLikeSet.create(projected.asStruct());

    // Read the table back through every tested input format, project the
    // requested columns, and collect the rows into a set so they can be
    // compared to the expected rows regardless of order.
    set.addAll(TestIcebergInputFormats.TESTED_INPUT_FORMATS.stream()
        .filter(recordFactory -> recordFactory.name().equals(inputFormat))
        .map(recordFactory -> recordFactory.create(builder.project(projected).conf()).getRecords())
        .flatMap(List::stream)
        // wrap each record so values use Iceberg's internal representation
        .map(record -> new InternalRecordWrapper(projected.asStruct()).wrap(record))
        .collect(Collectors.toList()));
    return set;
}
Also used : BaseTable(org.apache.iceberg.BaseTable) InternalRecordWrapper(org.apache.iceberg.data.InternalRecordWrapper) Table(org.apache.iceberg.Table) StructLikeSet(org.apache.iceberg.util.StructLikeSet) HadoopTables(org.apache.iceberg.hadoop.HadoopTables) RunWith(org.junit.runner.RunWith) IOException(java.io.IOException) Schema(org.apache.iceberg.Schema) TestIcebergInputFormats(org.apache.iceberg.mr.hive.TestIcebergInputFormats) Collectors(java.util.stream.Collectors) FileFormat(org.apache.iceberg.FileFormat) File(java.io.File) TableMetadata(org.apache.iceberg.TableMetadata) List(java.util.List) TableOperations(org.apache.iceberg.TableOperations) Configuration(org.apache.hadoop.conf.Configuration) PartitionSpec(org.apache.iceberg.PartitionSpec) DeleteReadTests(org.apache.iceberg.data.DeleteReadTests) Assert(org.junit.Assert) Parameterized(org.junit.runners.Parameterized) Before(org.junit.Before)
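
The core of rowSet() above is the StructLikeSet: rows are projected to the requested columns and collected into a set keyed on the struct type, so expected and actual results can be compared without regard to row order. A minimal standalone sketch of that pattern, using only Iceberg's generic data API (the class name and values here are made up for illustration):

import org.apache.iceberg.Schema;
import org.apache.iceberg.data.GenericRecord;
import org.apache.iceberg.data.InternalRecordWrapper;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.util.StructLikeSet;
import static org.apache.iceberg.types.Types.NestedField.required;

public class StructLikeSetSketch {
    public static void main(String[] args) {
        Schema schema = new Schema(
            required(1, "id", Types.LongType.get()),
            required(2, "data", Types.StringType.get()));
        // Project to the requested columns; the set compares rows by this struct type.
        Schema projected = schema.select("id");
        StructLikeSet expected = StructLikeSet.create(projected.asStruct());

        GenericRecord record = GenericRecord.create(projected);
        record.setField("id", 1L);
        // The wrapper converts values (e.g. timestamps) to Iceberg's internal
        // representation so records from different readers compare equal.
        expected.add(new InternalRecordWrapper(projected.asStruct()).wrap(record));

        System.out.println(expected.size()); // 1
    }
}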

Example 72 with Table

use of org.apache.iceberg.Table in project hive by apache.

the class TestHiveIcebergPartitions method testHourTransform.

@Test
public void testHourTransform() throws IOException {
    Schema schema = new Schema(
        optional(1, "id", Types.LongType.get()),
        optional(2, "part_field", Types.TimestampType.withoutZone()));
    PartitionSpec spec = PartitionSpec.builderFor(schema).hour("part_field").build();
    List<Record> records = TestHelper.RecordsBuilder.newInstance(schema)
        .add(1L, LocalDateTime.of(2019, 2, 22, 9, 44, 54))
        .add(2L, LocalDateTime.of(2019, 2, 22, 10, 44, 54))
        .add(3L, LocalDateTime.of(2019, 2, 23, 9, 44, 54))
        .build();
    Table table = testTables.createTable(shell, "part_test", schema, spec, fileFormat, records);
    HiveIcebergTestUtils.validateData(table, records, 0);
    HiveIcebergTestUtils.validateDataWithSQL(shell, "part_test", records, "id");
}
Also used : Table(org.apache.iceberg.Table) Schema(org.apache.iceberg.Schema) Record(org.apache.iceberg.data.Record) PartitionSpec(org.apache.iceberg.PartitionSpec) Test(org.junit.Test)
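
To make the hour transform concrete, here is a hedged standalone sketch (core Iceberg classes only, no Hive shell; the class name and values are illustrative): two timestamps in the same hour yield equal partition keys. The InternalRecordWrapper step is needed because transforms expect timestamps in Iceberg's internal microsecond representation rather than LocalDateTime:

import java.time.LocalDateTime;
import org.apache.iceberg.PartitionKey;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.data.GenericRecord;
import org.apache.iceberg.data.InternalRecordWrapper;
import org.apache.iceberg.types.Types;
import static org.apache.iceberg.types.Types.NestedField.optional;

public class HourTransformSketch {
    public static void main(String[] args) {
        Schema schema = new Schema(
            optional(1, "id", Types.LongType.get()),
            optional(2, "part_field", Types.TimestampType.withoutZone()));
        PartitionSpec spec = PartitionSpec.builderFor(schema).hour("part_field").build();
        InternalRecordWrapper wrapper = new InternalRecordWrapper(schema.asStruct());

        GenericRecord r1 = GenericRecord.create(schema);
        r1.setField("id", 1L);
        r1.setField("part_field", LocalDateTime.of(2019, 2, 22, 9, 44, 54));
        PartitionKey k1 = new PartitionKey(spec, schema);
        k1.partition(wrapper.wrap(r1)); // copies the transformed value into the key

        GenericRecord r2 = GenericRecord.create(schema);
        r2.setField("id", 2L);
        r2.setField("part_field", LocalDateTime.of(2019, 2, 22, 9, 59, 0));
        PartitionKey k2 = new PartitionKey(spec, schema);
        k2.partition(wrapper.wrap(r2));

        System.out.println(k1.equals(k2)); // true: both fall in the 2019-02-22-09 hour
    }
}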

Example 73 with Table

use of org.apache.iceberg.Table in project hive by apache.

the class TestHiveIcebergPartitions method testDayTransform.

@Test
public void testDayTransform() throws IOException {
    Schema schema = new Schema(
        optional(1, "id", Types.LongType.get()),
        optional(2, "part_field", Types.TimestampType.withoutZone()));
    PartitionSpec spec = PartitionSpec.builderFor(schema).day("part_field").build();
    List<Record> records = TestHelper.RecordsBuilder.newInstance(schema)
        .add(1L, LocalDateTime.of(2019, 2, 22, 9, 44, 54))
        .add(2L, LocalDateTime.of(2019, 2, 22, 10, 44, 54))
        .add(3L, LocalDateTime.of(2019, 2, 23, 9, 44, 54))
        .build();
    Table table = testTables.createTable(shell, "part_test", schema, spec, fileFormat, records);
    HiveIcebergTestUtils.validateData(table, records, 0);
    HiveIcebergTestUtils.validateDataWithSQL(shell, "part_test", records, "id");
}
Also used : Table(org.apache.iceberg.Table) Schema(org.apache.iceberg.Schema) Record(org.apache.iceberg.data.Record) PartitionSpec(org.apache.iceberg.PartitionSpec) Test(org.junit.Test)
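
The only change from the previous example is day() instead of hour(). A quick sketch, reusing the schema and imports from the hour-transform sketch above, of how that difference shows up on the spec itself (the field names and transform strings below assume Iceberg's default naming):

Schema schema = new Schema(
    optional(1, "id", Types.LongType.get()),
    optional(2, "part_field", Types.TimestampType.withoutZone()));

PartitionSpec hourSpec = PartitionSpec.builderFor(schema).hour("part_field").build();
PartitionSpec daySpec = PartitionSpec.builderFor(schema).day("part_field").build();

// Each builder call adds one PartitionField; its name defaults to <column>_<transform>.
System.out.println(hourSpec.fields().get(0).name());      // part_field_hour
System.out.println(hourSpec.fields().get(0).transform()); // hour
System.out.println(daySpec.fields().get(0).name());       // part_field_day
System.out.println(daySpec.fields().get(0).transform());  // day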

Example 74 with Table

use of org.apache.iceberg.Table in project hive by apache.

the class TestHiveIcebergPartitions method testPartitionPruning.

@Test
public void testPartitionPruning() throws IOException {
    Schema salesSchema = new Schema(
        required(1, "ss_item_sk", Types.IntegerType.get()),
        required(2, "ss_sold_date_sk", Types.IntegerType.get()));
    PartitionSpec salesSpec = PartitionSpec.builderFor(salesSchema).identity("ss_sold_date_sk").build();
    Schema dimSchema = new Schema(
        required(1, "d_date_sk", Types.IntegerType.get()),
        required(2, "d_moy", Types.IntegerType.get()));
    List<Record> salesRecords = TestHelper.RecordsBuilder.newInstance(salesSchema)
        .add(51, 5).add(61, 6).add(71, 7).add(81, 8).add(91, 9).build();
    // dimension records are built against the dimension schema
    List<Record> dimRecords = TestHelper.RecordsBuilder.newInstance(dimSchema)
        .add(1, 10).add(2, 20).add(3, 30).add(4, 40).add(5, 50).build();
    Table salesTable = testTables.createTable(shell, "x1_store_sales", salesSchema, salesSpec, fileFormat, null);
    PartitionKey partitionKey = new PartitionKey(salesSpec, salesSchema);
    for (Record r : salesRecords) {
        partitionKey.partition(r);
        testTables.appendIcebergTable(shell.getHiveConf(), salesTable, fileFormat, partitionKey, ImmutableList.of(r));
    }
    testTables.createTable(shell, "x1_date_dim", dimSchema, fileFormat, dimRecords);
    String query = "select s.ss_item_sk from x1_store_sales s, x1_date_dim d " + "where s.ss_sold_date_sk=d.d_date_sk*2 and d.d_moy=30";
    // Check the query results
    List<Object[]> rows = shell.executeStatement(query);
    Assert.assertEquals(1, rows.size());
    Assert.assertArrayEquals(new Object[] { 61 }, rows.get(0));
    // Check if Dynamic Partitioning is used
    Assert.assertTrue(shell.executeStatement("explain " + query).stream()
        .anyMatch(row -> ((String) row[0]).contains("Dynamic Partitioning Event Operator")));
}
Also used : Types(org.apache.iceberg.types.Types) Table(org.apache.iceberg.Table) LocalDateTime(java.time.LocalDateTime) NestedField.optional(org.apache.iceberg.types.Types.NestedField.optional) IOException(java.io.IOException) Test(org.junit.Test) TestHelper(org.apache.iceberg.mr.TestHelper) ImmutableList(org.apache.iceberg.relocated.com.google.common.collect.ImmutableList) Schema(org.apache.iceberg.Schema) FileFormat(org.apache.iceberg.FileFormat) List(java.util.List) Record(org.apache.iceberg.data.Record) OffsetDateTime(java.time.OffsetDateTime) NestedField.required(org.apache.iceberg.types.Types.NestedField.required) LocalDate(java.time.LocalDate) PartitionSpec(org.apache.iceberg.PartitionSpec) PartitionKey(org.apache.iceberg.PartitionKey) Assume(org.junit.Assume) ZoneOffset(java.time.ZoneOffset) Assert(org.junit.Assert)
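
The PartitionKey loop above is what routes each sales record into its identity partition before the append. A minimal standalone sketch of that mechanism (core Iceberg API only; the class name and values are illustrative):

import org.apache.iceberg.PartitionKey;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.data.GenericRecord;
import org.apache.iceberg.types.Types;
import static org.apache.iceberg.types.Types.NestedField.required;

public class IdentityPartitionSketch {
    public static void main(String[] args) {
        Schema schema = new Schema(
            required(1, "ss_item_sk", Types.IntegerType.get()),
            required(2, "ss_sold_date_sk", Types.IntegerType.get()));
        PartitionSpec spec = PartitionSpec.builderFor(schema).identity("ss_sold_date_sk").build();

        GenericRecord record = GenericRecord.create(schema);
        record.setField("ss_item_sk", 61);
        record.setField("ss_sold_date_sk", 6);

        PartitionKey key = new PartitionKey(spec, schema);
        key.partition(record); // identity: the partition value is the column value
        System.out.println(key.get(0, Integer.class)); // 6
        System.out.println(spec.partitionToPath(key)); // ss_sold_date_sk=6
    }
}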

Example 75 with Table

use of org.apache.iceberg.Table in project hive by apache.

the class TestHiveIcebergPartitions method testBucketTransform.

@Test
public void testBucketTransform() throws IOException {
    Schema schema = new Schema(
        optional(1, "id", Types.LongType.get()),
        optional(2, "part_field", Types.StringType.get()));
    PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("part_field", 2).build();
    List<Record> records = TestHelper.RecordsBuilder.newInstance(schema)
        .add(1L, "Part1")
        .add(2L, "Part2")
        .add(3L, "Art3")
        .build();
    Table table = testTables.createTable(shell, "part_test", schema, spec, fileFormat, records);
    HiveIcebergTestUtils.validateData(table, records, 0);
    HiveIcebergTestUtils.validateDataWithSQL(shell, "part_test", records, "id");
}
Also used : Table(org.apache.iceberg.Table) Schema(org.apache.iceberg.Schema) Record(org.apache.iceberg.data.Record) PartitionSpec(org.apache.iceberg.PartitionSpec) Test(org.junit.Test)
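
Bucketing hashes the value with a 32-bit Murmur3 hash and takes it modulo the bucket count, so the bucket a given string lands in is deterministic but not obvious from the value itself. A standalone sketch for inspecting the assignment (core Iceberg API only; the class name is illustrative, and the printed bucket numbers depend on the hash):

import org.apache.iceberg.PartitionKey;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.data.GenericRecord;
import org.apache.iceberg.types.Types;
import static org.apache.iceberg.types.Types.NestedField.optional;

public class BucketTransformSketch {
    public static void main(String[] args) {
        Schema schema = new Schema(
            optional(1, "id", Types.LongType.get()),
            optional(2, "part_field", Types.StringType.get()));
        PartitionSpec spec = PartitionSpec.builderFor(schema).bucket("part_field", 2).build();

        PartitionKey key = new PartitionKey(spec, schema);
        for (String value : new String[] { "Part1", "Part2", "Art3" }) {
            GenericRecord record = GenericRecord.create(schema);
            record.setField("id", 0L);
            record.setField("part_field", value);
            key.partition(record); // overwrites the key with this record's bucket
            System.out.println(value + " -> bucket " + key.get(0, Integer.class));
        }
    }
}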

Aggregations

Table (org.apache.iceberg.Table): 188
Test (org.junit.Test): 132
Schema (org.apache.iceberg.Schema): 66
TableIdentifier (org.apache.iceberg.catalog.TableIdentifier): 56
Record (org.apache.iceberg.data.Record): 56
PartitionSpec (org.apache.iceberg.PartitionSpec): 51
IOException (java.io.IOException): 27
FieldSchema (org.apache.hadoop.hive.metastore.api.FieldSchema): 27
List (java.util.List): 22
Map (java.util.Map): 20
DataFile (org.apache.iceberg.DataFile): 19
NoSuchTableException (org.apache.iceberg.exceptions.NoSuchTableException): 19
Collectors (java.util.stream.Collectors): 18
BaseTable (org.apache.iceberg.BaseTable): 18
Types (org.apache.iceberg.types.Types): 18
Properties (java.util.Properties): 17
Configuration (org.apache.hadoop.conf.Configuration): 17
Path (org.apache.hadoop.fs.Path): 17
FileFormat (org.apache.iceberg.FileFormat): 16
ArrayList (java.util.ArrayList): 15