Use of org.apache.iceberg.PartitionSpec in project hive by apache.
From the class TestHiveIcebergStorageHandlerNoScan, the method testSetPartitionTransform:
@Test
public void testSetPartitionTransform() {
Schema schema = new Schema(
    optional(1, "id", Types.LongType.get()),
    optional(2, "year_field", Types.DateType.get()),
    optional(3, "month_field", Types.TimestampType.withZone()),
    optional(4, "day_field", Types.TimestampType.withoutZone()),
    optional(5, "hour_field", Types.TimestampType.withoutZone()),
    optional(6, "truncate_field", Types.StringType.get()),
    optional(7, "bucket_field", Types.StringType.get()),
    optional(8, "identity_field", Types.StringType.get()));
TableIdentifier identifier = TableIdentifier.of("default", "part_test");
shell.executeStatement("CREATE EXTERNAL TABLE " + identifier + " PARTITIONED BY SPEC (year(year_field), hour(hour_field), " + "truncate(2, truncate_field), bucket(2, bucket_field), identity_field)" + " STORED BY ICEBERG " + testTables.locationForCreateTableSQL(identifier) + "TBLPROPERTIES ('" + InputFormatConfig.TABLE_SCHEMA + "'='" + SchemaParser.toJson(schema) + "', " + "'" + InputFormatConfig.CATALOG_NAME + "'='" + testTables.catalogName() + "')");
PartitionSpec spec = PartitionSpec.builderFor(schema)
    .year("year_field")
    .hour("hour_field")
    .truncate("truncate_field", 2)
    .bucket("bucket_field", 2)
    .identity("identity_field")
    .build();
Table table = testTables.loadTable(identifier);
Assert.assertEquals(spec, table.spec());
shell.executeStatement("ALTER TABLE default.part_test SET PARTITION SPEC(year(year_field), month(month_field), " + "day(day_field))");
spec = PartitionSpec.builderFor(schema)
    .withSpecId(1)
    .year("year_field")
    .alwaysNull("hour_field", "hour_field_hour")
    .alwaysNull("truncate_field", "truncate_field_trunc")
    .alwaysNull("bucket_field", "bucket_field_bucket")
    .alwaysNull("identity_field", "identity_field")
    .month("month_field")
    .day("day_field")
    .build();
table.refresh();
Assert.assertEquals(spec, table.spec());
}
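The ALTER TABLE ... SET PARTITION SPEC statement above exercises Iceberg's partition spec evolution: fields dropped from the spec survive as void (alwaysNull) transforms, and the spec id is bumped. A minimal sketch of the same evolution through Iceberg's core UpdatePartitionSpec API, assuming table is an already-loaded org.apache.iceberg.Table (the class and method names here are hypothetical):

import org.apache.iceberg.Table;

import static org.apache.iceberg.expressions.Expressions.day;
import static org.apache.iceberg.expressions.Expressions.month;

class SpecEvolutionSketch {
  // Keeps year(year_field), removes the four other partition fields (they
  // survive as alwaysNull fields in the new spec), and adds two new transforms.
  static void evolveSpec(Table table) {
    table.updateSpec()
        .removeField("hour_field_hour")
        .removeField("truncate_field_trunc")
        .removeField("bucket_field_bucket")
        .removeField("identity_field")
        .addField(month("month_field"))
        .addField(day("day_field"))
        .commit(); // produces a new spec id (1), matching withSpecId(1) in the assertion
  }
}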
Use of org.apache.iceberg.PartitionSpec in project hive by apache.
From the class TestHiveIcebergV2, the method testReadAndWriteFormatV2Partitioned_EqDelete_OnlyEqColumnsSupplied:
@Test
public void testReadAndWriteFormatV2Partitioned_EqDelete_OnlyEqColumnsSupplied() throws IOException {
Assume.assumeFalse("Reading V2 tables with delete files are only supported currently in " +
    "non-vectorized mode and only Parquet/Avro", isVectorized || fileFormat == FileFormat.ORC);
PartitionSpec spec = PartitionSpec.builderFor(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA)
    .identity("customer_id").build();
Table tbl = testTables.createTable(shell, "customers", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
    spec, fileFormat, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, 2);
// add one more row to the same partition
shell.executeStatement("insert into customers values (1, 'Bob', 'Hoover')");
// delete all rows with customer_id=1 and first_name='Bob', supplying only the equality columns
Schema shorterSchema = new Schema(
    optional(1, "id", Types.LongType.get()),
    optional(2, "name", Types.StringType.get()));
List<Record> toDelete = TestHelper.RecordsBuilder.newInstance(shorterSchema).add(1L, "Bob").build();
DeleteFile deleteFile = HiveIcebergTestUtils.createEqualityDeleteFile(tbl, "dummyPath",
    ImmutableList.of("customer_id", "first_name"), fileFormat, toDelete);
tbl.newRowDelta().addDeletes(deleteFile).commit();
List<Object[]> objects = shell.executeStatement("SELECT * FROM customers ORDER BY customer_id");
Assert.assertEquals(2, objects.size());
Assert.assertArrayEquals(new Object[] { 0L, "Alice", "Brown" }, objects.get(0));
Assert.assertArrayEquals(new Object[] { 2L, "Trudy", "Pink" }, objects.get(1));
}
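Because equality deletes match rows on the listed columns only, the delete records can carry just those columns, which is why the test gets away with a projected two-column schema. A sketch of building such a key record with Iceberg's generic data classes, assuming tbl uses the CUSTOMER_SCHEMA from the test (the class and method names are hypothetical):

import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;
import org.apache.iceberg.data.GenericRecord;
import org.apache.iceberg.data.Record;

class EqualityDeleteKeySketch {
  static Record deleteKey(Table tbl) {
    // Project the table schema down to the equality columns.
    Schema keySchema = tbl.schema().select("customer_id", "first_name");
    Record key = GenericRecord.create(keySchema);
    key.setField("customer_id", 1L);
    key.setField("first_name", "Bob");
    return key; // matches every row with customer_id=1 AND first_name='Bob'
  }
}

The test builds an independent shorterSchema with its own field ids instead of projecting; either way, only the values in the equality-column positions drive the matching.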
Use of org.apache.iceberg.PartitionSpec in project hive by apache.
From the class TestHiveIcebergV2, the method testReadAndWriteFormatV2Partitioned_PosDelete_RowNotSupplied:
@Test
public void testReadAndWriteFormatV2Partitioned_PosDelete_RowNotSupplied() throws IOException {
Assume.assumeFalse("Reading V2 tables with delete files are only supported currently in " +
    "non-vectorized mode and only Parquet/Avro", isVectorized || fileFormat == FileFormat.ORC);
PartitionSpec spec = PartitionSpec.builderFor(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA)
    .identity("customer_id").build();
Table tbl = testTables.createTable(shell, "customers", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
    spec, fileFormat, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, 2);
// add some more data to the same partition
shell.executeStatement("insert into customers values (0, 'Laura', 'Yellow'), (0, 'John', 'Green'), " + "(0, 'Blake', 'Blue')");
tbl.refresh();
// delete the first and third rows from the newly-added data file - without supplying the deleted rows themselves
DataFile dataFile = StreamSupport.stream(tbl.currentSnapshot().addedFiles().spliterator(), false)
    .filter(file -> file.partition().get(0, Long.class) == 0L)
    .filter(file -> file.recordCount() == 3)
    .findAny()
    .orElseThrow(() -> new RuntimeException("Did not find the desired data file in the test table"));
List<PositionDelete<Record>> deletes = ImmutableList.of(
    positionDelete(dataFile.path(), 0L, null),
    positionDelete(dataFile.path(), 2L, null));
DeleteFile deleteFile = HiveIcebergTestUtils.createPositionalDeleteFile(tbl, "dummyPath", fileFormat,
    ImmutableMap.of("customer_id", 0L), deletes);
tbl.newRowDelta().addDeletes(deleteFile).commit();
List<Object[]> objects = shell.executeStatement("SELECT * FROM customers ORDER BY customer_id, first_name");
Assert.assertEquals(4, objects.size());
Assert.assertArrayEquals(new Object[] { 0L, "Alice", "Brown" }, objects.get(0));
Assert.assertArrayEquals(new Object[] { 0L, "John", "Green" }, objects.get(1));
Assert.assertArrayEquals(new Object[] { 1L, "Bob", "Green" }, objects.get(2));
Assert.assertArrayEquals(new Object[] { 2L, "Trudy", "Pink" }, objects.get(3));
}
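A positional delete addresses a row by (data file path, position within the file); the row payload itself is optional, which is what "RowNotSupplied" refers to. A minimal sketch of what the positionDelete(...) helper used above could look like, assuming it wraps Iceberg's PositionDelete (the class name is hypothetical):

import org.apache.iceberg.data.Record;
import org.apache.iceberg.deletes.PositionDelete;

class PositionDeleteSketch {
  // Wraps a (path, position, row) triple; row may be null when the deleted
  // row's content is not supplied, as in this test.
  static PositionDelete<Record> positionDelete(CharSequence path, long pos, Record row) {
    PositionDelete<Record> delete = PositionDelete.create();
    return delete.set(path, pos, row);
  }
}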
Use of org.apache.iceberg.PartitionSpec in project hive by apache.
From the class TestHiveIcebergV2, the method testReadAndWriteFormatV2Partitioned_EqDelete_AllColumnsSupplied:
@Test
public void testReadAndWriteFormatV2Partitioned_EqDelete_AllColumnsSupplied() throws IOException {
Assume.assumeFalse("Reading V2 tables with delete files are only supported currently in " +
    "non-vectorized mode and only Parquet/Avro", isVectorized || fileFormat == FileFormat.ORC);
PartitionSpec spec = PartitionSpec.builderFor(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA)
    .identity("customer_id").build();
Table tbl = testTables.createTable(shell, "customers", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
    spec, fileFormat, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, 2);
// add one more row to the same partition
shell.executeStatement("insert into customers values (1, 'Bob', 'Hoover')");
// delete all rows with customer_id=1 and first_name='Bob', supplying the full schema (last_name is null and ignored by the match)
List<Record> toDelete = TestHelper.RecordsBuilder.newInstance(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA)
    .add(1L, "Bob", null).build();
DeleteFile deleteFile = HiveIcebergTestUtils.createEqualityDeleteFile(tbl, "dummyPath",
    ImmutableList.of("customer_id", "first_name"), fileFormat, toDelete);
tbl.newRowDelta().addDeletes(deleteFile).commit();
List<Object[]> objects = shell.executeStatement("SELECT * FROM customers ORDER BY customer_id");
Assert.assertEquals(2, objects.size());
Assert.assertArrayEquals(new Object[] { 0L, "Alice", "Brown" }, objects.get(0));
Assert.assertArrayEquals(new Object[] { 2L, "Trudy", "Pink" }, objects.get(1));
}
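Row-level deletes in a V2 table are merge-on-read: the data files stay untouched and the deletes are applied while scanning. Independent of Hive, a plain generic scan of the table would already reflect them; a minimal sketch using the iceberg-data module (the class and method names are hypothetical):

import java.io.IOException;

import org.apache.iceberg.Table;
import org.apache.iceberg.data.IcebergGenerics;
import org.apache.iceberg.data.Record;
import org.apache.iceberg.io.CloseableIterable;

class ReadWithDeletesSketch {
  // Scans the current snapshot; rows matched by the equality delete
  // (customer_id=1, first_name='Bob') are filtered out at read time.
  static void printLiveRows(Table tbl) throws IOException {
    try (CloseableIterable<Record> rows = IcebergGenerics.read(tbl).build()) {
      rows.forEach(System.out::println);
    }
  }
}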
Use of org.apache.iceberg.PartitionSpec in project hive by apache.
From the class TestHiveIcebergInserts, the method testInsertOverwritePartitionedTable:
@Test
public void testInsertOverwritePartitionedTable() throws IOException {
TableIdentifier target = TableIdentifier.of("default", "target");
PartitionSpec spec = PartitionSpec.builderFor(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA)
    .identity("last_name").build();
Table table = testTables.createTable(shell, target.name(), HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
    spec, fileFormat, ImmutableList.of());
// IOW into empty target table -> whole source result set is inserted
List<Record> expected = new ArrayList<>(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS);
// add one more record to 'Green' so we have a partition with multiple records
expected.add(TestHelper.RecordsBuilder.newInstance(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA)
    .add(8L, "Sue", "Green").build().get(0));
shell.executeStatement(testTables.getInsertQuery(expected, target, true));
HiveIcebergTestUtils.validateData(table, expected, 0);
// IOW into non-empty target table -> only the affected partitions are overwritten
List<Record> newRecords = TestHelper.RecordsBuilder.newInstance(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA)
    .add(0L, "Mike", "Brown")     // overwritten
    .add(1L, "Christy", "Green")  // overwritten (partition has this single record now)
    .add(3L, "Bill", "Purple")    // appended (new partition)
    .build();
shell.executeStatement(testTables.getInsertQuery(newRecords, target, true));
expected = new ArrayList<>(newRecords);
// existing, untouched partition ('Pink')
expected.add(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS.get(2));
HiveIcebergTestUtils.validateData(table, expected, 0);
// IOW empty source result set -> has no effect on partitioned table
shell.executeStatement("INSERT OVERWRITE TABLE target SELECT * FROM target WHERE FALSE");
HiveIcebergTestUtils.validateData(table, expected, 0);
}
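On a partitioned Iceberg table, Hive's INSERT OVERWRITE behaves as a dynamic partition overwrite: only the partitions that receive new rows are replaced, which is exactly what the second and third rounds of assertions above verify. In Iceberg's core API this corresponds to a ReplacePartitions operation; a minimal sketch, assuming dataFile holds the newly written rows (the class and method names are hypothetical):

import org.apache.iceberg.DataFile;
import org.apache.iceberg.Table;

class DynamicOverwriteSketch {
  // Atomically replaces only the partitions covered by the added file(s);
  // untouched partitions (like 'Pink' in the test) keep their data.
  static void overwrite(Table table, DataFile dataFile) {
    table.newReplacePartitions()
        .addFile(dataFile)
        .commit();
  }
}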