Use of org.apache.iceberg.data.Record in project hive by apache.
The class HiveIcebergTestUtils, method getTestRecord.
/**
 * Generates a test record where every field has a value.
 * @return Record with every field set
 */
public static Record getTestRecord() {
  Record record = GenericRecord.create(HiveIcebergTestUtils.FULL_SCHEMA);
  record.set(0, true);
  record.set(1, 1);
  record.set(2, 2L);
  record.set(3, 3.1f);
  record.set(4, 4.2d);
  record.set(5, LocalDate.of(2020, 1, 21));
  // Nanosecond precision is not supported here, so the nano-of-second argument is 0
  record.set(6, OffsetDateTime.of(2017, 11, 22, 11, 30, 7, 0, ZoneOffset.ofHours(2)));
  record.set(7, LocalDateTime.of(2019, 2, 22, 9, 44, 54));
  record.set(8, "kilenc");
  record.set(9, new byte[] { 0, 1, 2 });
  record.set(10, ByteBuffer.wrap(new byte[] { 0, 1, 2, 3 }));
  record.set(11, new BigDecimal("0.0000000013"));
  record.set(12, LocalTime.of(11, 33));
  record.set(13, UUID.fromString("73689599-d7fc-4dfb-b94e-106ff20284a5"));
  return record;
}
Use of org.apache.iceberg.data.Record in project hive by apache.
The class HiveIcebergTestUtils, method createEqualityDeleteFile.
/**
 * @param table The table to create the delete file for
 * @param deleteFilePath The path where the delete file should be created, relative to the table location root
 * @param equalityFields List of field names that should play a role in the equality check
 * @param fileFormat The file format that should be used for writing out the delete file
 * @param rowsToDelete The rows that should be deleted. It is enough to fill out the fields that are relevant for
 *          the equality check, as listed in equalityFields; the rest of the fields are ignored
 * @return The DeleteFile created
 * @throws IOException If there is an error during DeleteFile write
 */
public static DeleteFile createEqualityDeleteFile(Table table, String deleteFilePath, List<String> equalityFields,
    FileFormat fileFormat, List<Record> rowsToDelete) throws IOException {
  List<Integer> equalityFieldIds = equalityFields.stream()
      .map(id -> table.schema().findField(id).fieldId())
      .collect(Collectors.toList());
  Schema eqDeleteRowSchema = table.schema().select(equalityFields.toArray(new String[] {}));
  FileAppenderFactory<Record> appenderFactory = new GenericAppenderFactory(table.schema(), table.spec(),
      ArrayUtil.toIntArray(equalityFieldIds), eqDeleteRowSchema, null);
  EncryptedOutputFile outputFile = table.encryption().encrypt(HadoopOutputFile.fromPath(
      new org.apache.hadoop.fs.Path(table.location(), deleteFilePath), new Configuration()));
  PartitionKey part = new PartitionKey(table.spec(), eqDeleteRowSchema);
  part.partition(rowsToDelete.get(0));
  EqualityDeleteWriter<Record> eqWriter = appenderFactory.newEqDeleteWriter(outputFile, fileFormat, part);
  try (EqualityDeleteWriter<Record> writer = eqWriter) {
    writer.deleteAll(rowsToDelete);
  }
  return eqWriter.toDeleteFile();
}
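A minimal usage sketch, mirroring how the V2 test below calls this helper; the delete file path and the schema variable names are illustrative:

// Illustrative only: delete every row whose customer_id equals 1
Schema deleteSchema = new Schema(optional(1, "customer_id", Types.LongType.get()));
List<Record> rowsToDelete = TestHelper.RecordsBuilder.newInstance(deleteSchema).add(1L).build();
DeleteFile deleteFile = HiveIcebergTestUtils.createEqualityDeleteFile(
    table, "eq-delete-file-1", ImmutableList.of("customer_id"), FileFormat.PARQUET, rowsToDelete);
// Committing the delete file removes matching rows from subsequent reads
table.newRowDelta().addDeletes(deleteFile).commit();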
Use of org.apache.iceberg.data.Record in project hive by apache.
The class TestHiveIcebergTruncateTable, method testTruncateTablePartitionedIcebergTable.
@Test
public void testTruncateTablePartitionedIcebergTable() throws IOException, TException, InterruptedException {
  // Create a partitioned Iceberg table with some initial data and run a truncate table command on this table.
  // Then check that the data is deleted and the table statistics are reset to 0.
  String databaseName = "default";
  String tableName = "customers";
  PartitionSpec spec = PartitionSpec.builderFor(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA)
      .identity("last_name")
      .build();
  List<Record> records = TestHelper.RecordsBuilder.newInstance(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA)
      .add(0L, "Alice", "Brown")
      .add(1L, "Bob", "Brown")
      .add(2L, "Trudy", "Green")
      .add(3L, "John", "Pink")
      .add(4L, "Jane", "Pink")
      .build();
  Table icebergTable = testTables.createTable(shell, tableName, HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
      spec, fileFormat, records);
  testTruncateTable(databaseName, tableName, icebergTable, records,
      HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, true, false);
}
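The testTruncateTable helper itself is not shown in this excerpt. Based on the comment above, a hedged sketch of the kind of checks it performs; the TRUNCATE statement and the zero-row assertion reflect the documented intent, not the actual helper code:

// Hypothetical sketch of the helper's core checks (not the real implementation):
shell.executeStatement("TRUNCATE TABLE " + databaseName + "." + tableName);
List<Object[]> rows = shell.executeStatement("SELECT * FROM " + databaseName + "." + tableName);
Assert.assertEquals(0, rows.size());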
Use of org.apache.iceberg.data.Record in project hive by apache.
The class TestHiveIcebergTruncateTable, method testMultipleTruncateTable.
@Test
public void testMultipleTruncateTable() throws IOException, TException, InterruptedException {
  // Create an Iceberg table with some records in it, then execute a truncate table command
  // and check the result. Then insert some new data and run another truncate table command.
  // The purpose of this test is to make sure that multiple truncate table commands can
  // run after each other without any issue (like issues with locking).
  String databaseName = "default";
  String tableName = "customers";
  Table icebergTable = testTables.createTable(shell, tableName, HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
      fileFormat, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS);
  testTruncateTable(databaseName, tableName, icebergTable, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS,
      HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, true, false);
  List<Record> newRecords = TestHelper.RecordsBuilder.newInstance(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA)
      .add(3L, "Jane", "Purple")
      .add(4L, "Tim", "Grey")
      .add(5L, "Eva", "Yellow")
      .add(6L, "James", "White")
      .add(7L, "Jack", "Black")
      .build();
  shell.executeStatement("INSERT INTO default.customers VALUES (3, 'Jane', 'Purple'), (4, 'Tim', 'Grey'), " +
      "(5, 'Eva', 'Yellow'), (6, 'James', 'White'), (7, 'Jack', 'Black')");
  icebergTable = testTables.loadTable(TableIdentifier.of(databaseName, tableName));
  testTruncateTable(databaseName, tableName, icebergTable, newRecords,
      HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, true, false);
}
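Per the first test's comment, truncation is also expected to reset table statistics. A hedged sketch of such a check against the metastore, assuming an IMetaStoreClient handle named client; the keys follow standard Hive StatsSetupConst naming, and the real helper may verify this differently:

// Hypothetical verification of reset statistics (illustrative only):
org.apache.hadoop.hive.metastore.api.Table hmsTable = client.getTable(databaseName, tableName);
Assert.assertEquals("0", hmsTable.getParameters().get(StatsSetupConst.ROW_COUNT));
Assert.assertEquals("0", hmsTable.getParameters().get(StatsSetupConst.TOTAL_SIZE));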
Use of org.apache.iceberg.data.Record in project hive by apache.
The class TestHiveIcebergV2, method testReadAndWriteFormatV2Partitioned_EqDelete_OnlyEqColumnsSupplied.
@Test
public void testReadAndWriteFormatV2Partitioned_EqDelete_OnlyEqColumnsSupplied() throws IOException {
  Assume.assumeFalse("Reading V2 tables with delete files is currently only supported in " +
      "non-vectorized mode and only for Parquet/Avro", isVectorized || fileFormat == FileFormat.ORC);
  PartitionSpec spec = PartitionSpec.builderFor(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA)
      .identity("customer_id")
      .build();
  Table tbl = testTables.createTable(shell, "customers", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, spec,
      fileFormat, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS, 2);
  // add one more row to the same partition
  shell.executeStatement("insert into customers values (1, 'Bob', 'Hoover')");
  // delete all rows with customer_id=1 and first_name='Bob'
  Schema shorterSchema = new Schema(
      optional(1, "id", Types.LongType.get()),
      optional(2, "name", Types.StringType.get()));
  List<Record> toDelete = TestHelper.RecordsBuilder.newInstance(shorterSchema).add(1L, "Bob").build();
  DeleteFile deleteFile = HiveIcebergTestUtils.createEqualityDeleteFile(tbl, "dummyPath",
      ImmutableList.of("customer_id", "first_name"), fileFormat, toDelete);
  tbl.newRowDelta().addDeletes(deleteFile).commit();
  List<Object[]> objects = shell.executeStatement("SELECT * FROM customers ORDER BY customer_id");
  Assert.assertEquals(2, objects.size());
  Assert.assertArrayEquals(new Object[] { 0L, "Alice", "Brown" }, objects.get(0));
  Assert.assertArrayEquals(new Object[] { 2L, "Trudy", "Pink" }, objects.get(1));
}
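As a cross-check independent of Hive, the same delete could be verified through Iceberg's generic reader, which applies committed equality deletes at read time. A minimal sketch, not part of the original test:

// Illustrative only; requires org.apache.iceberg.data.IcebergGenerics and
// org.apache.iceberg.io.CloseableIterable
try (CloseableIterable<Record> rows = IcebergGenerics.read(tbl).build()) {
  for (Record row : rows) {
    // no surviving row should carry the deleted key customer_id=1
    Assert.assertNotEquals(1L, row.getField("customer_id"));
  }
}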