Use of org.apache.iceberg.RowDelta in project hive by apache.
The class TestHiveIcebergDeleteWriter, method testDelete.
@Test
public void testDelete() throws IOException {
  HiveIcebergWriter testWriter = deleteWriter();
  List<GenericRecord> deleteRecords = deleteRecords(table, DELETED_IDS);
  // Cluster the deletes by partition so the writer sees each partition contiguously
  Collections.sort(deleteRecords,
      Comparator.comparing(a -> a.getField(MetadataColumns.PARTITION_COLUMN_NAME).toString()));
  Container<Record> container = new Container<>();
  for (Record deleteRecord : deleteRecords) {
    container.set(deleteRecord);
    testWriter.write(container);
  }
  testWriter.close(false);

  // Register the generated delete files on a RowDelta and commit them in one snapshot
  RowDelta rowDelta = table.newRowDelta();
  testWriter.files().deleteFiles().forEach(rowDelta::addDeletes);
  rowDelta.commit();

  // The table should no longer contain the rows with the deleted ids
  StructLikeSet expected = rowSetWithoutIds(RECORDS, DELETED_IDS);
  StructLikeSet actual = actualRowSet(table);
  Assert.assertEquals("Table should contain expected rows", expected, actual);
}
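Neither rowSetWithoutIds nor actualRowSet is shown on this page. As a hedged sketch, an actualRowSet(table) helper could be reconstructed with Iceberg's generic reader (org.apache.iceberg.data.IcebergGenerics) and org.apache.iceberg.util.StructLikeSet; the real helper lives in the Hive test sources and may differ:

private static StructLikeSet actualRowSet(Table table) throws IOException {
  // Hypothetical reconstruction: read every record back and collect it into a
  // set keyed by the table's struct type, so row ordering does not matter
  StructLikeSet set = StructLikeSet.create(table.schema().asStruct());
  try (CloseableIterable<Record> reader = IcebergGenerics.read(table).build()) {
    reader.forEach(set::add);
  }
  return set;
}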
Use of org.apache.iceberg.RowDelta in project hive by apache.
The class TestHiveIcebergUpdateWriter, method update.
private static void update(Table table, HiveIcebergWriter testWriter) throws IOException {
  List<GenericRecord> updateRecords = updateRecords(table, UPDATED_RECORDS);
  Collections.sort(updateRecords, Comparator.comparing(a -> a.getField("data").toString()));
  Container<Record> container = new Container<>();
  for (Record updateRecord : updateRecords) {
    container.set(updateRecord);
    testWriter.write(container);
  }
  testWriter.close(false);

  // An update commits a pair of changes: delete files remove the old row
  // versions and data files add the new ones; RowDelta applies both atomically
  RowDelta rowDelta = table.newRowDelta();
  testWriter.files().deleteFiles().forEach(rowDelta::addDeletes);
  testWriter.files().dataFiles().forEach(rowDelta::addRows);
  rowDelta.commit();
}
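These tests commit from a single writer, so the RowDelta needs no validation. In a concurrent setting, the same commit could be guarded with the validation hooks the RowDelta interface exposes. A sketch, assuming startSnapshotId is a variable holding the snapshot the writer started from and that the Iceberg version in use provides these methods:

RowDelta rowDelta = table.newRowDelta()
    .validateFromSnapshot(startSnapshotId)  // only inspect snapshots committed after this one
    .validateNoConflictingDataFiles()       // fail on concurrently added data files
    .validateNoConflictingDeleteFiles();    // fail on concurrently added delete files
rowDelta.commit();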
Use of org.apache.iceberg.RowDelta in project iceberg by apache.
The class TestProjectMetaColumn, method writeAndCommit.
private void writeAndCommit(Table table, List<Integer> eqFieldIds, boolean upsert, List<RowData> rows)
    throws IOException {
  TaskWriter<RowData> writer = createTaskWriter(table, eqFieldIds, upsert);
  try (TaskWriter<RowData> io = writer) {
    for (RowData row : rows) {
      io.write(row);
    }
  }

  // complete() returns the data and delete files the task writer produced
  RowDelta delta = table.newRowDelta();
  WriteResult result = writer.complete();
  for (DataFile dataFile : result.dataFiles()) {
    delta.addRows(dataFile);
  }
  for (DeleteFile deleteFile : result.deleteFiles()) {
    delta.addDeletes(deleteFile);
  }
  delta.commit();
}
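Note that TaskWriter.complete() also closes the writer, so calling it after the try-with-resources block is harmless here. When a write can fail midway, the usual pattern is to abort() the writer so half-written files are removed instead of leaking into storage. A sketch under that assumption, where commit(table, result) is a hypothetical helper wrapping the RowDelta calls above:

TaskWriter<RowData> writer = createTaskWriter(table, eqFieldIds, upsert);
try {
  for (RowData row : rows) {
    writer.write(row);
  }
  commit(table, writer.complete()); // hypothetical helper doing the RowDelta commit
} catch (Exception e) {
  writer.abort(); // deletes any files written so far
  throw e;
}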
Use of org.apache.iceberg.RowDelta in project iceberg by apache.
The class TestPartitioningWriters, method testClusteredDataWriterMultiplePartitions.
@Test
public void testClusteredDataWriterMultiplePartitions() throws IOException {
  table.updateSpec().addField(Expressions.ref("data")).commit();

  FileWriterFactory<T> writerFactory = newWriterFactory(table.schema());
  ClusteredDataWriter<T> writer = new ClusteredDataWriter<>(
      writerFactory, fileFactory, table.io(), fileFormat, TARGET_FILE_SIZE);

  // rows are written clustered by partition: all "aaa", then "bbb", then "ccc"
  PartitionSpec spec = table.spec();
  writer.write(toRow(1, "aaa"), spec, partitionKey(spec, "aaa"));
  writer.write(toRow(2, "aaa"), spec, partitionKey(spec, "aaa"));
  writer.write(toRow(3, "bbb"), spec, partitionKey(spec, "bbb"));
  writer.write(toRow(4, "bbb"), spec, partitionKey(spec, "bbb"));
  writer.write(toRow(5, "ccc"), spec, partitionKey(spec, "ccc"));
  writer.close();

  // one data file per partition
  DataWriteResult result = writer.result();
  Assert.assertEquals("Must be 3 data files", 3, result.dataFiles().size());

  RowDelta rowDelta = table.newRowDelta();
  result.dataFiles().forEach(rowDelta::addRows);
  rowDelta.commit();

  List<T> expectedRows = ImmutableList.of(
      toRow(1, "aaa"), toRow(2, "aaa"), toRow(3, "bbb"), toRow(4, "bbb"), toRow(5, "ccc"));
  Assert.assertEquals("Records should match", toSet(expectedRows), actualRowSet("*"));
}
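ClusteredDataWriter assumes the incoming rows are clustered by partition and rolls over to a new file whenever the partition changes, which is why each partition above yields exactly one data file; writing partitions out of order fails. If the input cannot be pre-clustered, org.apache.iceberg.io.FanoutDataWriter keeps one open file per partition instead. A sketch, assuming it takes the same constructor arguments as ClusteredDataWriter in this Iceberg version:

FanoutDataWriter<T> fanout = new FanoutDataWriter<>(
    writerFactory, fileFactory, table.io(), fileFormat, TARGET_FILE_SIZE);
fanout.write(toRow(3, "bbb"), spec, partitionKey(spec, "bbb"));
fanout.write(toRow(1, "aaa"), spec, partitionKey(spec, "aaa")); // out-of-order partition is fine here
fanout.close();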
Use of org.apache.iceberg.RowDelta in project iceberg by apache.
The class TestPartitioningWriters, method testClusteredEqualityDeleteWriterMultipleSpecs.
@Test
public void testClusteredEqualityDeleteWriterMultipleSpecs() throws IOException {
  List<Integer> equalityFieldIds = ImmutableList.of(table.schema().findField("id").fieldId());
  Schema equalityDeleteRowSchema = table.schema().select("id");
  FileWriterFactory<T> writerFactory = newWriterFactory(table.schema(), equalityFieldIds, equalityDeleteRowSchema);

  // add an unpartitioned data file
  ImmutableList<T> rows1 = ImmutableList.of(toRow(1, "aaa"), toRow(2, "aaa"), toRow(11, "aaa"));
  DataFile dataFile1 = writeData(writerFactory, fileFactory, rows1, table.spec(), null);
  table.newFastAppend().appendFile(dataFile1).commit();

  // partition by bucket
  table.updateSpec().addField(Expressions.bucket("data", 16)).commit();

  // add a data file partitioned by bucket
  ImmutableList<T> rows2 = ImmutableList.of(toRow(3, "bbb"), toRow(4, "bbb"), toRow(12, "bbb"));
  DataFile dataFile2 = writeData(writerFactory, fileFactory, rows2, table.spec(), partitionKey(table.spec(), "bbb"));
  table.newFastAppend().appendFile(dataFile2).commit();

  // partition by data
  table.updateSpec().removeField(Expressions.bucket("data", 16)).addField(Expressions.ref("data")).commit();

  // add a data file partitioned by data
  ImmutableList<T> rows3 = ImmutableList.of(toRow(5, "ccc"), toRow(13, "ccc"));
  DataFile dataFile3 = writeData(writerFactory, fileFactory, rows3, table.spec(), partitionKey(table.spec(), "ccc"));
  table.newFastAppend().appendFile(dataFile3).commit();

  ClusteredEqualityDeleteWriter<T> writer = new ClusteredEqualityDeleteWriter<>(
      writerFactory, fileFactory, table.io(), fileFormat, TARGET_FILE_SIZE);

  // write deletes against all three historical specs of the table
  PartitionSpec unpartitionedSpec = table.specs().get(0);
  PartitionSpec bucketSpec = table.specs().get(1);
  PartitionSpec identitySpec = table.specs().get(2);
  writer.write(toRow(1, "aaa"), unpartitionedSpec, null);
  writer.write(toRow(2, "aaa"), unpartitionedSpec, null);
  writer.write(toRow(3, "bbb"), bucketSpec, partitionKey(bucketSpec, "bbb"));
  writer.write(toRow(4, "bbb"), bucketSpec, partitionKey(bucketSpec, "bbb"));
  writer.write(toRow(5, "ccc"), identitySpec, partitionKey(identitySpec, "ccc"));
  writer.close();

  DeleteWriteResult result = writer.result();
  Assert.assertEquals("Must be 3 delete files", 3, result.deleteFiles().size());
  // equality deletes match rows by field values, so no data files are referenced
  Assert.assertEquals("Must not reference data files", 0, result.referencedDataFiles().size());
  Assert.assertFalse("Must not reference data files", result.referencesDataFiles());

  RowDelta rowDelta = table.newRowDelta();
  result.deleteFiles().forEach(rowDelta::addDeletes);
  rowDelta.commit();

  // only the rows whose ids were not deleted remain
  List<T> expectedRows = ImmutableList.of(toRow(11, "aaa"), toRow(12, "bbb"), toRow(13, "ccc"));
  Assert.assertEquals("Records should match", toSet(expectedRows), actualRowSet("*"));
}
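All of the examples above reduce to the same commit pattern: collect the data and delete files a writer produced, register them on one RowDelta, and commit, so both sides become visible in a single atomic snapshot. A distilled sketch, where dataFiles and deleteFiles stand in for whichever result collections the writer returned:

RowDelta rowDelta = table.newRowDelta();
dataFiles.forEach(rowDelta::addRows);      // new row data, if any
deleteFiles.forEach(rowDelta::addDeletes); // position or equality deletes, if any
rowDelta.commit();                         // one snapshot, applied atomically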