Use of org.apache.iceberg.io.ClusteredPositionDeleteWriter in project iceberg by apache.
From the class IcebergSourceDeleteBenchmark, method writePosDeletes:
protected void writePosDeletes(CharSequence path, List<Long> deletedPos, int numNoise) throws IOException {
  OutputFileFactory fileFactory = newFileFactory();
  SparkFileWriterFactory writerFactory = SparkFileWriterFactory.builderFor(table())
      .dataFileFormat(fileFormat())
      .build();

  ClusteredPositionDeleteWriter<InternalRow> writer = new ClusteredPositionDeleteWriter<>(
      writerFactory, fileFactory, table().io(), fileFormat(), TARGET_FILE_SIZE_IN_BYTES);

  PartitionSpec unpartitionedSpec = table().specs().get(0);

  PositionDelete<InternalRow> positionDelete = PositionDelete.create();
  try (ClusteredPositionDeleteWriter<InternalRow> closeableWriter = writer) {
    for (Long pos : deletedPos) {
      // delete this position in the target file ...
      positionDelete.set(path, pos, null);
      closeableWriter.write(positionDelete, unpartitionedSpec, null);
      // ... plus numNoise deletes against other files
      for (int i = 0; i < numNoise; i++) {
        positionDelete.set(noisePath(path), pos, null);
        closeableWriter.write(positionDelete, unpartitionedSpec, null);
      }
    }
  }

  // commit the generated delete files to the table
  RowDelta rowDelta = table().newRowDelta();
  writer.result().deleteFiles().forEach(rowDelta::addDeletes);
  rowDelta.validateDeletedFiles().commit();
}
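The snippet calls a newFileFactory() helper that is not shown; noisePath(path) likewise just produces a different file path so the extra "noise" deletes do not target the file under test. Below is a minimal sketch of what such a helper could look like, assuming placeholder partition and task IDs; the helper body is illustrative, not copied from the benchmark.

private OutputFileFactory newFileFactory() {
  // OutputFileFactory produces the output file locations for the delete files;
  // the partitionId and taskId values here are arbitrary placeholders.
  return OutputFileFactory.builderFor(table(), 1, 1)
      .format(fileFormat())
      .build();
}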
Use of org.apache.iceberg.io.ClusteredPositionDeleteWriter in project hive by apache.
From the class HiveIcebergBufferedDeleteWriter, method close:
@Override
public void close(boolean abort) throws IOException {
  long startTime = System.currentTimeMillis();
  Collection<DeleteFile> deleteFiles = new ConcurrentLinkedQueue<>();
  if (!abort) {
    LOG.info("Delete file flush is started");
    int size = Math.min(buffer.size(), poolSize);
    ExecutorService fileExecutor = fileExecutor(size);
    try {
      Tasks.foreach(buffer.keySet())
          .retry(3)
          .executeWith(fileExecutor)
          .onFailure((partition, exception) -> LOG.info("Failed to write delete file {}", partition, exception))
          .run(partition -> {
            PositionDelete<Record> positionDelete = PositionDelete.create();
            PartitioningWriter writerForFiles;
            try (PartitioningWriter writer =
                new ClusteredPositionDeleteWriter<>(writerFactory, fileFactory, io, format, targetFileSize)) {
              Map<String, Roaring64Bitmap> deleteRows = buffer.get(partition);
              // write the buffered positions in file-path order, ascending positions per file
              for (String filePath : new TreeSet<>(deleteRows.keySet())) {
                Roaring64Bitmap deletes = deleteRows.get(filePath);
                PeekableLongIterator longIterator = deletes.getLongIterator();
                while (longIterator.hasNext()) {
                  long position = longIterator.next();
                  positionDelete.set(filePath, position, null);
                  writer.write(positionDelete, keyToSpec.get(partition), partition);
                }
              }
              // We need the writer object later to get the generated data files
              writerForFiles = writer;
            }
            deleteFiles.addAll(((DeleteWriteResult) writerForFiles.result()).deleteFiles());
          }, IOException.class);
    } finally {
      fileExecutor.shutdown();
    }
  }

  LOG.info("HiveIcebergBufferedDeleteWriter is closed with abort={}. Created {} delete files and it took {} ms.",
      abort, deleteFiles.size(), System.currentTimeMillis() - startTime);
  LOG.debug("Delete files written {}", deleteFiles);
  this.filesForCommit = FilesForCommit.onlyDelete(deleteFiles);
}
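The method above only shows the flush on close(); the buffer it drains maps each partition to a per-file Roaring64Bitmap of row positions. Below is a minimal sketch of how such a buffer could be filled during the write phase, assuming that shape; the DeleteBuffer class and its names are illustrative and not part of the Hive code.

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import org.roaringbitmap.longlong.Roaring64Bitmap;

// Sketch only: collect delete positions per partition and per data-file path so a
// close() like the one above can stream them through a ClusteredPositionDeleteWriter.
class DeleteBuffer<P> {
  private final Map<P, Map<String, Roaring64Bitmap>> buffer = new HashMap<>();

  void add(P partition, String filePath, long position) {
    buffer.computeIfAbsent(partition, p -> new HashMap<>())
        .computeIfAbsent(filePath, f -> new Roaring64Bitmap())
        .addLong(position); // the bitmap keeps positions compact, deduplicated and ordered
  }

  Map<String, Roaring64Bitmap> deletesFor(P partition) {
    return buffer.getOrDefault(partition, Collections.emptyMap());
  }
}

Keeping a bitmap per data file bounds memory even when many positions in the same file are deleted, and iterating the bitmap yields positions in ascending order, which matches the file-path/position ordering the flush above relies on.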
Use of org.apache.iceberg.io.ClusteredPositionDeleteWriter in project iceberg by apache.
From the class WritersBenchmark, method writeUnpartitionedClusteredPositionDeleteWriter:
@Benchmark
@Threads(1)
public void writeUnpartitionedClusteredPositionDeleteWriter(Blackhole blackhole) throws IOException {
  FileIO io = table().io();

  OutputFileFactory fileFactory = newFileFactory();
  SparkFileWriterFactory writerFactory = SparkFileWriterFactory.builderFor(table())
      .dataFileFormat(fileFormat())
      .build();

  ClusteredPositionDeleteWriter<InternalRow> writer = new ClusteredPositionDeleteWriter<>(
      writerFactory, fileFactory, io, fileFormat(), TARGET_FILE_SIZE_IN_BYTES);

  PositionDelete<InternalRow> positionDelete = PositionDelete.create();
  try (ClusteredPositionDeleteWriter<InternalRow> closeableWriter = writer) {
    for (InternalRow row : positionDeleteRows) {
      String path = row.getString(0);
      long pos = row.getLong(1);
      positionDelete.set(path, pos, null);
      closeableWriter.write(positionDelete, unpartitionedSpec, null);
    }
  }

  blackhole.consume(writer);
}
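Here positionDeleteRows is a pre-built list of (file_path, pos) rows maintained by the benchmark. A minimal sketch of how such rows could be constructed with Spark's GenericInternalRow and UTF8String is shown below; the method name, path, and row count are illustrative, not taken from the benchmark.

import java.util.ArrayList;
import java.util.List;
import org.apache.spark.sql.catalyst.InternalRow;
import org.apache.spark.sql.catalyst.expressions.GenericInternalRow;
import org.apache.spark.unsafe.types.UTF8String;

// Sketch only: rows shaped so that row.getString(0) returns the data file path and
// row.getLong(1) returns the position to delete, matching the loop above.
static List<InternalRow> buildPositionDeleteRows(String dataFilePath, long rowCount) {
  List<InternalRow> rows = new ArrayList<>();
  for (long pos = 0; pos < rowCount; pos++) {
    rows.add(new GenericInternalRow(new Object[] {UTF8String.fromString(dataFilePath), pos}));
  }
  return rows;
}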