Search in sources :

Example 1 with PartitioningWriter

Use of org.apache.iceberg.io.PartitioningWriter in the Apache Hive project.

The close method of the class HiveIcebergBufferedDeleteWriter.

/**
 * Closes this writer and, unless aborted, flushes the buffered position deletes into delete files.
 *
 * <p>One task is run per partition key of {@code buffer}, on a dedicated executor and with up to
 * three retries. Each task writes every buffered (file path, position) pair of its partition
 * through a fresh {@link ClusteredPositionDeleteWriter} and adds the resulting delete files to a
 * shared concurrent collection. The collected files are then stored in {@code filesForCommit}.
 *
 * <p>When {@code abort} is {@code true}, or when nothing is buffered, no file is written and
 * {@code filesForCommit} is set from an empty collection.
 *
 * @param abort if {@code true}, skip the flush entirely
 * @throws IOException declared as the checked exception type of the per-partition write task
 */
@Override
public void close(boolean abort) throws IOException {
    long startTime = System.currentTimeMillis();
    // Filled concurrently by the per-partition tasks, hence a concurrent collection.
    Collection<DeleteFile> deleteFiles = new ConcurrentLinkedQueue<>();
    // An empty buffer would request a pool of size 0.
    // NOTE(review): fileExecutor is not visible here; if it builds a fixed thread pool, size 0 is
    // rejected with IllegalArgumentException. There is nothing to write anyway, so skip the flush.
    if (!abort && buffer.size() > 0) {
        LOG.info("Delete file flush is started");
        int size = Math.min(buffer.size(), poolSize);
        ExecutorService fileExecutor = fileExecutor(size);
        try {
            Tasks.foreach(buffer.keySet()).retry(3).executeWith(fileExecutor).onFailure((partition, exception) -> LOG.info("Failed to write delete file {}", partition, exception)).run(partition -> {
                // A single PositionDelete instance is reused for every row written by this task.
                PositionDelete<Record> positionDelete = PositionDelete.create();
                PartitioningWriter writerForFiles;
                try (PartitioningWriter writer = new ClusteredPositionDeleteWriter<>(writerFactory, fileFactory, io, format, targetFileSize)) {
                    Map<String, Roaring64Bitmap> deleteRows = buffer.get(partition);
                    // Visit the data file paths in sorted order.
                    // NOTE(review): presumably required by the clustered writer - confirm.
                    for (String filePath : new TreeSet<>(deleteRows.keySet())) {
                        Roaring64Bitmap deletes = deleteRows.get(filePath);
                        PeekableLongIterator longIterator = deletes.getLongIterator();
                        while (longIterator.hasNext()) {
                            long position = longIterator.next();
                            positionDelete.set(filePath, position, null);
                            writer.write(positionDelete, keyToSpec.get(partition), partition);
                        }
                    }
                    // We need the writer object later to get the generated data files
                    writerForFiles = writer;
                }
                // Read the result only after try-with-resources has closed the writer.
                deleteFiles.addAll(((DeleteWriteResult) writerForFiles.result()).deleteFiles());
            }, IOException.class);
        } finally {
            fileExecutor.shutdown();
        }
    }
    // The elapsed time is a currentTimeMillis() difference, so it is reported in milliseconds.
    LOG.info("HiveIcebergBufferedDeleteWriter is closed with abort={}. Created {} delete files and it took {} ms.", abort, deleteFiles.size(), System.currentTimeMillis() - startTime);
    LOG.debug("Delete files written {}", deleteFiles);
    this.filesForCommit = FilesForCommit.onlyDelete(deleteFiles);
}
Also used : PartitioningWriter(org.apache.iceberg.io.PartitioningWriter) LoggerFactory(org.slf4j.LoggerFactory) Writable(org.apache.hadoop.io.Writable) TreeSet(java.util.TreeSet) Map(java.util.Map) GenericRecord(org.apache.iceberg.data.GenericRecord) ClusteredPositionDeleteWriter(org.apache.iceberg.io.ClusteredPositionDeleteWriter) PartitionKey(org.apache.iceberg.PartitionKey) Container(org.apache.iceberg.mr.mapred.Container) ExecutorService(java.util.concurrent.ExecutorService) Roaring64Bitmap(org.roaringbitmap.longlong.Roaring64Bitmap) IcebergAcidUtil(org.apache.iceberg.mr.hive.IcebergAcidUtil) Logger(org.slf4j.Logger) InternalRecordWrapper(org.apache.iceberg.data.InternalRecordWrapper) FileWriterFactory(org.apache.iceberg.io.FileWriterFactory) OutputFileFactory(org.apache.iceberg.io.OutputFileFactory) Collection(java.util.Collection) Maps(org.apache.iceberg.relocated.com.google.common.collect.Maps) ThreadFactoryBuilder(org.apache.iceberg.relocated.com.google.common.util.concurrent.ThreadFactoryBuilder) FilesForCommit(org.apache.iceberg.mr.hive.FilesForCommit) IOException(java.io.IOException) PeekableLongIterator(org.roaringbitmap.longlong.PeekableLongIterator) Schema(org.apache.iceberg.Schema) FileFormat(org.apache.iceberg.FileFormat) Executors(java.util.concurrent.Executors) Record(org.apache.iceberg.data.Record) DeleteWriteResult(org.apache.iceberg.io.DeleteWriteResult) Tasks(org.apache.iceberg.util.Tasks) PartitionSpec(org.apache.iceberg.PartitionSpec) DeleteFile(org.apache.iceberg.DeleteFile) FileIO(org.apache.iceberg.io.FileIO) PositionDelete(org.apache.iceberg.deletes.PositionDelete) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) ClusteredPositionDeleteWriter(org.apache.iceberg.io.ClusteredPositionDeleteWriter) PartitioningWriter(org.apache.iceberg.io.PartitioningWriter) PeekableLongIterator(org.roaringbitmap.longlong.PeekableLongIterator) TreeSet(java.util.TreeSet) ExecutorService(java.util.concurrent.ExecutorService) 
Roaring64Bitmap(org.roaringbitmap.longlong.Roaring64Bitmap) GenericRecord(org.apache.iceberg.data.GenericRecord) Record(org.apache.iceberg.data.Record) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) DeleteFile(org.apache.iceberg.DeleteFile)

Aggregations

IOException (java.io.IOException)1 Collection (java.util.Collection)1 Map (java.util.Map)1 TreeSet (java.util.TreeSet)1 ConcurrentLinkedQueue (java.util.concurrent.ConcurrentLinkedQueue)1 ExecutorService (java.util.concurrent.ExecutorService)1 Executors (java.util.concurrent.Executors)1 Writable (org.apache.hadoop.io.Writable)1 DeleteFile (org.apache.iceberg.DeleteFile)1 FileFormat (org.apache.iceberg.FileFormat)1 PartitionKey (org.apache.iceberg.PartitionKey)1 PartitionSpec (org.apache.iceberg.PartitionSpec)1 Schema (org.apache.iceberg.Schema)1 GenericRecord (org.apache.iceberg.data.GenericRecord)1 InternalRecordWrapper (org.apache.iceberg.data.InternalRecordWrapper)1 Record (org.apache.iceberg.data.Record)1 PositionDelete (org.apache.iceberg.deletes.PositionDelete)1 ClusteredPositionDeleteWriter (org.apache.iceberg.io.ClusteredPositionDeleteWriter)1 DeleteWriteResult (org.apache.iceberg.io.DeleteWriteResult)1 FileIO (org.apache.iceberg.io.FileIO)1