Search in sources :

Example 1 with OrcWriterOptions

Use of org.apache.hadoop.hive.ql.io.orc.OrcWriterOptions in project presto by prestodb.

Class OrcFileRewriter, method rewrite.

/**
 * Rewrites the ORC file {@code input} into {@code output}, using {@code rowsToDelete}
 * as the deletion vector.
 *
 * <p>The work of copying rows is delegated to the record-level {@code rewrite}
 * overload; this method validates the row counts, configures the writer to use the
 * input file's compression and object inspector, and copies the
 * {@code OrcFileMetadata.KEY} user metadata entry to the output when the input has one.
 *
 * @param input the existing ORC file to read
 * @param output the file the rewritten ORC data is written to
 * @param rowsToDelete deletion vector; its logical length must not exceed the number
 *     of rows in {@code input}
 * @return the {@code OrcFileInfo} produced by the record-level rewrite, or
 *     {@code new OrcFileInfo(0, 0)} when every row of the input is marked for deletion
 *     (in which case no writer is opened by this method)
 * @throws IOException if the input has fewer rows than the deletion vector's length,
 *     if the input has {@code Integer.MAX_VALUE} or more rows, or if reading or
 *     writing fails
 */
public static OrcFileInfo rewrite(File input, File output, BitSet rowsToDelete) throws IOException {
    try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(FileSystem.class.getClassLoader());
        FileSystem fs = new SyncingFileSystem(CONFIGURATION)) {
        Reader orcReader = createReader(fs, path(input));
        long totalRows = orcReader.getNumberOfRows();

        // The deletion vector must not address rows past the end of the file.
        if (rowsToDelete.length() > totalRows) {
            throw new IOException("File has fewer rows than deletion vector");
        }

        // Every row is deleted: nothing to write, report an empty result.
        int deletedRows = rowsToDelete.cardinality();
        if (deletedRows == totalRows) {
            return new OrcFileInfo(0, 0);
        }

        // Row positions are handled as int from here on, so reject anything that
        // does not fit strictly below Integer.MAX_VALUE.
        if (totalRows >= Integer.MAX_VALUE) {
            throw new IOException("File has too many rows");
        }
        int inputRows = toIntExact(totalRows);

        // Mirror the input file's compression and schema in the output file.
        WriterOptions options = new OrcWriterOptions(CONFIGURATION)
                .memory(new NullMemoryManager(CONFIGURATION))
                .fileSystem(fs)
                .compress(orcReader.getCompression())
                .inspector(orcReader.getObjectInspector());

        long startNanos = System.nanoTime();
        try (Closer<RecordReader, IOException> rowSource = closer(orcReader.rows(), RecordReader::close);
            Closer<Writer, IOException> rowSink = closer(createWriter(path(output), options), Writer::close)) {
            // Carry the file-level metadata entry over to the rewritten file, if present.
            if (orcReader.hasMetadataValue(OrcFileMetadata.KEY)) {
                ByteBuffer metadataValue = orcReader.getMetadataValue(OrcFileMetadata.KEY);
                rowSink.get().addUserMetadata(OrcFileMetadata.KEY, metadataValue);
            }

            OrcFileInfo result = rewrite(rowSource.get(), rowSink.get(), rowsToDelete, inputRows);
            log.debug("Rewrote file %s in %s (input rows: %s, output rows: %s)", input.getName(), nanosSince(startNanos), inputRows, inputRows - deletedRows);
            return result;
        }
    }
}
Also used: RecordReader (org.apache.hadoop.hive.ql.io.orc.RecordReader), Reader (org.apache.hadoop.hive.ql.io.orc.Reader), OrcFile.createReader (org.apache.hadoop.hive.ql.io.orc.OrcFile.createReader), InterruptedIOException (java.io.InterruptedIOException), IOException (java.io.IOException), NullMemoryManager (org.apache.hadoop.hive.ql.io.orc.NullMemoryManager), ByteBuffer (java.nio.ByteBuffer), OrcWriterOptions (org.apache.hadoop.hive.ql.io.orc.OrcWriterOptions), SyncingFileSystem (com.facebook.presto.raptor.util.SyncingFileSystem), FileSystem (org.apache.hadoop.fs.FileSystem), WriterOptions (org.apache.hadoop.hive.ql.io.orc.OrcFile.WriterOptions), ThreadContextClassLoader (com.facebook.presto.spi.classloader.ThreadContextClassLoader), Writer (org.apache.hadoop.hive.ql.io.orc.Writer), OrcFile.createWriter (org.apache.hadoop.hive.ql.io.orc.OrcFile.createWriter)

Aggregations

SyncingFileSystem (com.facebook.presto.raptor.util.SyncingFileSystem): 1, ThreadContextClassLoader (com.facebook.presto.spi.classloader.ThreadContextClassLoader): 1, IOException (java.io.IOException): 1, InterruptedIOException (java.io.InterruptedIOException): 1, ByteBuffer (java.nio.ByteBuffer): 1, FileSystem (org.apache.hadoop.fs.FileSystem): 1, NullMemoryManager (org.apache.hadoop.hive.ql.io.orc.NullMemoryManager): 1, WriterOptions (org.apache.hadoop.hive.ql.io.orc.OrcFile.WriterOptions): 1, OrcFile.createReader (org.apache.hadoop.hive.ql.io.orc.OrcFile.createReader): 1, OrcFile.createWriter (org.apache.hadoop.hive.ql.io.orc.OrcFile.createWriter): 1, OrcWriterOptions (org.apache.hadoop.hive.ql.io.orc.OrcWriterOptions): 1, Reader (org.apache.hadoop.hive.ql.io.orc.Reader): 1, RecordReader (org.apache.hadoop.hive.ql.io.orc.RecordReader): 1, Writer (org.apache.hadoop.hive.ql.io.orc.Writer): 1