Search in sources :

Example 1 with SIZE_OF_LONG

Use of io.airlift.slice.SizeOf.SIZE_OF_LONG in the presto project by prestodb.

From the class RcFileReader, method advance.

/**
 * Moves the reader to the next chunk of rows, loading the next row group from
 * {@code input} when the current one has been fully consumed.
 *
 * <p>A row group is handed out in chunks of at most {@code ColumnData.MAX_SIZE} rows.
 * While the current row group still has rows left, this only advances the position
 * inside it. Otherwise it parses the next row group: the optional sync marker, the
 * (possibly compressed) header, and the per-column buffers.
 *
 * @return the number of rows in the new current chunk, or {@code -1} when the reader is
 *         closed, the input is exhausted, or the next sync marker starts at or after
 *         {@code end}
 * @throws IOException if reading from the input fails; the failures created via
 *         {@code corrupt(...)} are presumably also surfaced through this type
 *         (TODO confirm against the declaration of {@code corrupt})
 */
public int advance() throws IOException {
    if (closed) {
        return -1;
    }
    // step past the chunk that was handed out by the previous call
    rowGroupPosition += ColumnData.MAX_SIZE;
    currentChunkRowCount = min(ColumnData.MAX_SIZE, rowGroupRowCount - rowGroupPosition);
    // do we still have rows in the current row group
    if (currentChunkRowCount > 0) {
        validateWritePageChecksum();
        return currentChunkRowCount;
    }
    // the current row group is exhausted; are we at the end of the input?
    if (input.remaining() == 0) {
        close();
        return -1;
    }
    // read uncompressed size of row group (which is useless information)
    if (input.remaining() < SIZE_OF_INT) {
        throw corrupt("RCFile truncated %s", dataSource);
    }
    // the stored int has the opposite byte order to what input.readInt() yields, hence reverseBytes
    int unusedRowGroupSize = Integer.reverseBytes(input.readInt());
    // read sequence sync if present: a value of -1 in place of the length marks a sync sequence
    if (unusedRowGroupSize == -1) {
        // need both sync longs plus the row group length int that follows them
        if (input.remaining() < SIZE_OF_LONG + SIZE_OF_LONG + SIZE_OF_INT) {
            throw corrupt("RCFile truncated %s", dataSource);
        }
        // NOTE: this decision must agree with RcFileDecoderUtils.findFirstSyncPosition
        // (position - SIZE_OF_INT) is the offset at which the -1 marker just read began;
        // when that is at or beyond `end`, stop reading
        // NOTE(review): presumably such a row group belongs to the next split - confirm
        if (input.position() - SIZE_OF_INT >= end) {
            close();
            return -1;
        }
        // both halves of the sync sequence must match the values held by this reader
        if (syncFirst != input.readLong() || syncSecond != input.readLong()) {
            throw corrupt("Invalid sync in RCFile %s", dataSource);
        }
        // read the useless uncompressed length
        unusedRowGroupSize = Integer.reverseBytes(input.readInt());
    } else if (rowsRead > 0) {
        // no sync marker although rows were already read, i.e. this is not the first row group;
        // the predicate is always false
        // NOTE(review): presumably this fails write validation when it is enabled - confirm in validateWrite
        validateWrite(writeValidation -> false, "Expected sync sequence for every row group except the first one");
    }
    if (unusedRowGroupSize <= 0) {
        throw corrupt("Invalid uncompressed row group length %s", unusedRowGroupSize);
    }
    // read row group header: two byte-reversed ints (uncompressed size, compressed size)
    // followed by compressedHeaderSize bytes of header data
    // NOTE(review): neither size is checked for being negative before use - confirm upstream guarantees
    int uncompressedHeaderSize = Integer.reverseBytes(input.readInt());
    int compressedHeaderSize = Integer.reverseBytes(input.readInt());
    // the buffer is reused across calls and only replaced when it is too small
    if (compressedHeaderSize > compressedHeaderBuffer.length()) {
        compressedHeaderBuffer = Slices.allocate(compressedHeaderSize);
    }
    input.readBytes(compressedHeaderBuffer, 0, compressedHeaderSize);
    // decompress row group header
    Slice header;
    if (decompressor != null) {
        // same reuse policy for the decompressed header buffer
        if (headerBuffer.length() < uncompressedHeaderSize) {
            headerBuffer = Slices.allocate(uncompressedHeaderSize);
        }
        // restrict the view to exactly the expected uncompressed size
        Slice buffer = headerBuffer.slice(0, uncompressedHeaderSize);
        decompressor.decompress(compressedHeaderBuffer, buffer);
        header = buffer;
    } else {
        // without a decompressor the header is stored as-is, so both sizes must agree
        if (compressedHeaderSize != uncompressedHeaderSize) {
            throw corrupt("Invalid RCFile %s", dataSource);
        }
        // the reused buffer may be longer than compressedHeaderSize since it is only ever replaced by a larger one
        header = compressedHeaderBuffer;
    }
    BasicSliceInput headerInput = header.getInput();
    // read number of rows in row group
    rowGroupRowCount = toIntExact(readVInt(headerInput));
    // the whole row group is counted as read up front, before its chunks are handed out
    rowsRead += rowGroupRowCount;
    rowGroupPosition = 0;
    currentChunkRowCount = min(ColumnData.MAX_SIZE, rowGroupRowCount);
    // set column buffers
    // per column the header holds: compressed data size, uncompressed data size,
    // lengths size (all vints) and then lengthsSize bytes of lengths data
    int totalCompressedDataSize = 0;
    for (int columnIndex = 0; columnIndex < columnCount; columnIndex++) {
        int compressedDataSize = toIntExact(readVInt(headerInput));
        totalCompressedDataSize += compressedDataSize;
        int uncompressedDataSize = toIntExact(readVInt(headerInput));
        // without a decompressor the column data is stored as-is, so both sizes must agree
        if (decompressor == null && compressedDataSize != uncompressedDataSize) {
            throw corrupt("Invalid RCFile %s", dataSource);
        }
        int lengthsSize = toIntExact(readVInt(headerInput));
        // always consume the lengths from the header, even for columns that are skipped below,
        // so headerInput stays positioned at the next column's entry
        Slice lengthsBuffer = headerInput.readSlice(lengthsSize);
        if (readColumns.containsKey(columnIndex)) {
            // requested column: hand its lengths and (still compressed) data to the column reader
            Slice dataBuffer = input.readSlice(compressedDataSize);
            columns[columnIndex].setBuffers(lengthsBuffer, dataBuffer, uncompressedDataSize);
        } else {
            // column not requested: skip over its data in the main input
            skipFully(input, compressedDataSize);
        }
    }
    // this value is not used but validate it is correct since it might signal corruption
    // NOTE(review): the check sums the *compressed* column sizes although the field is described
    // above as an uncompressed size, and the int sum is not guarded against overflow - confirm intent
    if (unusedRowGroupSize != totalCompressedDataSize + uncompressedHeaderSize) {
        throw corrupt("Invalid row group size");
    }
    validateWriteRowGroupChecksum();
    validateWritePageChecksum();
    return currentChunkRowCount;
}
Also used : Page(com.facebook.presto.spi.Page) WriteChecksumBuilder(com.facebook.presto.rcfile.RcFileWriteValidation.WriteChecksumBuilder) Block(com.facebook.presto.spi.block.Block) RcFileDecoderUtils.findFirstSyncPosition(com.facebook.presto.rcfile.RcFileDecoderUtils.findFirstSyncPosition) Slice(io.airlift.slice.Slice) BufferReference(io.airlift.slice.ChunkedSliceInput.BufferReference) RunLengthEncodedBlock(com.facebook.presto.spi.block.RunLengthEncodedBlock) SliceInput(io.airlift.slice.SliceInput) Unit(io.airlift.units.DataSize.Unit) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) Type(com.facebook.presto.spi.type.Type) Slices(io.airlift.slice.Slices) Map(java.util.Map) Objects.requireNonNull(java.util.Objects.requireNonNull) BlockBuilderStatus(com.facebook.presto.spi.block.BlockBuilderStatus) Math.toIntExact(java.lang.Math.toIntExact) WriteChecksum(com.facebook.presto.rcfile.RcFileWriteValidation.WriteChecksum) SIZE_OF_INT(io.airlift.slice.SizeOf.SIZE_OF_INT) SliceLoader(io.airlift.slice.ChunkedSliceInput.SliceLoader) ByteStreams.skipFully(com.google.common.io.ByteStreams.skipFully) ImmutableMap(com.google.common.collect.ImmutableMap) Predicate(java.util.function.Predicate) RuntimeIOException(io.airlift.slice.RuntimeIOException) IOException(java.io.IOException) Math.min(java.lang.Math.min) BasicSliceInput(io.airlift.slice.BasicSliceInput) Preconditions.checkState(com.google.common.base.Preconditions.checkState) DataSize(io.airlift.units.DataSize) List(java.util.List) SIZE_OF_LONG(io.airlift.slice.SizeOf.SIZE_OF_LONG) Closeable(java.io.Closeable) Entry(java.util.Map.Entry) Optional(java.util.Optional) ChunkedSliceInput(io.airlift.slice.ChunkedSliceInput) RcFileDecoderUtils.readVInt(com.facebook.presto.rcfile.RcFileDecoderUtils.readVInt) WriteChecksumBuilder.createWriteChecksumBuilder(com.facebook.presto.rcfile.RcFileWriteValidation.WriteChecksumBuilder.createWriteChecksumBuilder) Slice(io.airlift.slice.Slice) 
BasicSliceInput(io.airlift.slice.BasicSliceInput)

Aggregations

RcFileDecoderUtils.findFirstSyncPosition (com.facebook.presto.rcfile.RcFileDecoderUtils.findFirstSyncPosition)1 RcFileDecoderUtils.readVInt (com.facebook.presto.rcfile.RcFileDecoderUtils.readVInt)1 WriteChecksum (com.facebook.presto.rcfile.RcFileWriteValidation.WriteChecksum)1 WriteChecksumBuilder (com.facebook.presto.rcfile.RcFileWriteValidation.WriteChecksumBuilder)1 WriteChecksumBuilder.createWriteChecksumBuilder (com.facebook.presto.rcfile.RcFileWriteValidation.WriteChecksumBuilder.createWriteChecksumBuilder)1 Page (com.facebook.presto.spi.Page)1 Block (com.facebook.presto.spi.block.Block)1 BlockBuilderStatus (com.facebook.presto.spi.block.BlockBuilderStatus)1 RunLengthEncodedBlock (com.facebook.presto.spi.block.RunLengthEncodedBlock)1 Type (com.facebook.presto.spi.type.Type)1 Preconditions.checkArgument (com.google.common.base.Preconditions.checkArgument)1 Preconditions.checkState (com.google.common.base.Preconditions.checkState)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 ByteStreams.skipFully (com.google.common.io.ByteStreams.skipFully)1 BasicSliceInput (io.airlift.slice.BasicSliceInput)1 ChunkedSliceInput (io.airlift.slice.ChunkedSliceInput)1 BufferReference (io.airlift.slice.ChunkedSliceInput.BufferReference)1 SliceLoader (io.airlift.slice.ChunkedSliceInput.SliceLoader)1 RuntimeIOException (io.airlift.slice.RuntimeIOException)1 SIZE_OF_INT (io.airlift.slice.SizeOf.SIZE_OF_INT)1