Search in sources :

Example 1 with BinaryValuesDecoder

Use of com.facebook.presto.parquet.batchreader.decoders.ValuesDecoder.BinaryValuesDecoder in the project presto by prestodb.

From the class BinaryNestedBatchReader, the method readNestedNoNull:

/**
 * Decodes the next batch of a nested binary column on the path that attaches no null mask to
 * the resulting block.
 *
 * <p>Only positions whose definition level equals the column's maximum definition level are
 * counted. That count is stored on each decoder context as both its non-null count and its
 * value count, and the sum over all contexts becomes the position count of the block.
 *
 * @return a {@link ColumnChunk} holding a {@link VariableWidthBlock} plus the definition and
 *         repetition levels that were read for this batch
 * @throws IOException declared by the signature; presumably raised by the level or value decoders
 */
@Override
protected ColumnChunk readNestedNoNull() throws IOException {
    final int maxLevel = columnDescriptor.getMaxDefinitionLevel();
    RepetitionLevelDecodingContext rlContext = readRepetitionLevels(nextBatchSize);
    DefinitionLevelDecodingContext dlContext = readDefinitionLevels(rlContext.getDLValuesDecoderContexts(), rlContext.getRepetitionLevels().length);
    int[] definitionLevels = dlContext.getDefinitionLevels();

    // Pass 1: count, per decoder context, the positions that sit at the maximum definition level.
    int positionCount = 0;
    for (ValuesDecoderContext context : dlContext.getValuesDecoderContexts()) {
        int defined = 0;
        int position = context.getStart();
        while (position < context.getEnd()) {
            if (definitionLevels[position] == maxLevel) {
                defined++;
            }
            position++;
        }
        positionCount += defined;
        context.setNonNullCount(defined);
        context.setValueCount(defined);
    }

    // Pass 2: pull one ValueBuffer per context and total up the bytes needed for the batch.
    List<ValueBuffer> chunks = new ArrayList<>();
    int totalBytes = 0;
    for (ValuesDecoderContext context : dlContext.getValuesDecoderContexts()) {
        BinaryValuesDecoder decoder = (BinaryValuesDecoder) context.getValuesDecoder();
        ValueBuffer chunk = decoder.readNext(context.getNonNullCount());
        totalBytes += chunk.getBufferSize();
        chunks.add(chunk);
    }

    // Pass 3: materialize every chunk into one shared byte array / offsets array.
    // The value returned by readIntoBuffer is fed back in as the next buffer index.
    byte[] data = new byte[totalBytes];
    int[] offsets = new int[positionCount + 1];
    int chunkIndex = 0;
    int dataIndex = 0;
    int offsetsIndex = 0;
    for (ValuesDecoderContext context : dlContext.getValuesDecoderContexts()) {
        ValueBuffer chunk = chunks.get(chunkIndex++);
        BinaryValuesDecoder decoder = (BinaryValuesDecoder) context.getValuesDecoder();
        dataIndex = decoder.readIntoBuffer(data, dataIndex, offsets, offsetsIndex, chunk);
        offsetsIndex += context.getValueCount();
    }

    Block block = new VariableWidthBlock(positionCount, Slices.wrappedBuffer(data, 0, totalBytes), offsets, Optional.empty());
    return new ColumnChunk(block, definitionLevels, rlContext.getRepetitionLevels());
}
Also used : ArrayList(java.util.ArrayList) VariableWidthBlock(com.facebook.presto.common.block.VariableWidthBlock) ColumnChunk(com.facebook.presto.parquet.reader.ColumnChunk) Slice(io.airlift.slice.Slice) ValueBuffer(com.facebook.presto.parquet.batchreader.decoders.ValuesDecoder.BinaryValuesDecoder.ValueBuffer) RunLengthEncodedBlock(com.facebook.presto.common.block.RunLengthEncodedBlock) VariableWidthBlock(com.facebook.presto.common.block.VariableWidthBlock) Block(com.facebook.presto.common.block.Block) BinaryValuesDecoder(com.facebook.presto.parquet.batchreader.decoders.ValuesDecoder.BinaryValuesDecoder)

Example 2 with BinaryValuesDecoder

Use of com.facebook.presto.parquet.batchreader.decoders.ValuesDecoder.BinaryValuesDecoder in the project presto by prestodb.

From the class BinaryFlatBatchReader, the method readWithoutNull:

/**
 * Reads up to {@code nextBatchSize} values of a flat binary column without consulting any
 * definition levels: every position that is read is treated as a present value.
 *
 * <p>The batch may span several pages. Each loop iteration reads at most what is left in the
 * current page, and the loop stops early if {@code readNextPage()} returns {@code false}.
 *
 * @return a {@link ColumnChunk} wrapping a {@link VariableWidthBlock} with {@code nextBatchSize}
 *         positions and empty definition/repetition level arrays
 * @throws IOException declared by the signature; presumably raised while reading pages or values
 */
private ColumnChunk readWithoutNull() throws IOException {
    // Never written in this method, so it stays all-false; it is still handed to the block below.
    boolean[] isNull = new boolean[nextBatchSize];
    List<ValueBuffer> chunks = new ArrayList<>();
    List<ValuesDecoderContext> contexts = new ArrayList<>();
    int totalBytes = 0;
    int position = 0;

    // Phase 1: grab one ValueBuffer per page-bounded chunk and remember where it lands in the batch.
    for (int pendingRows = nextBatchSize; pendingRows > 0; ) {
        if (remainingCountInPage == 0 && !readNextPage()) {
            break;
        }
        int chunkRows = Math.min(remainingCountInPage, pendingRows);
        ValueBuffer chunk = valuesDecoder.readNext(chunkRows);
        totalBytes += chunk.getBufferSize();
        chunks.add(chunk);

        int chunkEnd = position + chunkRows;
        ValuesDecoderContext<BinaryValuesDecoder> context = new ValuesDecoderContext(valuesDecoder, position, chunkEnd);
        context.setValueCount(chunkRows);
        context.setNonNullCount(chunkRows);
        contexts.add(context);

        position = chunkEnd;
        pendingRows -= chunkRows;
        remainingCountInPage -= chunkRows;
    }

    // Phase 2: copy every chunk into one shared byte array / offsets array.
    // The value returned by readIntoBuffer is fed back in as the next buffer index.
    byte[] data = new byte[totalBytes];
    int[] offsets = new int[nextBatchSize + 1];
    int chunkIndex = 0;
    int dataIndex = 0;
    int offsetsIndex = 0;
    for (ValuesDecoderContext<BinaryValuesDecoder> context : contexts) {
        BinaryValuesDecoder decoder = context.getValuesDecoder();
        ValueBuffer chunk = chunks.get(chunkIndex++);
        dataIndex = decoder.readIntoBuffer(data, dataIndex, offsets, offsetsIndex, chunk);
        offsetsIndex += context.getValueCount();
    }

    Block block = new VariableWidthBlock(nextBatchSize, Slices.wrappedBuffer(data, 0, totalBytes), offsets, Optional.of(isNull));
    return new ColumnChunk(block, new int[0], new int[0]);
}
Also used : ArrayList(java.util.ArrayList) VariableWidthBlock(com.facebook.presto.common.block.VariableWidthBlock) ColumnChunk(com.facebook.presto.parquet.reader.ColumnChunk) Slice(io.airlift.slice.Slice) ValueBuffer(com.facebook.presto.parquet.batchreader.decoders.ValuesDecoder.BinaryValuesDecoder.ValueBuffer) RunLengthEncodedBlock(com.facebook.presto.common.block.RunLengthEncodedBlock) VariableWidthBlock(com.facebook.presto.common.block.VariableWidthBlock) Block(com.facebook.presto.common.block.Block) BinaryValuesDecoder(com.facebook.presto.parquet.batchreader.decoders.ValuesDecoder.BinaryValuesDecoder)

Example 3 with BinaryValuesDecoder

Use of com.facebook.presto.parquet.batchreader.decoders.ValuesDecoder.BinaryValuesDecoder in the project presto by prestodb.

From the class BinaryNestedBatchReader, the method seek:

/**
 * Advances the value decoders past {@code readOffset} entries without materializing them.
 *
 * <p>Repetition and definition levels are read for {@code readOffset}; for each decoder context
 * the positions at the maximum definition level are counted, and that many values are skipped on
 * the context's {@link BinaryValuesDecoder}. Nothing happens when {@code readOffset} is zero.
 *
 * @throws IOException declared by the signature; presumably raised by the level or value decoders
 */
@Override
protected void seek() throws IOException {
    if (readOffset == 0) {
        return;
    }
    final int maxLevel = columnDescriptor.getMaxDefinitionLevel();
    RepetitionLevelDecodingContext rlContext = readRepetitionLevels(readOffset);
    DefinitionLevelDecodingContext dlContext = readDefinitionLevels(rlContext.getDLValuesDecoderContexts(), rlContext.getRepetitionLevels().length);
    int[] levels = dlContext.getDefinitionLevels();
    for (ValuesDecoderContext context : dlContext.getValuesDecoderContexts()) {
        // Only positions at the maximum definition level are counted as values to skip.
        int definedCount = 0;
        int position = context.getStart();
        while (position < context.getEnd()) {
            if (levels[position] == maxLevel) {
                definedCount++;
            }
            position++;
        }
        ((BinaryValuesDecoder) context.getValuesDecoder()).skip(definedCount);
    }
}
Also used : BinaryValuesDecoder(com.facebook.presto.parquet.batchreader.decoders.ValuesDecoder.BinaryValuesDecoder)

Example 4 with BinaryValuesDecoder

Use of com.facebook.presto.parquet.batchreader.decoders.ValuesDecoder.BinaryValuesDecoder in the project presto by prestodb.

From the class BinaryNestedBatchReader, the method readNestedWithNull:

/**
 * Decodes the next batch of a nested binary column, producing block positions for both present
 * values and nulls.
 *
 * <p>A position counts as non-null when its definition level equals the column's maximum
 * definition level, and counts as a block position when its definition level is at least
 * {@code maxDefinitionLevel - 1}. Entries with lower definition levels produce no block position.
 * NOTE(review): presumably those lower levels denote null/empty ancestors; confirm against the
 * Parquet nesting rules.
 *
 * @return a {@link ColumnChunk} holding the decoded block (a run-length-encoded null block when
 *         the batch has no non-null value, otherwise a {@link VariableWidthBlock}) together with
 *         the definition and repetition levels read for this batch
 * @throws IOException declared by the signature; presumably raised by the level or value decoders
 */
@Override
protected ColumnChunk readNestedWithNull() throws IOException {
    int maxDefinitionLevel = columnDescriptor.getMaxDefinitionLevel();
    RepetitionLevelDecodingContext repetitionLevelDecodingContext = readRepetitionLevels(nextBatchSize);
    DefinitionLevelDecodingContext definitionLevelDecodingContext = readDefinitionLevels(repetitionLevelDecodingContext.getDLValuesDecoderContexts(), repetitionLevelDecodingContext.getRepetitionLevels().length);
    int[] definitionLevels = definitionLevelDecodingContext.getDefinitionLevels();
    int newBatchSize = 0;
    int batchNonNullCount = 0;
    // Pass 1: per decoder context, count non-null values and total block positions (values + nulls).
    for (ValuesDecoderContext valuesDecoderContext : definitionLevelDecodingContext.getValuesDecoderContexts()) {
        int nonNullCount = 0;
        int valueCount = 0;
        for (int i = valuesDecoderContext.getStart(); i < valuesDecoderContext.getEnd(); i++) {
            nonNullCount += (definitionLevels[i] == maxDefinitionLevel ? 1 : 0);
            valueCount += (definitionLevels[i] >= maxDefinitionLevel - 1 ? 1 : 0);
        }
        batchNonNullCount += nonNullCount;
        newBatchSize += valueCount;
        valuesDecoderContext.setNonNullCount(nonNullCount);
        valuesDecoderContext.setValueCount(valueCount);
    }
    // Shortcut: nothing to decode, so return a run-length-encoded block of newBatchSize nulls.
    if (batchNonNullCount == 0) {
        Block block = RunLengthEncodedBlock.create(field.getType(), null, newBatchSize);
        return new ColumnChunk(block, definitionLevels, repetitionLevelDecodingContext.getRepetitionLevels());
    }
    // Pass 2: pull one ValueBuffer per context (non-null values only) and total the bytes required.
    List<ValueBuffer> valueBuffers = new ArrayList<>();
    int bufferSize = 0;
    for (ValuesDecoderContext valuesDecoderContext : definitionLevelDecodingContext.getValuesDecoderContexts()) {
        ValueBuffer valueBuffer = ((BinaryValuesDecoder) valuesDecoderContext.getValuesDecoder()).readNext(valuesDecoderContext.getNonNullCount());
        bufferSize += valueBuffer.getBufferSize();
        valueBuffers.add(valueBuffer);
    }
    // Pass 3: materialize all chunks into one byte array. Each context's offsets start at the
    // context's first block position (offsetIndex advances by valueCount, not nonNullCount), so at
    // this point they are packed densely at the front of the context's slot range.
    byte[] byteBuffer = new byte[bufferSize];
    int[] offsets = new int[newBatchSize + 1];
    int i = 0;
    int bufferIndex = 0;
    int offsetIndex = 0;
    for (ValuesDecoderContext valuesDecoderContext : definitionLevelDecodingContext.getValuesDecoderContexts()) {
        ValueBuffer value = valueBuffers.get(i);
        // The returned index is fed back in as the write position for the next chunk.
        bufferIndex = ((BinaryValuesDecoder) valuesDecoderContext.getValuesDecoder()).readIntoBuffer(byteBuffer, bufferIndex, offsets, offsetIndex, value);
        offsetIndex += valuesDecoderContext.getValueCount();
        i++;
    }
    // Pass 4: spread the densely packed offsets out to their final positions, walking each context
    // backwards so that an offset is never overwritten before it has been moved.
    boolean[] isNull = new boolean[newBatchSize];
    int offset = 0;
    for (ValuesDecoderContext valuesDecoderContext : definitionLevelDecodingContext.getValuesDecoderContexts()) {
        int destinationIndex = offset + valuesDecoderContext.getValueCount() - 1;
        int sourceIndex = offset + valuesDecoderContext.getNonNullCount() - 1;
        int definitionLevelIndex = valuesDecoderContext.getEnd() - 1;
        // End offset of the context's last position = end offset of its last decoded value.
        offsets[destinationIndex + 1] = offsets[sourceIndex + 1];
        while (destinationIndex >= offset) {
            if (definitionLevels[definitionLevelIndex] == maxDefinitionLevel) {
                // Present value: move its start offset into place and consume one source entry.
                offsets[destinationIndex--] = offsets[sourceIndex--];
            } else if (definitionLevels[definitionLevelIndex] == maxDefinitionLevel - 1) {
                // Null position: zero-length entry that starts where the following value starts.
                offsets[destinationIndex] = offsets[sourceIndex + 1];
                isNull[destinationIndex] = true;
                destinationIndex--;
            }
            // Definition levels below maxDefinitionLevel - 1 consume no block position.
            definitionLevelIndex--;
        }
        offset += valuesDecoderContext.getValueCount();
    }
    Slice buffer = Slices.wrappedBuffer(byteBuffer, 0, bufferSize);
    // Attach the null mask only when at least one position is actually null.
    boolean hasNoNull = batchNonNullCount == newBatchSize;
    Block block = new VariableWidthBlock(newBatchSize, buffer, offsets, hasNoNull ? Optional.empty() : Optional.of(isNull));
    return new ColumnChunk(block, definitionLevels, repetitionLevelDecodingContext.getRepetitionLevels());
}
Also used : ArrayList(java.util.ArrayList) ColumnChunk(com.facebook.presto.parquet.reader.ColumnChunk) VariableWidthBlock(com.facebook.presto.common.block.VariableWidthBlock) Slice(io.airlift.slice.Slice) ValueBuffer(com.facebook.presto.parquet.batchreader.decoders.ValuesDecoder.BinaryValuesDecoder.ValueBuffer) RunLengthEncodedBlock(com.facebook.presto.common.block.RunLengthEncodedBlock) VariableWidthBlock(com.facebook.presto.common.block.VariableWidthBlock) Block(com.facebook.presto.common.block.Block) BinaryValuesDecoder(com.facebook.presto.parquet.batchreader.decoders.ValuesDecoder.BinaryValuesDecoder)

Example 5 with BinaryValuesDecoder

Use of com.facebook.presto.parquet.batchreader.decoders.ValuesDecoder.BinaryValuesDecoder in the project presto by prestodb.

From the class BinaryFlatBatchReader, the method readWithNull:

/**
 * Reads up to {@code nextBatchSize} positions of a flat binary column that may contain nulls.
 *
 * <p>For each page-bounded chunk, {@code definitionLevelDecoder.readNext} is handed the
 * {@code isNull} array and its return value is used as the chunk's non-null count; only that many
 * values are then requested from the values decoder. After all values are copied into one byte
 * array, the offsets (initially packed at the front of each chunk's range) are expanded in place
 * so that null positions become zero-length entries.
 *
 * @return a {@link ColumnChunk} with empty level arrays wrapping either a run-length-encoded null
 *         block (no non-null value in the batch) or a {@link VariableWidthBlock}
 * @throws IOException declared by the signature; presumably raised while reading pages or values
 */
private ColumnChunk readWithNull() throws IOException {
    boolean[] isNull = new boolean[nextBatchSize];
    List<ValueBuffer> chunks = new ArrayList<>();
    List<ValuesDecoderContext> contexts = new ArrayList<>();
    int totalBytes = 0;
    int definedTotal = 0;
    int position = 0;

    // Phase 1: per page-bounded chunk, read null flags, then fetch only the non-null values.
    for (int pendingRows = nextBatchSize; pendingRows > 0; ) {
        if (remainingCountInPage == 0 && !readNextPage()) {
            break;
        }
        int chunkRows = Math.min(remainingCountInPage, pendingRows);
        int definedInChunk = definitionLevelDecoder.readNext(isNull, position, chunkRows);
        definedTotal += definedInChunk;
        ValueBuffer chunk = valuesDecoder.readNext(definedInChunk);
        totalBytes += chunk.getBufferSize();
        chunks.add(chunk);

        int chunkEnd = position + chunkRows;
        ValuesDecoderContext<BinaryValuesDecoder> context = new ValuesDecoderContext(valuesDecoder, position, chunkEnd);
        context.setValueCount(chunkRows);
        context.setNonNullCount(definedInChunk);
        contexts.add(context);

        position = chunkEnd;
        pendingRows -= chunkRows;
        remainingCountInPage -= chunkRows;
    }

    // Shortcut: no non-null value at all, so return a run-length-encoded block of nulls.
    if (definedTotal == 0) {
        Block allNulls = RunLengthEncodedBlock.create(field.getType(), null, nextBatchSize);
        return new ColumnChunk(allNulls, new int[0], new int[0]);
    }

    // Phase 2: copy every chunk into one shared byte array / offsets array.
    // The value returned by readIntoBuffer is fed back in as the next buffer index.
    byte[] data = new byte[totalBytes];
    int[] offsets = new int[nextBatchSize + 1];
    int chunkIndex = 0;
    int dataIndex = 0;
    int offsetsIndex = 0;
    for (ValuesDecoderContext<BinaryValuesDecoder> context : contexts) {
        BinaryValuesDecoder decoder = context.getValuesDecoder();
        ValueBuffer chunk = chunks.get(chunkIndex++);
        dataIndex = decoder.readIntoBuffer(data, dataIndex, offsets, offsetsIndex, chunk);
        offsetsIndex += context.getValueCount();
    }

    // Phase 3: visit chunks last-to-first and, inside each, walk positions back-to-front,
    // moving the packed offsets to their final slots; a null slot gets the start of the next value.
    Collections.reverse(contexts);
    for (ValuesDecoderContext context : contexts) {
        int target = context.getEnd() - 1;
        int source = context.getStart() + context.getNonNullCount() - 1;
        offsets[target + 1] = offsets[source + 1];
        for (; target >= context.getStart(); target--) {
            if (!isNull[target]) {
                offsets[target] = offsets[source--];
            } else {
                offsets[target] = offsets[source + 1];
            }
        }
    }

    // Attach the null mask only when at least one position is actually null.
    Optional<boolean[]> nullMask;
    if (definedTotal == nextBatchSize) {
        nullMask = Optional.empty();
    } else {
        nullMask = Optional.of(isNull);
    }
    Block block = new VariableWidthBlock(nextBatchSize, Slices.wrappedBuffer(data, 0, totalBytes), offsets, nullMask);
    return new ColumnChunk(block, new int[0], new int[0]);
}
Also used : ArrayList(java.util.ArrayList) ColumnChunk(com.facebook.presto.parquet.reader.ColumnChunk) VariableWidthBlock(com.facebook.presto.common.block.VariableWidthBlock) Slice(io.airlift.slice.Slice) ValueBuffer(com.facebook.presto.parquet.batchreader.decoders.ValuesDecoder.BinaryValuesDecoder.ValueBuffer) RunLengthEncodedBlock(com.facebook.presto.common.block.RunLengthEncodedBlock) VariableWidthBlock(com.facebook.presto.common.block.VariableWidthBlock) Block(com.facebook.presto.common.block.Block) BinaryValuesDecoder(com.facebook.presto.parquet.batchreader.decoders.ValuesDecoder.BinaryValuesDecoder)

Aggregations

- BinaryValuesDecoder (com.facebook.presto.parquet.batchreader.decoders.ValuesDecoder.BinaryValuesDecoder): 5
- Block (com.facebook.presto.common.block.Block): 4
- RunLengthEncodedBlock (com.facebook.presto.common.block.RunLengthEncodedBlock): 4
- VariableWidthBlock (com.facebook.presto.common.block.VariableWidthBlock): 4
- ValueBuffer (com.facebook.presto.parquet.batchreader.decoders.ValuesDecoder.BinaryValuesDecoder.ValueBuffer): 4
- ColumnChunk (com.facebook.presto.parquet.reader.ColumnChunk): 4
- Slice (io.airlift.slice.Slice): 4
- ArrayList (java.util.ArrayList): 4