
Example 1 with ColumnChunk

use of com.facebook.presto.parquet.reader.ColumnChunk in project presto by prestodb.

From the class Int32FlatBatchReader, the readNext method:

@Override
public ColumnChunk readNext() {
    ColumnChunk columnChunk = null;
    try {
        seek();
        if (field.isRequired()) {
            columnChunk = readWithoutNull();
        } else {
            columnChunk = readWithNull();
        }
    } catch (IOException exception) {
        throw new PrestoException(PARQUET_IO_READ_ERROR, "Error reading Parquet column " + columnDescriptor, exception);
    }
    readOffset = 0;
    nextBatchSize = 0;
    return columnChunk;
}
Also used: PrestoException (com.facebook.presto.spi.PrestoException), IOException (java.io.IOException), ColumnChunk (com.facebook.presto.parquet.reader.ColumnChunk)

Example 2 with ColumnChunk

use of com.facebook.presto.parquet.reader.ColumnChunk in project presto by prestodb.

From the class AbstractNestedBatchReader, the readNext method:

@Override
public ColumnChunk readNext() {
    ColumnChunk columnChunk = null;
    try {
        seek();
        if (field.isRequired()) {
            columnChunk = readNestedNoNull();
        } else {
            columnChunk = readNestedWithNull();
        }
    } catch (IOException ex) {
        throw new ParquetDecodingException("Failed to decode.", ex);
    }
    readOffset = 0;
    nextBatchSize = 0;
    return columnChunk;
}
Also used: ParquetDecodingException (org.apache.parquet.io.ParquetDecodingException), IOException (java.io.IOException), ColumnChunk (com.facebook.presto.parquet.reader.ColumnChunk)

Example 3 with ColumnChunk

use of com.facebook.presto.parquet.reader.ColumnChunk in project presto by prestodb.

From the class BinaryNestedBatchReader, the readNestedNoNull method:

@Override
protected ColumnChunk readNestedNoNull() throws IOException {
    int maxDefinitionLevel = columnDescriptor.getMaxDefinitionLevel();
    RepetitionLevelDecodingContext repetitionLevelDecodingContext = readRepetitionLevels(nextBatchSize);
    DefinitionLevelDecodingContext definitionLevelDecodingContext = readDefinitionLevels(repetitionLevelDecodingContext.getDLValuesDecoderContexts(), repetitionLevelDecodingContext.getRepetitionLevels().length);
    int[] definitionLevels = definitionLevelDecodingContext.getDefinitionLevels();

    // Count, per decoder context, how many positions carry an actual value
    // (a definition level equal to the maximum means the value is present).
    int newBatchSize = 0;
    for (ValuesDecoderContext valuesDecoderContext : definitionLevelDecodingContext.getValuesDecoderContexts()) {
        int valueCount = 0;
        for (int i = valuesDecoderContext.getStart(); i < valuesDecoderContext.getEnd(); i++) {
            valueCount += (definitionLevels[i] == maxDefinitionLevel ? 1 : 0);
        }
        newBatchSize += valueCount;
        valuesDecoderContext.setNonNullCount(valueCount);
        valuesDecoderContext.setValueCount(valueCount);
    }

    // Decode each context's binary values into its own buffer and track the total byte size.
    List<ValueBuffer> valueBuffers = new ArrayList<>();
    int bufferSize = 0;
    for (ValuesDecoderContext valuesDecoderContext : definitionLevelDecodingContext.getValuesDecoderContexts()) {
        ValueBuffer valueBuffer = ((BinaryValuesDecoder) valuesDecoderContext.getValuesDecoder()).readNext(valuesDecoderContext.getNonNullCount());
        bufferSize += valueBuffer.getBufferSize();
        valueBuffers.add(valueBuffer);
    }

    // Copy every buffer into one contiguous byte array, filling the offsets array as we go.
    byte[] byteBuffer = new byte[bufferSize];
    int[] offsets = new int[newBatchSize + 1];
    int i = 0;
    int bufferIndex = 0;
    int offsetIndex = 0;
    for (ValuesDecoderContext valuesDecoderContext : definitionLevelDecodingContext.getValuesDecoderContexts()) {
        ValueBuffer value = valueBuffers.get(i);
        bufferIndex = ((BinaryValuesDecoder) valuesDecoderContext.getValuesDecoder()).readIntoBuffer(byteBuffer, bufferIndex, offsets, offsetIndex, value);
        offsetIndex += valuesDecoderContext.getValueCount();
        i++;
    }

    Slice buffer = Slices.wrappedBuffer(byteBuffer, 0, bufferSize);
    Block block = new VariableWidthBlock(newBatchSize, buffer, offsets, Optional.empty());
    return new ColumnChunk(block, definitionLevels, repetitionLevelDecodingContext.getRepetitionLevels());
}
Also used: ArrayList (java.util.ArrayList), VariableWidthBlock (com.facebook.presto.common.block.VariableWidthBlock), ColumnChunk (com.facebook.presto.parquet.reader.ColumnChunk), Slice (io.airlift.slice.Slice), ValueBuffer (com.facebook.presto.parquet.batchreader.decoders.ValuesDecoder.BinaryValuesDecoder.ValueBuffer), RunLengthEncodedBlock (com.facebook.presto.common.block.RunLengthEncodedBlock), Block (com.facebook.presto.common.block.Block), BinaryValuesDecoder (com.facebook.presto.parquet.batchreader.decoders.ValuesDecoder.BinaryValuesDecoder)
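
The offsets array built above has one more entry than there are positions: entry i marks where value i starts in the shared byte buffer and entry i + 1 marks where it ends. Below is a minimal standalone sketch of that layout, using the same VariableWidthBlock constructor and Slices.wrappedBuffer call as the reader; the class name OffsetsExample and the sample bytes are ours, not part of Presto.

import com.facebook.presto.common.block.Block;
import com.facebook.presto.common.block.VariableWidthBlock;
import io.airlift.slice.Slice;
import io.airlift.slice.Slices;

import java.nio.charset.StandardCharsets;
import java.util.Optional;

public class OffsetsExample {
    public static void main(String[] args) {
        // Two binary values, "foo" and "quux", packed back to back into one byte buffer.
        byte[] bytes = "fooquux".getBytes(StandardCharsets.UTF_8);

        // offsets has (positionCount + 1) entries; position i spans [offsets[i], offsets[i + 1]).
        int[] offsets = {0, 3, 7};

        Slice buffer = Slices.wrappedBuffer(bytes, 0, bytes.length);
        Block block = new VariableWidthBlock(2, buffer, offsets, Optional.empty());

        // Prints 2, then the per-position lengths 3 and 4.
        System.out.println(block.getPositionCount());
        System.out.println(block.getSliceLength(0));
        System.out.println(block.getSliceLength(1));
    }
}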

Example 4 with ColumnChunk

use of com.facebook.presto.parquet.reader.ColumnChunk in project presto by prestodb.

From the class Int32FlatBatchReader, the readWithNull method:

private ColumnChunk readWithNull() throws IOException {
    int[] values = new int[nextBatchSize];
    boolean[] isNull = new boolean[nextBatchSize];
    int totalNonNullCount = 0;
    int remainingInBatch = nextBatchSize;
    int startOffset = 0;
    while (remainingInBatch > 0) {
        if (remainingCountInPage == 0) {
            if (!readNextPage()) {
                break;
            }
        }
        int chunkSize = Math.min(remainingCountInPage, remainingInBatch);
        int nonNullCount = definitionLevelDecoder.readNext(isNull, startOffset, chunkSize);
        totalNonNullCount += nonNullCount;
        if (nonNullCount > 0) {
            // The values decoder packs the non-null values densely at the start of the chunk;
            // they are then shifted backwards so each one lands at its non-null position.
            valuesDecoder.readNext(values, startOffset, nonNullCount);
            int valueDestinationIndex = startOffset + chunkSize - 1;
            int valueSourceIndex = startOffset + nonNullCount - 1;
            while (valueDestinationIndex >= startOffset) {
                if (!isNull[valueDestinationIndex]) {
                    values[valueDestinationIndex] = values[valueSourceIndex];
                    valueSourceIndex--;
                }
                valueDestinationIndex--;
            }
        }
        startOffset += chunkSize;
        remainingInBatch -= chunkSize;
        remainingCountInPage -= chunkSize;
    }
    if (remainingInBatch != 0) {
        throw new ParquetDecodingException("Still remaining to be read in current batch.");
    }
    if (totalNonNullCount == 0) {
        // Every value in the batch is null: emit a single-value run-length encoded block.
        Block block = RunLengthEncodedBlock.create(field.getType(), null, nextBatchSize);
        return new ColumnChunk(block, new int[0], new int[0]);
    }
    boolean hasNoNull = totalNonNullCount == nextBatchSize;
    Block block = new IntArrayBlock(nextBatchSize, hasNoNull ? Optional.empty() : Optional.of(isNull), values);
    return new ColumnChunk(block, new int[0], new int[0]);
}
Also used: IntArrayBlock (com.facebook.presto.common.block.IntArrayBlock), ParquetDecodingException (org.apache.parquet.io.ParquetDecodingException), RunLengthEncodedBlock (com.facebook.presto.common.block.RunLengthEncodedBlock), Block (com.facebook.presto.common.block.Block), ColumnChunk (com.facebook.presto.parquet.reader.ColumnChunk)
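
The backward loop in readWithNull is the key trick: non-null values are decoded densely at the front of the chunk, and the loop then walks the chunk from the end, dropping each packed value into the next non-null slot. Here is a standalone sketch of just that expansion on concrete data; the class and method names are ours.

import java.util.Arrays;

public class NullExpansionExample {
    // Same in-place expansion as in readWithNull, lifted out as a helper for illustration.
    static void expandInPlace(int[] values, boolean[] isNull, int startOffset, int chunkSize, int nonNullCount) {
        int valueDestinationIndex = startOffset + chunkSize - 1;
        int valueSourceIndex = startOffset + nonNullCount - 1;
        while (valueDestinationIndex >= startOffset) {
            if (!isNull[valueDestinationIndex]) {
                values[valueDestinationIndex] = values[valueSourceIndex];
                valueSourceIndex--;
            }
            valueDestinationIndex--;
        }
    }

    public static void main(String[] args) {
        // Dense decoder output: 7, 8, 9 in the first three slots; positions 1 and 3 are null.
        int[] values = {7, 8, 9, 0, 0};
        boolean[] isNull = {false, true, false, true, false};
        expandInPlace(values, isNull, 0, 5, 3);
        // Prints [7, 8, 8, 0, 9]; positions 1 and 3 hold leftover garbage hidden by the isNull mask.
        System.out.println(Arrays.toString(values));
    }
}

Walking the chunk backwards means a value is never overwritten before it has been moved, so no scratch array is needed.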

Example 5 with ColumnChunk

use of com.facebook.presto.parquet.reader.ColumnChunk in project presto by prestodb.

From the class Int32FlatBatchReader, the readWithoutNull method:

private ColumnChunk readWithoutNull() throws IOException {
    int[] values = new int[nextBatchSize];
    int remainingInBatch = nextBatchSize;
    int startOffset = 0;
    while (remainingInBatch > 0) {
        if (remainingCountInPage == 0) {
            if (!readNextPage()) {
                break;
            }
        }
        int chunkSize = Math.min(remainingCountInPage, remainingInBatch);
        valuesDecoder.readNext(values, startOffset, chunkSize);
        startOffset += chunkSize;
        remainingInBatch -= chunkSize;
        remainingCountInPage -= chunkSize;
    }
    if (remainingInBatch != 0) {
        throw new ParquetDecodingException(format("Corrupted Parquet file: extra %d values to be consumed when scanning current batch", remainingInBatch));
    }
    Block block = new IntArrayBlock(nextBatchSize, Optional.empty(), values);
    return new ColumnChunk(block, new int[0], new int[0]);
}
Also used: IntArrayBlock (com.facebook.presto.common.block.IntArrayBlock), ParquetDecodingException (org.apache.parquet.io.ParquetDecodingException), RunLengthEncodedBlock (com.facebook.presto.common.block.RunLengthEncodedBlock), Block (com.facebook.presto.common.block.Block), ColumnChunk (com.facebook.presto.parquet.reader.ColumnChunk)
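
Both read paths share the same page-chunking loop: refill the current page when it is exhausted, take min(values left in page, values left in batch) at a time, and treat a batch that cannot be filled as a corrupt file. Below is a standalone sketch of that control flow, with a Deque of int arrays standing in for readNextPage() and the values decoder; all names here are ours, not Presto APIs.

import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Deque;

public class ChunkedReadExample {
    public static void main(String[] args) {
        // Two "pages" of decoded values feeding a batch of five.
        Deque<int[]> pages = new ArrayDeque<>();
        pages.add(new int[] {1, 2, 3});
        pages.add(new int[] {4, 5, 6, 7});

        int batchSize = 5;
        int[] values = new int[batchSize];
        int remainingInBatch = batchSize;
        int startOffset = 0;
        int[] currentPage = null;
        int pageOffset = 0;

        while (remainingInBatch > 0) {
            if (currentPage == null || pageOffset == currentPage.length) {
                if (pages.isEmpty()) {
                    break;  // corresponds to readNextPage() returning false
                }
                currentPage = pages.poll();
                pageOffset = 0;
            }
            int chunkSize = Math.min(currentPage.length - pageOffset, remainingInBatch);
            System.arraycopy(currentPage, pageOffset, values, startOffset, chunkSize);
            startOffset += chunkSize;
            pageOffset += chunkSize;
            remainingInBatch -= chunkSize;
        }
        // Prints [1, 2, 3, 4, 5]; a non-zero remainingInBatch here is what the reader
        // reports as a corrupted Parquet file.
        System.out.println(Arrays.toString(values));
    }
}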

Aggregations

ColumnChunk (com.facebook.presto.parquet.reader.ColumnChunk): 11
Block (com.facebook.presto.common.block.Block): 8
RunLengthEncodedBlock (com.facebook.presto.common.block.RunLengthEncodedBlock): 8
IntArrayBlock (com.facebook.presto.common.block.IntArrayBlock): 4
VariableWidthBlock (com.facebook.presto.common.block.VariableWidthBlock): 4
BinaryValuesDecoder (com.facebook.presto.parquet.batchreader.decoders.ValuesDecoder.BinaryValuesDecoder): 4
ValueBuffer (com.facebook.presto.parquet.batchreader.decoders.ValuesDecoder.BinaryValuesDecoder.ValueBuffer): 4
Slice (io.airlift.slice.Slice): 4
ArrayList (java.util.ArrayList): 4
IOException (java.io.IOException): 3
ParquetDecodingException (org.apache.parquet.io.ParquetDecodingException): 3
Int32ValuesDecoder (com.facebook.presto.parquet.batchreader.decoders.ValuesDecoder.Int32ValuesDecoder): 2
PrestoException (com.facebook.presto.spi.PrestoException): 2