Search in sources :

Example 16 with ByteBufferInputStream

use of org.apache.parquet.bytes.ByteBufferInputStream in project presto by prestodb.

the class Decoders method createDefinitionLevelDecoder.

/**
 * Creates the decoder for a page's definition levels.
 *
 * <p>When {@code maxLevelValue} is zero (or its computed bit width is zero) no level data
 * is read from {@code buffer} and a decoder built from {@code (0, valueCount)} is returned.
 * Otherwise the levels must be RLE encoded: a little-endian int length is read first,
 * followed by that many bytes of level data, and {@code buffer}'s position is moved past both.
 *
 * @param encoding encoding of the definition levels; must be {@code RLE} when levels are present
 * @param maxLevelValue maximum definition level, used to derive the bit width
 * @param valueCount number of values passed on to the decoder
 * @param buffer buffer positioned at the start of the definition-level section
 * @return the definition level decoder
 * @throws IOException if reading the length prefix or slicing the level bytes fails
 */
public static final DefinitionLevelDecoder createDefinitionLevelDecoder(ParquetEncoding encoding, int maxLevelValue, int valueCount, ByteBuffer buffer) throws IOException {
    final int levelBitWidth = getWidthFromMaxInt(maxLevelValue);
    final boolean levelsPresent = maxLevelValue != 0 && levelBitWidth != 0;
    if (!levelsPresent) {
        return new DefinitionLevelDecoder(0, valueCount);
    }

    checkArgument(encoding == RLE, "Invalid definition level encoding: " + encoding);

    ByteBufferInputStream levelStream = ByteBufferInputStream.wrap(buffer);
    final int encodedLength = readIntLittleEndian(levelStream);
    DefinitionLevelDecoder decoder = new DefinitionLevelDecoder(valueCount, levelBitWidth, levelStream.sliceStream(encodedLength));

    // Advance the caller's buffer past the int length prefix (4 bytes) and the level bytes.
    // NOTE(review): presumably the wrapping stream does not move `buffer` itself - confirm.
    final int consumed = encodedLength + 4;
    buffer.position(buffer.position() + consumed);
    return decoder;
}
Also used : ByteBufferInputStream(org.apache.parquet.bytes.ByteBufferInputStream)

Example 17 with ByteBufferInputStream

use of org.apache.parquet.bytes.ByteBufferInputStream in project presto by prestodb.

the class Decoders method createValuesDecoder.

/**
 * Picks the {@code ValuesDecoder} implementation matching a column's primitive type and
 * the page's value encoding, reading from {@code buffer[offset .. offset + length)}.
 *
 * <p>Supported combinations, checked in this order: PLAIN, RLE on BOOLEAN,
 * RLE_DICTIONARY / PLAIN_DICTIONARY, DELTA_BINARY_PACKED, and
 * DELTA_BYTE_ARRAY / DELTA_LENGTH_BYTE_ARRAY on BINARY. Within an encoding, INT32 and FLOAT
 * share a decoder, as do INT64 and DOUBLE; INT64 columns for which
 * {@code isTimeStampMicrosType} is true get a dedicated timestamp-micros decoder.
 *
 * @param columnDescriptor descriptor of the column being decoded
 * @param dictionary dictionary handed to dictionary decoders, cast to the type-specific class
 * @param valueCount number of values, passed to the delta decoders
 * @param encoding value encoding of the page
 * @param buffer array holding the encoded values
 * @param offset start of the encoded values within {@code buffer}
 * @param length number of bytes of encoded values
 * @return a decoder for the encoded values
 * @throws IOException if reading the dictionary bit width from the stream fails
 * @throws PrestoException with {@code PARQUET_UNSUPPORTED_COLUMN_TYPE} when the encoding is
 *         handled but the column type is not, or {@code PARQUET_UNSUPPORTED_ENCODING} when no
 *         branch matches the encoding
 */
private static final ValuesDecoder createValuesDecoder(ColumnDescriptor columnDescriptor, Dictionary dictionary, int valueCount, ParquetEncoding encoding, byte[] buffer, int offset, int length) throws IOException {
    final PrimitiveTypeName type = columnDescriptor.getPrimitiveType().getPrimitiveTypeName();

    if (encoding == PLAIN) {
        switch (type) {
            case BOOLEAN:
                return new BooleanPlainValuesDecoder(buffer, offset, length);
            case INT32:
            case FLOAT:
                return new Int32PlainValuesDecoder(buffer, offset, length);
            case INT64:
                if (isTimeStampMicrosType(columnDescriptor)) {
                    return new Int64TimestampMicrosPlainValuesDecoder(buffer, offset, length);
                }
                // Non-timestamp INT64 uses the same 64-bit decoder as DOUBLE.
                return new Int64PlainValuesDecoder(buffer, offset, length);
            case DOUBLE:
                return new Int64PlainValuesDecoder(buffer, offset, length);
            case INT96:
                return new TimestampPlainValuesDecoder(buffer, offset, length);
            case BINARY:
                return new BinaryPlainValuesDecoder(buffer, offset, length);
            case FIXED_LEN_BYTE_ARRAY:
            default:
                throw new PrestoException(PARQUET_UNSUPPORTED_COLUMN_TYPE, format("Column: %s, Encoding: %s", columnDescriptor, encoding));
        }
    }

    if (encoding == RLE && type == BOOLEAN) {
        ByteBuffer rleBuffer = ByteBuffer.wrap(buffer, offset, length);
        // Consume the leading int (the length) so the decoder starts at the RLE data.
        rleBuffer.getInt();
        return new BooleanRLEValuesDecoder(rleBuffer);
    }

    if (encoding == RLE_DICTIONARY || encoding == PLAIN_DICTIONARY) {
        // The stream is created and the one-byte bit width is read before the type is examined.
        InputStream dictionaryIdStream = ByteBufferInputStream.wrap(ByteBuffer.wrap(buffer, offset, length));
        int idBitWidth = readIntLittleEndianOnOneByte(dictionaryIdStream);
        switch (type) {
            case INT32:
            case FLOAT:
                return new Int32RLEDictionaryValuesDecoder(idBitWidth, dictionaryIdStream, (IntegerDictionary) dictionary);
            case INT64:
                if (isTimeStampMicrosType(columnDescriptor)) {
                    return new Int64TimestampMicrosRLEDictionaryValuesDecoder(idBitWidth, dictionaryIdStream, (LongDictionary) dictionary);
                }
                // Non-timestamp INT64 uses the same 64-bit decoder as DOUBLE.
                return new Int64RLEDictionaryValuesDecoder(idBitWidth, dictionaryIdStream, (LongDictionary) dictionary);
            case DOUBLE:
                return new Int64RLEDictionaryValuesDecoder(idBitWidth, dictionaryIdStream, (LongDictionary) dictionary);
            case INT96:
                return new TimestampRLEDictionaryValuesDecoder(idBitWidth, dictionaryIdStream, (TimestampDictionary) dictionary);
            case BINARY:
                return new BinaryRLEDictionaryValuesDecoder(idBitWidth, dictionaryIdStream, (BinaryBatchDictionary) dictionary);
            case FIXED_LEN_BYTE_ARRAY:
            default:
                throw new PrestoException(PARQUET_UNSUPPORTED_COLUMN_TYPE, format("Column: %s, Encoding: %s", columnDescriptor, encoding));
        }
    }

    if (encoding == DELTA_BINARY_PACKED) {
        ByteBufferInputStream deltaStream = ByteBufferInputStream.wrap(ByteBuffer.wrap(buffer, offset, length));
        switch (type) {
            case INT32:
            case FLOAT:
                return new Int32DeltaBinaryPackedValuesDecoder(valueCount, deltaStream);
            case INT64:
                if (isTimeStampMicrosType(columnDescriptor)) {
                    return new Int64TimestampMicrosDeltaBinaryPackedValuesDecoder(valueCount, deltaStream);
                }
                // Non-timestamp INT64 uses the same 64-bit decoder as DOUBLE.
                return new Int64DeltaBinaryPackedValuesDecoder(valueCount, deltaStream);
            case DOUBLE:
                return new Int64DeltaBinaryPackedValuesDecoder(valueCount, deltaStream);
            default:
                throw new PrestoException(PARQUET_UNSUPPORTED_COLUMN_TYPE, format("Column: %s, Encoding: %s", columnDescriptor, encoding));
        }
    }

    final boolean deltaByteArrayEncoding = encoding == DELTA_BYTE_ARRAY || encoding == DELTA_LENGTH_BYTE_ARRAY;
    if (deltaByteArrayEncoding && type == PrimitiveTypeName.BINARY) {
        ByteBufferInputStream binaryStream = ByteBufferInputStream.wrap(ByteBuffer.wrap(buffer, offset, length));
        return new BinaryDeltaValuesDecoder(encoding, valueCount, binaryStream);
    }

    throw new PrestoException(PARQUET_UNSUPPORTED_ENCODING, format("Column: %s, Encoding: %s", columnDescriptor, encoding));
}
Also used : Int32PlainValuesDecoder(com.facebook.presto.parquet.batchreader.decoders.plain.Int32PlainValuesDecoder) PrestoException(com.facebook.presto.spi.PrestoException) Int64TimestampMicrosDeltaBinaryPackedValuesDecoder(com.facebook.presto.parquet.batchreader.decoders.delta.Int64TimestampMicrosDeltaBinaryPackedValuesDecoder) Int32RLEDictionaryValuesDecoder(com.facebook.presto.parquet.batchreader.decoders.rle.Int32RLEDictionaryValuesDecoder) TimestampDictionary(com.facebook.presto.parquet.batchreader.dictionary.TimestampDictionary) BooleanRLEValuesDecoder(com.facebook.presto.parquet.batchreader.decoders.rle.BooleanRLEValuesDecoder) BinaryPlainValuesDecoder(com.facebook.presto.parquet.batchreader.decoders.plain.BinaryPlainValuesDecoder) Int64RLEDictionaryValuesDecoder(com.facebook.presto.parquet.batchreader.decoders.rle.Int64RLEDictionaryValuesDecoder) TimestampPlainValuesDecoder(com.facebook.presto.parquet.batchreader.decoders.plain.TimestampPlainValuesDecoder) BinaryDeltaValuesDecoder(com.facebook.presto.parquet.batchreader.decoders.delta.BinaryDeltaValuesDecoder) TimestampRLEDictionaryValuesDecoder(com.facebook.presto.parquet.batchreader.decoders.rle.TimestampRLEDictionaryValuesDecoder) LongDictionary(com.facebook.presto.parquet.dictionary.LongDictionary) BinaryRLEDictionaryValuesDecoder(com.facebook.presto.parquet.batchreader.decoders.rle.BinaryRLEDictionaryValuesDecoder) ByteArrayInputStream(java.io.ByteArrayInputStream) ByteBufferInputStream(org.apache.parquet.bytes.ByteBufferInputStream) InputStream(java.io.InputStream) ByteBufferInputStream(org.apache.parquet.bytes.ByteBufferInputStream) BooleanPlainValuesDecoder(com.facebook.presto.parquet.batchreader.decoders.plain.BooleanPlainValuesDecoder) Int64PlainValuesDecoder(com.facebook.presto.parquet.batchreader.decoders.plain.Int64PlainValuesDecoder) ByteBuffer(java.nio.ByteBuffer) 
Int64TimestampMicrosRLEDictionaryValuesDecoder(com.facebook.presto.parquet.batchreader.decoders.rle.Int64TimestampMicrosRLEDictionaryValuesDecoder) PrimitiveTypeName(org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName) Int64DeltaBinaryPackedValuesDecoder(com.facebook.presto.parquet.batchreader.decoders.delta.Int64DeltaBinaryPackedValuesDecoder) Int32DeltaBinaryPackedValuesDecoder(com.facebook.presto.parquet.batchreader.decoders.delta.Int32DeltaBinaryPackedValuesDecoder) IntegerDictionary(com.facebook.presto.parquet.dictionary.IntegerDictionary) Int64TimestampMicrosPlainValuesDecoder(com.facebook.presto.parquet.batchreader.decoders.plain.Int64TimestampMicrosPlainValuesDecoder) BinaryBatchDictionary(com.facebook.presto.parquet.batchreader.dictionary.BinaryBatchDictionary)

Example 18 with ByteBufferInputStream

use of org.apache.parquet.bytes.ByteBufferInputStream in project presto by prestodb.

the class AbstractColumnReader method readPageV1.

/**
 * Prepares this reader for a V1 data page.
 *
 * <p>Level readers are obtained from the page's repetition and definition level encodings
 * and stored (wrapped) in {@code repetitionReader} / {@code definitionReader}. Both are then
 * initialized, repetition first, from a single stream over the page slice, and the same
 * stream is handed to {@code initDataReader} for the values.
 *
 * @param page the V1 data page to read
 * @return the values reader produced by {@code initDataReader}
 * @throws ParquetDecodingException if an {@link IOException} occurs while initializing the readers
 */
private ValuesReader readPageV1(DataPageV1 page) {
    ValuesReader rlReader = page.getRepetitionLevelEncoding().getValuesReader(columnDescriptor, REPETITION_LEVEL);
    ValuesReader dlReader = page.getDefinitionLevelEncoding().getValuesReader(columnDescriptor, DEFINITION_LEVEL);
    repetitionReader = new LevelValuesReader(rlReader);
    definitionReader = new LevelValuesReader(dlReader);

    try {
        ByteBufferInputStream pageStream = ByteBufferInputStream.wrap(page.getSlice().toByteBuffer());

        // One shared stream: each initFromPage call continues from where the previous one stopped.
        rlReader.initFromPage(page.getValueCount(), pageStream);
        dlReader.initFromPage(page.getValueCount(), pageStream);

        // -1 is passed when the page carries no first-row index.
        long firstRow = page.getFirstRowIndex().orElse(-1L);
        return initDataReader(page.getValueEncoding(), pageStream, page.getValueCount(), firstRow);
    }
    catch (IOException e) {
        throw new ParquetDecodingException("Error reading parquet page " + page + " in column " + columnDescriptor, e);
    }
}
Also used : ValuesReader(org.apache.parquet.column.values.ValuesReader) ParquetDecodingException(org.apache.parquet.io.ParquetDecodingException) ByteBufferInputStream(org.apache.parquet.bytes.ByteBufferInputStream) IOException(java.io.IOException)

Example 19 with ByteBufferInputStream

use of org.apache.parquet.bytes.ByteBufferInputStream in project flink by apache.

the class AbstractColumnReader method readPageV1.

/**
 * Prepares this reader for a V1 data page.
 *
 * <p>Records the page's value count, creates a {@code RunLengthDecoder} sized from the
 * column's maximum definition level, then initializes the repetition-level reader, the
 * run-length decoder and finally the value reader (via {@code prepareNewPage}) from one
 * stream over the page bytes, in that order.
 *
 * @param page the V1 data page to read
 * @throws UnsupportedOperationException if the definition levels are not RLE encoded while
 *         the column's maximum definition level is non-zero
 * @throws IOException wrapping any {@link IOException} raised while reading the page
 */
private void readPageV1(DataPageV1 page) throws IOException {
    this.pageValueCount = page.getValueCount();
    ValuesReader repetitionLevels = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL);

    // Only RLE definition levels can be decoded; any encoding is tolerated when the
    // column has no definition levels at all (max level 0).
    if (page.getDlEncoding() != Encoding.RLE) {
        if (descriptor.getMaxDefinitionLevel() != 0) {
            throw new UnsupportedOperationException("Unsupported encoding: " + page.getDlEncoding());
        }
    }

    this.runLenDecoder = new RunLengthDecoder(BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel()));

    try {
        ByteBufferInputStream pageStream = page.getBytes().toInputStream();
        repetitionLevels.initFromPage(pageValueCount, pageStream);
        this.runLenDecoder.initFromStream(pageValueCount, pageStream);
        prepareNewPage(page.getValueEncoding(), pageStream);
    } catch (IOException cause) {
        // Re-thrown with the page and column attached for context.
        throw new IOException("could not read page " + page + " in col " + descriptor, cause);
    }
}
Also used : ValuesReader(org.apache.parquet.column.values.ValuesReader) BytesInput(org.apache.parquet.bytes.BytesInput) ByteBufferInputStream(org.apache.parquet.bytes.ByteBufferInputStream) IOException(java.io.IOException)

Example 20 with ByteBufferInputStream

use of org.apache.parquet.bytes.ByteBufferInputStream in project hive by apache.

the class BaseVectorizedColumnReader method readPageV1.

/**
 * Prepares this reader for a V1 data page.
 *
 * <p>Level readers are obtained from the page's encodings and exposed through
 * {@code repetitionLevelColumn} / {@code definitionLevelColumn}. They are then initialized,
 * repetition first, from one stream over the page bytes, after which the same stream is
 * passed to {@code initDataReader}. The stream position is logged at debug level before
 * each section.
 *
 * @param page the V1 data page to read
 * @throws ParquetDecodingException if an {@link IOException} occurs while reading the page
 */
private void readPageV1(DataPageV1 page) {
    ValuesReader repetitionLevels = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL);
    ValuesReader definitionLevels = page.getDlEncoding().getValuesReader(descriptor, DEFINITION_LEVEL);
    this.repetitionLevelColumn = new ValuesReaderIntIterator(repetitionLevels);
    this.definitionLevelColumn = new ValuesReaderIntIterator(definitionLevels);

    try {
        BytesInput pageBytes = page.getBytes();
        LOG.debug("page size " + pageBytes.size() + " bytes and " + pageValueCount + " records");
        ByteBufferInputStream stream = pageBytes.toInputStream();

        // Repetition levels come first in the page.
        LOG.debug("reading repetition levels at " + stream.position());
        repetitionLevels.initFromPage(pageValueCount, stream);

        // Definition levels follow on the same stream.
        LOG.debug("reading definition levels at " + stream.position());
        definitionLevels.initFromPage(pageValueCount, stream);

        // Whatever remains is handed to the data reader.
        LOG.debug("reading data at " + stream.position());
        initDataReader(page.getValueEncoding(), stream, page.getValueCount());
    } catch (IOException cause) {
        throw new ParquetDecodingException("could not read page " + page + " in col " + descriptor, cause);
    }
}
Also used : ValuesReader(org.apache.parquet.column.values.ValuesReader) ParquetDecodingException(org.apache.parquet.io.ParquetDecodingException) BytesInput(org.apache.parquet.bytes.BytesInput) ByteBufferInputStream(org.apache.parquet.bytes.ByteBufferInputStream) IOException(java.io.IOException)

Aggregations

ByteBufferInputStream (org.apache.parquet.bytes.ByteBufferInputStream)20 Test (org.junit.Test)10 DirectByteBufferAllocator (org.apache.parquet.bytes.DirectByteBufferAllocator)8 BenchmarkOptions (com.carrotsearch.junitbenchmarks.BenchmarkOptions)6 ValuesReader (org.apache.parquet.column.values.ValuesReader)6 Binary (org.apache.parquet.io.api.Binary)6 BytesInput (org.apache.parquet.bytes.BytesInput)5 IOException (java.io.IOException)4 PlainValuesWriter (org.apache.parquet.column.values.plain.PlainValuesWriter)4 ByteBuffer (java.nio.ByteBuffer)3 BinaryPlainValuesReader (org.apache.parquet.column.values.plain.BinaryPlainValuesReader)3 ParquetDecodingException (org.apache.parquet.io.ParquetDecodingException)3 DeltaByteArrayReader (org.apache.parquet.column.values.deltastrings.DeltaByteArrayReader)2 DeltaByteArrayWriter (org.apache.parquet.column.values.deltastrings.DeltaByteArrayWriter)2 BinaryDeltaValuesDecoder (com.facebook.presto.parquet.batchreader.decoders.delta.BinaryDeltaValuesDecoder)1 Int32DeltaBinaryPackedValuesDecoder (com.facebook.presto.parquet.batchreader.decoders.delta.Int32DeltaBinaryPackedValuesDecoder)1 Int64DeltaBinaryPackedValuesDecoder (com.facebook.presto.parquet.batchreader.decoders.delta.Int64DeltaBinaryPackedValuesDecoder)1 Int64TimestampMicrosDeltaBinaryPackedValuesDecoder (com.facebook.presto.parquet.batchreader.decoders.delta.Int64TimestampMicrosDeltaBinaryPackedValuesDecoder)1 BinaryPlainValuesDecoder (com.facebook.presto.parquet.batchreader.decoders.plain.BinaryPlainValuesDecoder)1 BooleanPlainValuesDecoder (com.facebook.presto.parquet.batchreader.decoders.plain.BooleanPlainValuesDecoder)1