Search in sources :

Example 6 with ByteBufferInputStream

use of org.apache.parquet.bytes.ByteBufferInputStream in project parquet-mr by apache.

the class DeltaBinaryPackingValuesWriterForIntegerTest method shouldConsumePageDataInInitialization.

/**
 * Embeds the encoded integers at a non-zero offset inside a larger page buffer and checks that
 * {@code initFromPage} leaves the stream positioned right after the encoded bytes, and that every
 * written value is read back in order.
 */
@Test
public void shouldConsumePageDataInInitialization() throws IOException {
    final int[] data = new int[2 * blockSize + 3];
    int idx = 0;
    while (idx < data.length) {
        data[idx] = idx * 32;
        idx++;
    }
    writeData(data);
    reader = new DeltaBinaryPackingValuesReader();

    // Place the encoded values somewhere in the middle of an oversized page.
    final byte[] valueContent = writer.getBytes().toByteArray();
    final int contentOffsetInPage = 33;
    final byte[] pageContent = new byte[valueContent.length * 10];
    System.arraycopy(valueContent, 0, pageContent, contentOffsetInPage, valueContent.length);

    // After initialization the stream must sit exactly at the end of the encoded values.
    final ByteBufferInputStream stream = ByteBufferInputStream.wrap(ByteBuffer.wrap(pageContent));
    stream.skipFully(contentOffsetInPage);
    reader.initFromPage(100, stream);
    final long positionAfterInit = stream.position();
    assertEquals(valueContent.length + contentOffsetInPage, positionAfterInit);

    // The reader must hand back the original values in order.
    for (int k = 0; k < data.length; k++) {
        assertEquals(data[k], reader.readInteger());
    }
}
Also used : BytesInput(org.apache.parquet.bytes.BytesInput) ByteBufferInputStream(org.apache.parquet.bytes.ByteBufferInputStream) Test(org.junit.Test)

Example 7 with ByteBufferInputStream

use of org.apache.parquet.bytes.ByteBufferInputStream in project parquet-mr by apache.

the class DeltaBinaryPackingValuesWriterForLongTest method shouldReturnCorrectOffsetAfterInitialization.

/**
 * Embeds the encoded longs at a non-zero offset inside a larger page buffer and checks that
 * {@code initFromPage} leaves the stream positioned right after the encoded bytes, and that every
 * written value is read back in order.
 */
@Test
public void shouldReturnCorrectOffsetAfterInitialization() throws IOException {
    final long[] data = new long[2 * blockSize + 3];
    int idx = 0;
    while (idx < data.length) {
        data[idx] = idx * 32;
        idx++;
    }
    writeData(data);
    reader = new DeltaBinaryPackingValuesReader();

    // Place the encoded values somewhere in the middle of an oversized page.
    final byte[] valueContent = writer.getBytes().toByteArray();
    final int contentOffsetInPage = 33;
    final byte[] pageContent = new byte[valueContent.length * 10];
    System.arraycopy(valueContent, 0, pageContent, contentOffsetInPage, valueContent.length);

    // After initialization the stream must sit exactly at the end of the encoded values.
    final ByteBufferInputStream stream = ByteBufferInputStream.wrap(ByteBuffer.wrap(pageContent));
    stream.skipFully(contentOffsetInPage);
    reader.initFromPage(100, stream);
    final long positionAfterInit = stream.position();
    assertEquals(valueContent.length + contentOffsetInPage, positionAfterInit);

    // The reader must hand back the original values in order.
    for (int k = 0; k < data.length; k++) {
        assertEquals(data[k], reader.readLong());
    }
}
Also used : BytesInput(org.apache.parquet.bytes.BytesInput) ByteBufferInputStream(org.apache.parquet.bytes.ByteBufferInputStream) Test(org.junit.Test)

Example 8 with ByteBufferInputStream

use of org.apache.parquet.bytes.ByteBufferInputStream in project drill by apache.

the class PageReader method decodeLevels.

/**
 * Decodes any repetition and definition level data in this page.
 *
 * <p>Level iterators are only created when the corresponding maximum level of the column is
 * greater than zero; the results are stored in {@code repetitionLevels} and
 * {@code definitionLevels}.
 *
 * @return the offset into the page buffer after any levels have been decoded.
 * @throws IOException declared by this method; presumably raised while initializing the level
 *         readers from the page data (confirm against the callee signatures)
 * @throws DrillRuntimeException if the page is neither a DATA_PAGE nor a DATA_PAGE_V2
 */
protected int decodeLevels() throws IOException {
    int maxRepLevel = columnDescriptor.getMaxRepetitionLevel();
    int maxDefLevel = columnDescriptor.getMaxDefinitionLevel();
    int dataOffset;
    switch(pageHeader.getType()) {
        case DATA_PAGE:
            // A single stream over the page is shared by both level readers: each initFromPage call
            // reads from it in turn, so the stream position afterwards is where the levels end.
            ByteBufferInputStream dataStream = ByteBufferInputStream.wrap(pageData.nioBuffer(0, byteLength));
            if (maxRepLevel > 0) {
                Encoding rlEncoding = METADATA_CONVERTER.getEncoding(dataPageInfo.getRepetitionLevelEncoding());
                ValuesReader rlReader = rlEncoding.getValuesReader(columnDescriptor, ValuesType.REPETITION_LEVEL);
                rlReader.initFromPage(pageValueCount, dataStream);
                this.repetitionLevels = new ValuesReaderIntIterator(rlReader);
                // we know that the first value will be a 0, at the end of each list of repeated values we will hit another 0 indicating
                // a new record, although we don't know the length until we hit it (and this is a one way stream of integers) so we
                // read the first zero here to simplify the reading processes, and start reading the first value the same as all
                // of the rest. Effectively we are 'reading' the non-existent value in front of the first allowing direct access to
                // the first list of repetition levels
                this.repetitionLevels.nextInt();
            }
            if (maxDefLevel > 0) {
                Encoding dlEncoding = METADATA_CONVERTER.getEncoding(dataPageInfo.getDefinitionLevelEncoding());
                ValuesReader dlReader = dlEncoding.getValuesReader(columnDescriptor, ValuesType.DEFINITION_LEVEL);
                dlReader.initFromPage(pageValueCount, dataStream);
                this.definitionLevels = new ValuesReaderIntIterator(dlReader);
            }
            // Whatever the level readers consumed from the stream determines the returned offset.
            dataOffset = (int) dataStream.position();
            break;
        case DATA_PAGE_V2:
            // For V2 pages the byte lengths of both level sections are taken from the page header.
            int repLevelLen = pageHeader.data_page_header_v2.repetition_levels_byte_length;
            int defLevelLen = pageHeader.data_page_header_v2.definition_levels_byte_length;
            if (maxRepLevel > 0) {
                // Repetition levels are read from the first repLevelLen bytes of the page.
                this.repetitionLevels = newRLEIterator(maxRepLevel, BytesInput.from(pageData.nioBuffer(0, repLevelLen)));
                // See earlier comment.
                this.repetitionLevels.nextInt();
            }
            if (maxDefLevel > 0) {
                // Definition levels are read from the defLevelLen bytes starting at repLevelLen.
                this.definitionLevels = newRLEIterator(maxDefLevel, BytesInput.from(pageData.nioBuffer(repLevelLen, defLevelLen)));
            }
            // The offset is the sum of the header-declared lengths, regardless of which iterators were created.
            dataOffset = repLevelLen + defLevelLen;
            break;
        default:
            throw new DrillRuntimeException(String.format("Did not expect to find a page of type %s now.", pageHeader.getType()));
    }
    return dataOffset;
}
Also used : ValuesReader(org.apache.parquet.column.values.ValuesReader) DictionaryValuesReader(org.apache.parquet.column.values.dictionary.DictionaryValuesReader) ByteBufferInputStream(org.apache.parquet.bytes.ByteBufferInputStream) Encoding(org.apache.parquet.column.Encoding) DrillRuntimeException(org.apache.drill.common.exceptions.DrillRuntimeException)

Example 9 with ByteBufferInputStream

use of org.apache.parquet.bytes.ByteBufferInputStream in project presto by prestodb.

the class Decoders method createRepetitionLevelDecoder.

/**
 * Creates a {@link RepetitionLevelDecoder} for the level data at the current position of
 * {@code buffer}.
 *
 * <p>When {@code maxLevelValue} is zero, or the bit width derived from it is zero, nothing is read
 * from {@code buffer} and a decoder built from {@code (0, valueCount)} is returned. Otherwise the
 * encoding must be RLE; a little-endian int length is read, the decoder is built over a slice of
 * that many bytes, and the position of {@code buffer} is advanced past the length and the slice.
 *
 * @param encoding encoding of the repetition levels; checked to be RLE when levels are present
 * @param maxLevelValue maximum repetition level value
 * @param valueCount number of values, passed through to the decoder
 * @param buffer buffer holding the level data; its position is advanced when levels are read
 * @return the decoder for the repetition levels
 * @throws IOException declared by this method; presumably raised when the length or slice cannot
 *         be read from the buffer
 */
public static final RepetitionLevelDecoder createRepetitionLevelDecoder(ParquetEncoding encoding, int maxLevelValue, int valueCount, ByteBuffer buffer) throws IOException {
    final int bitWidth = getWidthFromMaxInt(maxLevelValue);
    final boolean hasLevelData = maxLevelValue != 0 && bitWidth != 0;
    if (!hasLevelData) {
        return new RepetitionLevelDecoder(0, valueCount);
    }

    checkArgument(encoding == RLE, "Invalid repetition level encoding: " + encoding);

    ByteBufferInputStream levelStream = ByteBufferInputStream.wrap(buffer);
    final int encodedLength = readIntLittleEndian(levelStream);
    RepetitionLevelDecoder decoder = new RepetitionLevelDecoder(valueCount, bitWidth, levelStream.sliceStream(encodedLength));

    // Skip the encoded levels plus the 4 bytes of the length int read above.
    final int consumed = encodedLength + 4;
    buffer.position(buffer.position() + consumed);
    return decoder;
}
Also used : ByteBufferInputStream(org.apache.parquet.bytes.ByteBufferInputStream)

Example 10 with ByteBufferInputStream

use of org.apache.parquet.bytes.ByteBufferInputStream in project presto by prestodb.

the class Decoders method createFlatDefinitionLevelDecoder.

/**
 * Creates a {@link FlatDefinitionLevelDecoder} for the level data at the current position of
 * {@code buffer}.
 *
 * <p>No bytes are read when the column is required (decoder built from {@code (1, valueCount)}),
 * or when {@code maxLevelValue} or the bit width derived from it is zero (decoder built from
 * {@code (0, valueCount)}). Otherwise the encoding must be RLE; a little-endian int length is
 * read, the decoder is built over a slice of that many bytes, and the position of {@code buffer}
 * is advanced past the length and the slice.
 *
 * @param encoding encoding of the definition levels; checked to be RLE when levels are present
 * @param isRequired whether the column is required
 * @param maxLevelValue maximum definition level value
 * @param valueCount number of values, passed through to the decoder
 * @param buffer buffer holding the level data; its position is advanced when levels are read
 * @return the decoder for the definition levels
 * @throws IOException declared by this method; presumably raised when the length or slice cannot
 *         be read from the buffer
 */
private static final FlatDefinitionLevelDecoder createFlatDefinitionLevelDecoder(ParquetEncoding encoding, boolean isRequired, int maxLevelValue, int valueCount, ByteBuffer buffer) throws IOException {
    if (isRequired) {
        return new FlatDefinitionLevelDecoder(1, valueCount);
    }
    // The width is only computed when maxLevelValue is non-zero (short-circuit evaluation).
    if (maxLevelValue == 0 || getWidthFromMaxInt(maxLevelValue) == 0) {
        return new FlatDefinitionLevelDecoder(0, valueCount);
    }

    checkArgument(encoding == RLE, "Invalid definition level encoding: " + encoding);

    ByteBufferInputStream levelStream = ByteBufferInputStream.wrap(buffer);
    final int encodedLength = readIntLittleEndian(levelStream);
    FlatDefinitionLevelDecoder decoder = new FlatDefinitionLevelDecoder(valueCount, levelStream.sliceStream(encodedLength));

    // Skip the encoded levels plus the 4 bytes of the length int read above.
    final int consumed = encodedLength + 4;
    buffer.position(buffer.position() + consumed);
    return decoder;
}
Also used : ByteBufferInputStream(org.apache.parquet.bytes.ByteBufferInputStream)

Aggregations

ByteBufferInputStream (org.apache.parquet.bytes.ByteBufferInputStream)20 Test (org.junit.Test)10 DirectByteBufferAllocator (org.apache.parquet.bytes.DirectByteBufferAllocator)8 BenchmarkOptions (com.carrotsearch.junitbenchmarks.BenchmarkOptions)6 ValuesReader (org.apache.parquet.column.values.ValuesReader)6 Binary (org.apache.parquet.io.api.Binary)6 BytesInput (org.apache.parquet.bytes.BytesInput)5 IOException (java.io.IOException)4 PlainValuesWriter (org.apache.parquet.column.values.plain.PlainValuesWriter)4 ByteBuffer (java.nio.ByteBuffer)3 BinaryPlainValuesReader (org.apache.parquet.column.values.plain.BinaryPlainValuesReader)3 ParquetDecodingException (org.apache.parquet.io.ParquetDecodingException)3 DeltaByteArrayReader (org.apache.parquet.column.values.deltastrings.DeltaByteArrayReader)2 DeltaByteArrayWriter (org.apache.parquet.column.values.deltastrings.DeltaByteArrayWriter)2 BinaryDeltaValuesDecoder (com.facebook.presto.parquet.batchreader.decoders.delta.BinaryDeltaValuesDecoder)1 Int32DeltaBinaryPackedValuesDecoder (com.facebook.presto.parquet.batchreader.decoders.delta.Int32DeltaBinaryPackedValuesDecoder)1 Int64DeltaBinaryPackedValuesDecoder (com.facebook.presto.parquet.batchreader.decoders.delta.Int64DeltaBinaryPackedValuesDecoder)1 Int64TimestampMicrosDeltaBinaryPackedValuesDecoder (com.facebook.presto.parquet.batchreader.decoders.delta.Int64TimestampMicrosDeltaBinaryPackedValuesDecoder)1 BinaryPlainValuesDecoder (com.facebook.presto.parquet.batchreader.decoders.plain.BinaryPlainValuesDecoder)1 BooleanPlainValuesDecoder (com.facebook.presto.parquet.batchreader.decoders.plain.BooleanPlainValuesDecoder)1