Search in sources :

Example 6 with ValuesReader

use of org.apache.parquet.column.values.ValuesReader in project parquet-mr by apache.

the class TestDeltaByteArray method testLengths.

/**
 * Writes {@code values} with a {@link DeltaByteArrayWriter} and decodes the resulting
 * bytes with two consecutive {@link DeltaBinaryPackingValuesReader}s, checking the
 * first three integers produced by each pass.
 */
@Test
public void testLengths() throws IOException {
    final DeltaByteArrayWriter writer =
        new DeltaByteArrayWriter(64 * 1024, 64 * 1024, new DirectByteBufferAllocator());
    final ValuesReader prefixReader = new DeltaBinaryPackingValuesReader();
    Utils.writeData(writer, values);

    // Both passes consume the same stream, so the second pass starts where the first stopped.
    final ByteBufferInputStream encoded = writer.getBytes().toInputStream();

    // First pass: prefix lengths.
    final int[] prefixLengths = Utils.readInts(prefixReader, encoded, values.length);
    Assert.assertEquals(0, prefixLengths[0]);
    Assert.assertEquals(7, prefixLengths[1]);
    Assert.assertEquals(7, prefixLengths[2]);

    // Second pass, with a fresh reader: suffix lengths.
    final ValuesReader suffixReader = new DeltaBinaryPackingValuesReader();
    final int[] suffixLengths = Utils.readInts(suffixReader, encoded, values.length);
    Assert.assertEquals(10, suffixLengths[0]);
    Assert.assertEquals(0, suffixLengths[1]);
    Assert.assertEquals(7, suffixLengths[2]);
}
Also used : DeltaBinaryPackingValuesReader(org.apache.parquet.column.values.delta.DeltaBinaryPackingValuesReader) ValuesReader(org.apache.parquet.column.values.ValuesReader) DirectByteBufferAllocator(org.apache.parquet.bytes.DirectByteBufferAllocator) ByteBufferInputStream(org.apache.parquet.bytes.ByteBufferInputStream) Test(org.junit.Test)

Example 7 with ValuesReader

use of org.apache.parquet.column.values.ValuesReader in project parquet-mr by apache.

the class ColumnReaderImpl method readPageV1.

/**
 * Sets up the repetition-level, definition-level and data readers for a V1 data page.
 *
 * <p>A single input stream over the page bytes is handed to the repetition-level reader,
 * then the definition-level reader, then the data reader, in that order; the stream
 * position is logged before each of the last two steps.
 *
 * @param page the V1 data page to read
 * @throws ParquetDecodingException if an {@link IOException} occurs while the readers are
 *         being initialized
 */
private void readPageV1(DataPageV1 page) {
    final ValuesReader repetitionReader = page.getRlEncoding().getValuesReader(path, REPETITION_LEVEL);
    final ValuesReader definitionReader = page.getDlEncoding().getValuesReader(path, DEFINITION_LEVEL);
    this.repetitionLevelColumn = new ValuesReaderIntIterator(repetitionReader);
    this.definitionLevelColumn = new ValuesReaderIntIterator(definitionReader);
    try {
        final BytesInput pageBytes = page.getBytes();
        LOG.debug("page size {} bytes and {} records", pageBytes.size(), pageValueCount);

        LOG.debug("reading repetition levels at 0");
        final ByteBufferInputStream stream = pageBytes.toInputStream();
        repetitionReader.initFromPage(pageValueCount, stream);

        LOG.debug("reading definition levels at {}", stream.position());
        definitionReader.initFromPage(pageValueCount, stream);

        LOG.debug("reading data at {}", stream.position());
        initDataReader(page.getValueEncoding(), stream, page.getValueCount());
    } catch (IOException ioe) {
        throw new ParquetDecodingException("could not read page " + page + " in col " + path, ioe);
    }
}
Also used : ValuesReader(org.apache.parquet.column.values.ValuesReader) ParquetDecodingException(org.apache.parquet.io.ParquetDecodingException) BytesInput(org.apache.parquet.bytes.BytesInput) ByteBufferInputStream(org.apache.parquet.bytes.ByteBufferInputStream) IOException(java.io.IOException)

Example 8 with ValuesReader

use of org.apache.parquet.column.values.ValuesReader in project parquet-mr by apache.

the class BenchmarkReadingRandomIntegers method readingRLE.

/**
 * Benchmark body: runs {@code readData} over {@code rleBytes} ten times, each time with a
 * newly constructed {@link RunLengthBitPackingHybridValuesReader} created with argument 32.
 */
@BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 10)
@Test
public void readingRLE() throws IOException {
    // A fresh reader per pass, so no state carries over between passes.
    for (int pass = 0; pass < 10; pass++) {
        readData(new RunLengthBitPackingHybridValuesReader(32), rleBytes);
    }
}
Also used : DeltaBinaryPackingValuesReader(org.apache.parquet.column.values.delta.DeltaBinaryPackingValuesReader) RunLengthBitPackingHybridValuesReader(org.apache.parquet.column.values.rle.RunLengthBitPackingHybridValuesReader) ValuesReader(org.apache.parquet.column.values.ValuesReader) Test(org.junit.Test) BenchmarkOptions(com.carrotsearch.junitbenchmarks.BenchmarkOptions)

Example 9 with ValuesReader

use of org.apache.parquet.column.values.ValuesReader in project hive by apache.

the class VectorizedPrimitiveColumnReader method readPageV1.

/**
 * Sets up the repetition-level, definition-level and data readers for a V1 data page.
 *
 * <p>The page is materialized as a byte array. The repetition-level reader starts at
 * offset 0; each following reader starts at the offset reported by
 * {@code getNextOffset()} of the reader before it.
 *
 * @param page the V1 data page to read
 * @throws ParquetDecodingException if an {@link IOException} occurs while reading the page
 *         bytes or initializing the readers
 */
private void readPageV1(DataPageV1 page) {
    ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL);
    ValuesReader dlReader = page.getDlEncoding().getValuesReader(descriptor, DEFINITION_LEVEL);
    this.repetitionLevelColumn = new ValuesReaderIntIterator(rlReader);
    this.definitionLevelColumn = new ValuesReaderIntIterator(dlReader);
    try {
        byte[] bytes = page.getBytes().toByteArray();
        // The debug messages are built by string concatenation; only pay for that when
        // debug output will actually be emitted.
        final boolean debugEnabled = LOG.isDebugEnabled();
        if (debugEnabled) {
            LOG.debug("page size " + bytes.length + " bytes and " + pageValueCount + " records");
            LOG.debug("reading repetition levels at 0");
        }
        rlReader.initFromPage(pageValueCount, bytes, 0);
        int next = rlReader.getNextOffset();
        if (debugEnabled) {
            LOG.debug("reading definition levels at " + next);
        }
        dlReader.initFromPage(pageValueCount, bytes, next);
        next = dlReader.getNextOffset();
        if (debugEnabled) {
            LOG.debug("reading data at " + next);
        }
        initDataReader(page.getValueEncoding(), bytes, next, page.getValueCount());
    } catch (IOException e) {
        throw new ParquetDecodingException("could not read page " + page + " in col " + descriptor, e);
    }
}
Also used : ValuesReader(org.apache.parquet.column.values.ValuesReader) ParquetDecodingException(org.apache.parquet.io.ParquetDecodingException) IOException(java.io.IOException)

Example 10 with ValuesReader

use of org.apache.parquet.column.values.ValuesReader in project hive by apache.

the class BaseVectorizedColumnReader method readPageV1.

/**
 * Sets up the repetition-level, definition-level and data readers for a V1 data page.
 *
 * <p>The page is materialized as a byte array. The repetition-level reader starts at
 * offset 0; each following reader starts at the offset reported by
 * {@code getNextOffset()} of the reader before it.
 *
 * @param page the V1 data page to read
 * @throws ParquetDecodingException if an {@link IOException} occurs while reading the page
 *         bytes or initializing the readers
 */
private void readPageV1(DataPageV1 page) {
    ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL);
    ValuesReader dlReader = page.getDlEncoding().getValuesReader(descriptor, DEFINITION_LEVEL);
    this.repetitionLevelColumn = new ValuesReaderIntIterator(rlReader);
    this.definitionLevelColumn = new ValuesReaderIntIterator(dlReader);
    try {
        byte[] bytes = page.getBytes().toByteArray();
        // The debug messages are built by string concatenation; only pay for that when
        // debug output will actually be emitted.
        final boolean debugEnabled = LOG.isDebugEnabled();
        if (debugEnabled) {
            LOG.debug("page size " + bytes.length + " bytes and " + pageValueCount + " records");
            LOG.debug("reading repetition levels at 0");
        }
        rlReader.initFromPage(pageValueCount, bytes, 0);
        int next = rlReader.getNextOffset();
        if (debugEnabled) {
            LOG.debug("reading definition levels at " + next);
        }
        dlReader.initFromPage(pageValueCount, bytes, next);
        next = dlReader.getNextOffset();
        if (debugEnabled) {
            LOG.debug("reading data at " + next);
        }
        initDataReader(page.getValueEncoding(), bytes, next, page.getValueCount());
    } catch (IOException e) {
        throw new ParquetDecodingException("could not read page " + page + " in col " + descriptor, e);
    }
}
Also used : ValuesReader(org.apache.parquet.column.values.ValuesReader) ParquetDecodingException(org.apache.parquet.io.ParquetDecodingException) IOException(java.io.IOException)

Aggregations

ValuesReader (org.apache.parquet.column.values.ValuesReader): 15; Test (org.junit.Test): 10; BinaryPlainValuesReader (org.apache.parquet.column.values.plain.BinaryPlainValuesReader): 7; PlainValuesReader (org.apache.parquet.column.values.plain.PlainValuesReader): 7; PlainValuesWriter (org.apache.parquet.column.values.plain.PlainValuesWriter): 7; IOException (java.io.IOException): 4; ValuesWriter (org.apache.parquet.column.values.ValuesWriter): 4; PlainDoubleDictionaryValuesWriter (org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainDoubleDictionaryValuesWriter): 4; PlainFloatDictionaryValuesWriter (org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainFloatDictionaryValuesWriter): 4; PlainIntegerDictionaryValuesWriter (org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainIntegerDictionaryValuesWriter): 4; PlainLongDictionaryValuesWriter (org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainLongDictionaryValuesWriter): 4; ParquetDecodingException (org.apache.parquet.io.ParquetDecodingException): 4; BytesInput (org.apache.parquet.bytes.BytesInput): 3; DeltaBinaryPackingValuesReader (org.apache.parquet.column.values.delta.DeltaBinaryPackingValuesReader): 3; PlainBinaryDictionaryValuesWriter (org.apache.parquet.column.values.dictionary.DictionaryValuesWriter.PlainBinaryDictionaryValuesWriter): 3; FallbackValuesWriter (org.apache.parquet.column.values.fallback.FallbackValuesWriter): 3; ByteBufferInputStream (org.apache.parquet.bytes.ByteBufferInputStream): 2; BenchmarkOptions (com.carrotsearch.junitbenchmarks.BenchmarkOptions): 1; DirectByteBufferAllocator (org.apache.parquet.bytes.DirectByteBufferAllocator): 1; RequiresPreviousReader (org.apache.parquet.column.values.RequiresPreviousReader): 1