Search in sources :

Example 1 with ByteBufferInputStream

use of org.apache.parquet.bytes.ByteBufferInputStream in project parquet-mr by apache.

the class BenchmarkDeltaLengthByteArray method benchmarkRandomStringsWithDeltaLengthByteArrayValuesWriter.

/**
 * Benchmarks a write-then-read round trip using {@link DeltaLengthByteArrayValuesWriter}
 * and {@link DeltaLengthByteArrayValuesReader}.
 *
 * <p>The {@code values} field (declared outside this method) is written through
 * {@code Utils.writeData}, the writer's bytes are exposed as a stream, and
 * {@code values.length} entries are read back through {@code Utils.readData}.
 * The stream position after reading is printed to standard output.
 *
 * @throws IOException if obtaining the written bytes or reading them back fails
 */
@BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 4)
@Test
public void benchmarkRandomStringsWithDeltaLengthByteArrayValuesWriter() throws IOException {
    // Both numeric constructor arguments use the same 64 * 1024 value.
    final int sixtyFourKiB = 64 * 1024;
    DeltaLengthByteArrayValuesWriter valuesWriter =
        new DeltaLengthByteArrayValuesWriter(sixtyFourKiB, sixtyFourKiB, new DirectByteBufferAllocator());
    Utils.writeData(valuesWriter, values);

    ByteBufferInputStream encoded = valuesWriter.getBytes().toInputStream();
    DeltaLengthByteArrayValuesReader valuesReader = new DeltaLengthByteArrayValuesReader();
    Binary[] decoded = Utils.readData(valuesReader, encoded, values.length);

    // Report how far the read advanced through the encoded stream.
    System.out.println("size " + encoded.position());
}
Also used : DirectByteBufferAllocator(org.apache.parquet.bytes.DirectByteBufferAllocator) DeltaLengthByteArrayValuesReader(org.apache.parquet.column.values.deltalengthbytearray.DeltaLengthByteArrayValuesReader) ByteBufferInputStream(org.apache.parquet.bytes.ByteBufferInputStream) DeltaLengthByteArrayValuesWriter(org.apache.parquet.column.values.deltalengthbytearray.DeltaLengthByteArrayValuesWriter) Binary(org.apache.parquet.io.api.Binary) Test(org.junit.Test) BenchmarkOptions(com.carrotsearch.junitbenchmarks.BenchmarkOptions)

Example 2 with ByteBufferInputStream

use of org.apache.parquet.bytes.ByteBufferInputStream in project parquet-mr by apache.

the class BenchmarkDeltaByteArray method benchmarkRandomStringsWithDeltaLengthByteArrayValuesWriter.

/**
 * Benchmarks a write-then-read round trip using {@link DeltaByteArrayWriter}
 * and {@link DeltaByteArrayReader}.
 *
 * <p>The {@code values} field (declared outside this method) is written through
 * {@code Utils.writeData}, the writer's bytes are exposed as a stream, and
 * {@code values.length} entries are read back through {@code Utils.readData}.
 * The stream position after reading is printed to standard output.
 *
 * <p>NOTE(review): the method name mentions "DeltaLengthByteArrayValuesWriter" while the
 * body exercises {@code DeltaByteArrayWriter}; the name is kept as-is because it is the
 * method's public identifier — confirm whether a rename is wanted.
 *
 * @throws IOException if obtaining the written bytes or reading them back fails
 */
@BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 4)
@Test
public void benchmarkRandomStringsWithDeltaLengthByteArrayValuesWriter() throws IOException {
    // Both numeric constructor arguments use the same 64 * 1024 value.
    final int sixtyFourKiB = 64 * 1024;
    DeltaByteArrayWriter deltaWriter =
        new DeltaByteArrayWriter(sixtyFourKiB, sixtyFourKiB, new DirectByteBufferAllocator());
    Utils.writeData(deltaWriter, values);

    ByteBufferInputStream encoded = deltaWriter.getBytes().toInputStream();
    DeltaByteArrayReader deltaReader = new DeltaByteArrayReader();
    Binary[] decoded = Utils.readData(deltaReader, encoded, values.length);

    // Report how far the read advanced through the encoded stream.
    System.out.println("size " + encoded.position());
}
Also used : DirectByteBufferAllocator(org.apache.parquet.bytes.DirectByteBufferAllocator) DeltaByteArrayWriter(org.apache.parquet.column.values.deltastrings.DeltaByteArrayWriter) ByteBufferInputStream(org.apache.parquet.bytes.ByteBufferInputStream) DeltaByteArrayReader(org.apache.parquet.column.values.deltastrings.DeltaByteArrayReader) Binary(org.apache.parquet.io.api.Binary) Test(org.junit.Test) BenchmarkOptions(com.carrotsearch.junitbenchmarks.BenchmarkOptions)

Example 3 with ByteBufferInputStream

use of org.apache.parquet.bytes.ByteBufferInputStream in project parquet-mr by apache.

the class BenchmarkDeltaLengthByteArray method benchmarkRandomStringsWithPlainValuesWriter.

/**
 * Benchmarks a write-then-read round trip using {@link PlainValuesWriter}
 * and {@link BinaryPlainValuesReader}.
 *
 * <p>The {@code values} field (declared outside this method) is written through
 * {@code Utils.writeData}, the writer's bytes are exposed as a stream, and
 * {@code values.length} entries are read back through {@code Utils.readData}.
 * The stream position after reading is printed to standard output.
 *
 * @throws IOException if obtaining the written bytes or reading them back fails
 */
@BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 4)
@Test
public void benchmarkRandomStringsWithPlainValuesWriter() throws IOException {
    // Both numeric constructor arguments use the same 64 * 1024 value.
    final int sixtyFourKiB = 64 * 1024;
    PlainValuesWriter plainWriter =
        new PlainValuesWriter(sixtyFourKiB, sixtyFourKiB, new DirectByteBufferAllocator());
    Utils.writeData(plainWriter, values);

    ByteBufferInputStream encoded = plainWriter.getBytes().toInputStream();
    BinaryPlainValuesReader plainReader = new BinaryPlainValuesReader();
    Binary[] decoded = Utils.readData(plainReader, encoded, values.length);

    // Report how far the read advanced through the encoded stream.
    System.out.println("size " + encoded.position());
}
Also used : PlainValuesWriter(org.apache.parquet.column.values.plain.PlainValuesWriter) BinaryPlainValuesReader(org.apache.parquet.column.values.plain.BinaryPlainValuesReader) DirectByteBufferAllocator(org.apache.parquet.bytes.DirectByteBufferAllocator) ByteBufferInputStream(org.apache.parquet.bytes.ByteBufferInputStream) Binary(org.apache.parquet.io.api.Binary) Test(org.junit.Test) BenchmarkOptions(com.carrotsearch.junitbenchmarks.BenchmarkOptions)

Example 4 with ByteBufferInputStream

use of org.apache.parquet.bytes.ByteBufferInputStream in project parquet-mr by apache.

the class TestDeltaByteArray method testLengths.

/**
 * Checks the integer length sequences found in the output of {@link DeltaByteArrayWriter}.
 *
 * <p>The {@code values} field (declared outside this method) is written, and the resulting
 * stream is then read twice in succession with a fresh {@link DeltaBinaryPackingValuesReader}
 * each time: the first read is asserted as prefix lengths, the second as suffix lengths.
 * Both reads are handed the same stream instance.
 *
 * @throws IOException if obtaining the written bytes or reading them back fails
 */
@Test
public void testLengths() throws IOException {
    // Both numeric constructor arguments use the same 64 * 1024 value.
    final int sixtyFourKiB = 64 * 1024;
    DeltaByteArrayWriter deltaWriter =
        new DeltaByteArrayWriter(sixtyFourKiB, sixtyFourKiB, new DirectByteBufferAllocator());
    Utils.writeData(deltaWriter, values);
    ByteBufferInputStream encoded = deltaWriter.getBytes().toInputStream();

    // test prefix lengths
    ValuesReader prefixReader = new DeltaBinaryPackingValuesReader();
    int[] prefixLengths = Utils.readInts(prefixReader, encoded, values.length);
    Assert.assertEquals(0, prefixLengths[0]);
    Assert.assertEquals(7, prefixLengths[1]);
    Assert.assertEquals(7, prefixLengths[2]);

    // test suffix lengths (read from the same stream, after the prefix lengths)
    ValuesReader suffixReader = new DeltaBinaryPackingValuesReader();
    int[] suffixLengths = Utils.readInts(suffixReader, encoded, values.length);
    Assert.assertEquals(10, suffixLengths[0]);
    Assert.assertEquals(0, suffixLengths[1]);
    Assert.assertEquals(7, suffixLengths[2]);
}
Also used : DeltaBinaryPackingValuesReader(org.apache.parquet.column.values.delta.DeltaBinaryPackingValuesReader) ValuesReader(org.apache.parquet.column.values.ValuesReader) DirectByteBufferAllocator(org.apache.parquet.bytes.DirectByteBufferAllocator) ByteBufferInputStream(org.apache.parquet.bytes.ByteBufferInputStream) Test(org.junit.Test)

Example 5 with ByteBufferInputStream

use of org.apache.parquet.bytes.ByteBufferInputStream in project parquet-mr by apache.

the class ColumnReaderImpl method readPageV1.

/**
 * Prepares the level readers and the data reader for a V1 data page.
 *
 * <p>Values readers for repetition and definition levels are obtained from the page's
 * encodings and wrapped in {@code ValuesReaderIntIterator}s stored on this instance. The
 * page bytes are then consumed through a single stream, in order: repetition levels,
 * definition levels, then the data section (handed to {@code initDataReader}).
 *
 * @param page the V1 data page to read
 * @throws ParquetDecodingException if an {@link IOException} occurs while reading the page
 */
private void readPageV1(DataPageV1 page) {
    ValuesReader repetitionReader = page.getRlEncoding().getValuesReader(path, REPETITION_LEVEL);
    ValuesReader definitionReader = page.getDlEncoding().getValuesReader(path, DEFINITION_LEVEL);
    repetitionLevelColumn = new ValuesReaderIntIterator(repetitionReader);
    definitionLevelColumn = new ValuesReaderIntIterator(definitionReader);
    try {
        BytesInput pageBytes = page.getBytes();
        LOG.debug("page size {} bytes and {} records", pageBytes.size(), pageValueCount);

        // One shared stream: each initFromPage call is given the stream as left by the previous one.
        LOG.debug("reading repetition levels at 0");
        ByteBufferInputStream stream = pageBytes.toInputStream();
        repetitionReader.initFromPage(pageValueCount, stream);

        LOG.debug("reading definition levels at {}", stream.position());
        definitionReader.initFromPage(pageValueCount, stream);

        LOG.debug("reading data at {}", stream.position());
        initDataReader(page.getValueEncoding(), stream, page.getValueCount());
    } catch (IOException ioe) {
        // Wrap with page and column context, preserving the original cause.
        String message = "could not read page " + page + " in col " + path;
        throw new ParquetDecodingException(message, ioe);
    }
}
Also used : ValuesReader(org.apache.parquet.column.values.ValuesReader) ParquetDecodingException(org.apache.parquet.io.ParquetDecodingException) BytesInput(org.apache.parquet.bytes.BytesInput) ByteBufferInputStream(org.apache.parquet.bytes.ByteBufferInputStream) IOException(java.io.IOException)

Aggregations

ByteBufferInputStream (org.apache.parquet.bytes.ByteBufferInputStream) 20, Test (org.junit.Test) 10, DirectByteBufferAllocator (org.apache.parquet.bytes.DirectByteBufferAllocator) 8, BenchmarkOptions (com.carrotsearch.junitbenchmarks.BenchmarkOptions) 6, ValuesReader (org.apache.parquet.column.values.ValuesReader) 6, Binary (org.apache.parquet.io.api.Binary) 6, BytesInput (org.apache.parquet.bytes.BytesInput) 5, IOException (java.io.IOException) 4, PlainValuesWriter (org.apache.parquet.column.values.plain.PlainValuesWriter) 4, ByteBuffer (java.nio.ByteBuffer) 3, BinaryPlainValuesReader (org.apache.parquet.column.values.plain.BinaryPlainValuesReader) 3, ParquetDecodingException (org.apache.parquet.io.ParquetDecodingException) 3, DeltaByteArrayReader (org.apache.parquet.column.values.deltastrings.DeltaByteArrayReader) 2, DeltaByteArrayWriter (org.apache.parquet.column.values.deltastrings.DeltaByteArrayWriter) 2, BinaryDeltaValuesDecoder (com.facebook.presto.parquet.batchreader.decoders.delta.BinaryDeltaValuesDecoder) 1, Int32DeltaBinaryPackedValuesDecoder (com.facebook.presto.parquet.batchreader.decoders.delta.Int32DeltaBinaryPackedValuesDecoder) 1, Int64DeltaBinaryPackedValuesDecoder (com.facebook.presto.parquet.batchreader.decoders.delta.Int64DeltaBinaryPackedValuesDecoder) 1, Int64TimestampMicrosDeltaBinaryPackedValuesDecoder (com.facebook.presto.parquet.batchreader.decoders.delta.Int64TimestampMicrosDeltaBinaryPackedValuesDecoder) 1, BinaryPlainValuesDecoder (com.facebook.presto.parquet.batchreader.decoders.plain.BinaryPlainValuesDecoder) 1, BooleanPlainValuesDecoder (com.facebook.presto.parquet.batchreader.decoders.plain.BooleanPlainValuesDecoder) 1