Use of org.apache.parquet.bytes.ByteBufferInputStream in the project parquet-mr by Apache.
Class BenchmarkDeltaLengthByteArray, method benchmarkRandomStringsWithDeltaLengthByteArrayValuesWriter.
/**
 * Benchmarks one write/read round trip using {@code DeltaLengthByteArrayValuesWriter}
 * and {@code DeltaLengthByteArrayValuesReader}.
 *
 * <p>The {@code values} field is written via {@code Utils.writeData}, the writer's bytes are
 * exposed as a {@code ByteBufferInputStream}, {@code values.length} entries are read back via
 * {@code Utils.readData}, and the stream position after reading is printed as the "size".
 *
 * @throws IOException if obtaining the input stream or reading from it fails
 */
@BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 4)
@Test
public void benchmarkRandomStringsWithDeltaLengthByteArrayValuesWriter() throws IOException {
  // Both numeric constructor arguments are 64 KiB, exactly as in the original call.
  final int sixtyFourKiB = 64 * 1024;
  DeltaLengthByteArrayValuesWriter valuesWriter =
      new DeltaLengthByteArrayValuesWriter(sixtyFourKiB, sixtyFourKiB, new DirectByteBufferAllocator());
  DeltaLengthByteArrayValuesReader valuesReader = new DeltaLengthByteArrayValuesReader();

  Utils.writeData(valuesWriter, values);

  ByteBufferInputStream encoded = valuesWriter.getBytes().toInputStream();
  // The decoded values are not inspected; the read is performed only to be timed.
  Binary[] decoded = Utils.readData(valuesReader, encoded, values.length);

  System.out.println("size " + encoded.position());
}
Use of org.apache.parquet.bytes.ByteBufferInputStream in the project parquet-mr by Apache.
Class BenchmarkDeltaByteArray, method benchmarkRandomStringsWithDeltaLengthByteArrayValuesWriter.
/**
 * Benchmarks one write/read round trip using {@code DeltaByteArrayWriter} and
 * {@code DeltaByteArrayReader}.
 *
 * <p>NOTE(review): the method name mentions "DeltaLengthByteArrayValuesWriter", but the body
 * exercises {@code DeltaByteArrayWriter}; the name is kept unchanged so existing references
 * to this benchmark keep working.
 *
 * <p>The {@code values} field is written via {@code Utils.writeData}, the writer's bytes are
 * exposed as a {@code ByteBufferInputStream}, {@code values.length} entries are read back via
 * {@code Utils.readData}, and the stream position after reading is printed as the "size".
 *
 * @throws IOException if obtaining the input stream or reading from it fails
 */
@BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 4)
@Test
public void benchmarkRandomStringsWithDeltaLengthByteArrayValuesWriter() throws IOException {
  // Both numeric constructor arguments are 64 KiB, exactly as in the original call.
  final int sixtyFourKiB = 64 * 1024;
  DeltaByteArrayWriter deltaWriter =
      new DeltaByteArrayWriter(sixtyFourKiB, sixtyFourKiB, new DirectByteBufferAllocator());
  DeltaByteArrayReader deltaReader = new DeltaByteArrayReader();

  Utils.writeData(deltaWriter, values);

  ByteBufferInputStream encoded = deltaWriter.getBytes().toInputStream();
  // The decoded values are not inspected; the read is performed only to be timed.
  Binary[] decoded = Utils.readData(deltaReader, encoded, values.length);

  System.out.println("size " + encoded.position());
}
Use of org.apache.parquet.bytes.ByteBufferInputStream in the project parquet-mr by Apache.
Class BenchmarkDeltaLengthByteArray, method benchmarkRandomStringsWithPlainValuesWriter.
/**
 * Benchmarks one write/read round trip using {@code PlainValuesWriter} and
 * {@code BinaryPlainValuesReader}.
 *
 * <p>The {@code values} field is written via {@code Utils.writeData}, the writer's bytes are
 * exposed as a {@code ByteBufferInputStream}, {@code values.length} entries are read back via
 * {@code Utils.readData}, and the stream position after reading is printed as the "size".
 *
 * @throws IOException if obtaining the input stream or reading from it fails
 */
@BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 4)
@Test
public void benchmarkRandomStringsWithPlainValuesWriter() throws IOException {
  // Both numeric constructor arguments are 64 KiB, exactly as in the original call.
  final int sixtyFourKiB = 64 * 1024;
  PlainValuesWriter plainWriter =
      new PlainValuesWriter(sixtyFourKiB, sixtyFourKiB, new DirectByteBufferAllocator());
  BinaryPlainValuesReader plainReader = new BinaryPlainValuesReader();

  Utils.writeData(plainWriter, values);

  ByteBufferInputStream encoded = plainWriter.getBytes().toInputStream();
  // The decoded values are not inspected; the read is performed only to be timed.
  Binary[] decoded = Utils.readData(plainReader, encoded, values.length);

  System.out.println("size " + encoded.position());
}
Use of org.apache.parquet.bytes.ByteBufferInputStream in the project parquet-mr by Apache.
Class TestDeltaByteArray, method testLengths.
/**
 * Verifies the two integer sequences found at the start of the bytes produced by
 * {@code DeltaByteArrayWriter} for the {@code values} field.
 *
 * <p>The encoded stream is read twice in a row with a fresh
 * {@code DeltaBinaryPackingValuesReader}: the first pass is checked as the prefix lengths
 * and the second pass, continuing on the same stream, as the suffix lengths.
 *
 * @throws IOException if obtaining the input stream or reading from it fails
 */
@Test
public void testLengths() throws IOException {
  DeltaByteArrayWriter writer =
      new DeltaByteArrayWriter(64 * 1024, 64 * 1024, new DirectByteBufferAllocator());
  ValuesReader prefixReader = new DeltaBinaryPackingValuesReader();
  Utils.writeData(writer, values);
  ByteBufferInputStream encoded = writer.getBytes().toInputStream();

  // test prefix lengths
  int[] prefixLengths = Utils.readInts(prefixReader, encoded, values.length);
  Assert.assertEquals(0, prefixLengths[0]);
  Assert.assertEquals(7, prefixLengths[1]);
  Assert.assertEquals(7, prefixLengths[2]);

  // test suffix lengths (second read continues from where the first one stopped)
  ValuesReader suffixReader = new DeltaBinaryPackingValuesReader();
  int[] suffixLengths = Utils.readInts(suffixReader, encoded, values.length);
  Assert.assertEquals(10, suffixLengths[0]);
  Assert.assertEquals(0, suffixLengths[1]);
  Assert.assertEquals(7, suffixLengths[2]);
}
Use of org.apache.parquet.bytes.ByteBufferInputStream in the project parquet-mr by Apache.
Class ColumnReaderImpl, method readPageV1.
/**
 * Prepares this column reader to consume a V1 data page.
 *
 * <p>Creates the repetition-level and definition-level readers from the page's encodings,
 * wraps each in a {@code ValuesReaderIntIterator}, then initialises them, in that order, from a
 * single input stream over the page bytes. The same stream is finally handed to
 * {@code initDataReader} together with the page's value encoding and value count.
 *
 * <p>NOTE(review): the level readers are initialised with the {@code pageValueCount} field
 * while the data reader receives {@code page.getValueCount()}; presumably the caller keeps
 * these equal. Confirm against the caller.
 *
 * @param page the V1 data page to read
 * @throws ParquetDecodingException if an {@code IOException} occurs while reading the page
 */
private void readPageV1(DataPageV1 page) {
  ValuesReader repetitionReader = page.getRlEncoding().getValuesReader(path, REPETITION_LEVEL);
  ValuesReader definitionReader = page.getDlEncoding().getValuesReader(path, DEFINITION_LEVEL);
  this.repetitionLevelColumn = new ValuesReaderIntIterator(repetitionReader);
  this.definitionLevelColumn = new ValuesReaderIntIterator(definitionReader);
  try {
    BytesInput pageBytes = page.getBytes();
    LOG.debug("page size {} bytes and {} records", pageBytes.size(), pageValueCount);

    // Repetition levels come first in the page stream.
    LOG.debug("reading repetition levels at 0");
    ByteBufferInputStream pageStream = pageBytes.toInputStream();
    repetitionReader.initFromPage(pageValueCount, pageStream);

    // Definition levels are read next from the same stream.
    LOG.debug("reading definition levels at {}", pageStream.position());
    definitionReader.initFromPage(pageValueCount, pageStream);

    // The data reader is initialised last, on the same stream.
    LOG.debug("reading data at {}", pageStream.position());
    initDataReader(page.getValueEncoding(), pageStream, page.getValueCount());
  } catch (IOException ioFailure) {
    throw new ParquetDecodingException("could not read page " + page + " in col " + path, ioFailure);
  }
}
Aggregations