Use of org.apache.parquet.column.values.deltastrings.DeltaByteArrayReader in project parquet-mr by Apache.
The class BenchmarkDeltaByteArray, method benchmarkRandomStringsWithDeltaLengthByteArrayValuesWriter.
/**
 * Benchmarks a write/read round trip of {@code values}.
 *
 * <p>The data is written with a {@link DeltaByteArrayWriter} and read back with a
 * {@link DeltaByteArrayReader} (note: despite the method name, no delta-length
 * writer is involved here). After reading, the position of the input stream is
 * printed as "size".
 *
 * @throws IOException propagated from writing or reading the data
 */
@BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 4)
@Test
public void benchmarkRandomStringsWithDeltaLengthByteArrayValuesWriter() throws IOException {
  final int sixtyFourKiB = 64 * 1024;
  DeltaByteArrayWriter deltaWriter =
      new DeltaByteArrayWriter(sixtyFourKiB, sixtyFourKiB, new DirectByteBufferAllocator());
  DeltaByteArrayReader deltaReader = new DeltaByteArrayReader();

  // Encode every sample, then expose the encoded bytes as a stream for the reader.
  Utils.writeData(deltaWriter, values);
  ByteBufferInputStream encoded = deltaWriter.getBytes().toInputStream();

  // Decode as many entries as were written; the result itself is not inspected.
  final int expectedCount = values.length;
  Binary[] decoded = Utils.readData(deltaReader, encoded, expectedCount);

  System.out.println("size " + encoded.position());
}
Use of org.apache.parquet.column.values.deltastrings.DeltaByteArrayReader in project parquet-mr by Apache.
The class TestCorruptDeltaByteArrays, method testOldReassemblyWithoutCorruption.
/**
 * Checks that two pages produced by one writer (with a {@code reset()} between them)
 * can each be decoded by an independent {@link DeltaByteArrayReader}, without linking
 * the second reader to the first one.
 *
 * <p>Page one holds {@code str(0)..str(9)}, page two holds {@code str(10)..str(19)}.
 *
 * @throws Exception if writing or reading a page fails
 */
@Test
public void testOldReassemblyWithoutCorruption() throws Exception {
  DeltaByteArrayWriter writer = getDeltaByteArrayWriter();

  for (int i = 0; i < 10; i += 1) {
    writer.writeBytes(Binary.fromString(str(i)));
  }
  ByteBuffer firstPageBytes = writer.getBytes().toByteBuffer();

  // sets previous to new byte[0]
  writer.reset();

  for (int i = 10; i < 20; i += 1) {
    writer.writeBytes(Binary.fromString(str(i)));
  }
  ByteBuffer secondPageBytes = writer.getBytes().toByteBuffer();

  DeltaByteArrayReader firstPageReader = new DeltaByteArrayReader();
  firstPageReader.initFromPage(10, ByteBufferInputStream.wrap(firstPageBytes));
  for (int i = 0; i < 10; i += 1) {
    // Expected value goes first so a failure message labels the values correctly.
    assertEquals(str(i), firstPageReader.readBytes().toStringUsingUTF8());
  }

  // The second page is read on its own: no previous reader is attached.
  DeltaByteArrayReader secondPageReader = new DeltaByteArrayReader();
  secondPageReader.initFromPage(10, ByteBufferInputStream.wrap(secondPageBytes));
  for (int i = 10; i < 20; i += 1) {
    assertEquals(str(i), secondPageReader.readBytes().toStringUsingUTF8());
  }
}
Use of org.apache.parquet.column.values.deltastrings.DeltaByteArrayReader in project parquet-mr by Apache.
The class TestCorruptDeltaByteArrays, method testReassemblyWithoutCorruption.
/**
 * Checks that a second, non-corrupt page still decodes correctly when its reader is
 * given the first page's reader via {@code setPreviousReader}.
 *
 * <p>Page one holds {@code str(0)..str(9)}, page two holds {@code str(10)..str(19)};
 * the writer is reset between the pages.
 *
 * @throws Exception if writing or reading a page fails
 */
@Test
public void testReassemblyWithoutCorruption() throws Exception {
  DeltaByteArrayWriter writer = getDeltaByteArrayWriter();

  for (int i = 0; i < 10; i += 1) {
    writer.writeBytes(Binary.fromString(str(i)));
  }
  ByteBuffer firstPageBytes = writer.getBytes().toByteBuffer();

  // sets previous to new byte[0]
  writer.reset();

  for (int i = 10; i < 20; i += 1) {
    writer.writeBytes(Binary.fromString(str(i)));
  }
  ByteBuffer secondPageBytes = writer.getBytes().toByteBuffer();

  DeltaByteArrayReader firstPageReader = new DeltaByteArrayReader();
  firstPageReader.initFromPage(10, ByteBufferInputStream.wrap(firstPageBytes));
  for (int i = 0; i < 10; i += 1) {
    // Expected value goes first so a failure message labels the values correctly.
    assertEquals(str(i), firstPageReader.readBytes().toStringUsingUTF8());
  }

  // Unlike the "old" reassembly test, the second reader is linked to the first one.
  DeltaByteArrayReader secondPageReader = new DeltaByteArrayReader();
  secondPageReader.initFromPage(10, ByteBufferInputStream.wrap(secondPageBytes));
  secondPageReader.setPreviousReader(firstPageReader);
  for (int i = 10; i < 20; i += 1) {
    assertEquals(str(i), secondPageReader.readBytes().toStringUsingUTF8());
  }
}
Use of org.apache.parquet.column.values.deltastrings.DeltaByteArrayReader in project parquet-mr by Apache.
The class TestCorruptDeltaByteArrays, method testReassemblyWithCorruptPage.
/**
 * Checks how a corrupt second page behaves with and without a previous reader.
 *
 * <p>Page one holds {@code str(0)..str(9)}. After the writer is reset,
 * {@code corruptWriter(writer, lastValue)} is applied before {@code str(10)..str(19)}
 * are written (NOTE(review): presumably this re-seeds the writer's previous value with
 * the last value of page one to reproduce the corruption; confirm against the helper).
 *
 * <p>The test then asserts that:
 * <ul>
 *   <li>reading the corrupt page with a standalone reader throws
 *       {@link ArrayIndexOutOfBoundsException};</li>
 *   <li>reading the same bytes with the first page's reader attached through
 *       {@code setPreviousReader} yields {@code str(10)..str(19)}.</li>
 * </ul>
 *
 * @throws Exception if writing or reading a page fails unexpectedly
 */
@Test
public void testReassemblyWithCorruptPage() throws Exception {
  DeltaByteArrayWriter writer = getDeltaByteArrayWriter();

  String lastValue = null;
  for (int i = 0; i < 10; i += 1) {
    lastValue = str(i);
    writer.writeBytes(Binary.fromString(lastValue));
  }
  ByteBuffer firstPageBytes = writer.getBytes().toByteBuffer();

  // sets previous to new byte[0]
  writer.reset();
  corruptWriter(writer, lastValue);

  for (int i = 10; i < 20; i += 1) {
    writer.writeBytes(Binary.fromString(str(i)));
  }
  ByteBuffer corruptPageBytes = writer.getBytes().toByteBuffer();

  DeltaByteArrayReader firstPageReader = new DeltaByteArrayReader();
  firstPageReader.initFromPage(10, ByteBufferInputStream.wrap(firstPageBytes));
  for (int i = 0; i < 10; i += 1) {
    assertEquals(str(i), firstPageReader.readBytes().toStringUsingUTF8());
  }

  // Without a previous reader the corrupt page cannot be decoded.
  DeltaByteArrayReader corruptPageReader = new DeltaByteArrayReader();
  corruptPageReader.initFromPage(10, ByteBufferInputStream.wrap(corruptPageBytes));
  try {
    corruptPageReader.readBytes();
    fail("Corrupt page did not throw an exception when read");
  } catch (ArrayIndexOutOfBoundsException expected) {
    // expected, this is a corrupt page
  }

  // With the first page's reader attached, the same bytes decode to the written values.
  DeltaByteArrayReader secondPageReader = new DeltaByteArrayReader();
  secondPageReader.initFromPage(10, ByteBufferInputStream.wrap(corruptPageBytes));
  secondPageReader.setPreviousReader(firstPageReader);
  for (int i = 10; i < 20; i += 1) {
    // Expected value goes first, consistent with the first-page assertions above.
    assertEquals(str(i), secondPageReader.readBytes().toStringUsingUTF8());
  }
}
Use of org.apache.parquet.column.values.deltastrings.DeltaByteArrayReader in project parquet-mr by Apache.
The class BenchmarkDeltaByteArray, method benchmarkSortedStringsWithDeltaLengthByteArrayValuesWriter.
/**
 * Benchmarks a write/read round trip of {@code sortedVals}.
 *
 * <p>The data is written with a {@link DeltaByteArrayWriter} and read back with a
 * {@link DeltaByteArrayReader} (note: despite the method name, no delta-length
 * writer is involved here). After reading, the position of the input stream is
 * printed as "size".
 *
 * @throws IOException propagated from writing or reading the data
 */
@BenchmarkOptions(benchmarkRounds = 20, warmupRounds = 4)
@Test
public void benchmarkSortedStringsWithDeltaLengthByteArrayValuesWriter() throws IOException {
  DeltaByteArrayWriter writer = new DeltaByteArrayWriter(64 * 1024, 64 * 1024, new DirectByteBufferAllocator());
  DeltaByteArrayReader reader = new DeltaByteArrayReader();

  Utils.writeData(writer, sortedVals);
  ByteBufferInputStream data = writer.getBytes().toInputStream();

  // Read back exactly as many entries as were written: the count is taken from
  // sortedVals (the array that was encoded) rather than from the unrelated values array.
  Binary[] bin = Utils.readData(reader, data, sortedVals.length);
  System.out.println("size " + data.position());
}
Aggregations